{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "e8377817-763a-4c8a-a311-28fd26f1ad7d", "metadata": {}, "outputs": [], "source": [ "from sklearn.datasets import load_digits" ] }, { "cell_type": "code", "execution_count": 2, "id": "8632592f-c1f0-4ea6-8b5f-1f3513458741", "metadata": {}, "outputs": [], "source": [ "digits = load_digits()" ] }, { "cell_type": "code", "execution_count": 3, "id": "d27acf66-2b68-4d63-97f3-c2436097cf45", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1797, 64)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "digits.data.shape" ] }, { "cell_type": "code", "execution_count": 4, "id": "fee840d3-0e1e-4a48-a436-b94285c1acc5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 0., 0., 5., 13., 9., 1., 0., 0., 0., 0., 13., 15., 10.,\n", " 15., 5., 0., 0., 3., 15., 2., 0., 11., 8., 0., 0., 4.,\n", " 12., 0., 0., 8., 8., 0., 0., 5., 8., 0., 0., 9., 8.,\n", " 0., 0., 4., 11., 0., 1., 12., 7., 0., 0., 2., 14., 5.,\n", " 10., 12., 0., 0., 0., 0., 6., 13., 10., 0., 0., 0.])" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "digits.data[0]" ] }, { "cell_type": "code", "execution_count": 5, "id": "feffae4a-a0fc-4f82-a8ba-297e34330ce5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 0., 0., 5., 13., 9., 1., 0., 0.],\n", " [ 0., 0., 13., 15., 10., 15., 5., 0.],\n", " [ 0., 3., 15., 2., 0., 11., 8., 0.],\n", " [ 0., 4., 12., 0., 0., 8., 8., 0.],\n", " [ 0., 5., 8., 0., 0., 9., 8., 0.],\n", " [ 0., 4., 11., 0., 1., 12., 7., 0.],\n", " [ 0., 2., 14., 5., 10., 12., 0., 0.],\n", " [ 0., 0., 6., 13., 10., 0., 0., 0.]])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "digits.data[0].reshape(8,8)" ] }, { "cell_type": "code", "execution_count": 6, "id": "4fad9e34-b1d2-427e-8bac-494554e58879", "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", 
"%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 7, "id": "d385e526-d0c4-40ad-96ea-eaa916c4dcde", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.gray()\n", "plt.matshow(digits.data[0].reshape(8,8))" ] }, { "cell_type": "code", "execution_count": 8, "id": "fab57c4b-4eeb-47fc-8a7f-ffbbbc778003", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "digits.target[0]" ] }, { "cell_type": "code", "execution_count": 9, "id": "b640c490-c823-42cd-a1f8-52fe40d3ecc9", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 10, "id": "cf0da7e9-99ea-4341-a483-42a2ae71410f", "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame(digits.data)" ] }, { "cell_type": "code", "execution_count": 11, "id": "df8b60d8-ac5c-4250-8456-6c2e15f5c7f5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789...54555657585960616263
00.00.05.013.09.01.00.00.00.00.0...0.00.00.00.06.013.010.00.00.00.0
10.00.00.012.013.05.00.00.00.00.0...0.00.00.00.00.011.016.010.00.00.0
20.00.00.04.015.012.00.00.00.00.0...5.00.00.00.00.03.011.016.09.00.0
30.00.07.015.013.01.00.00.00.08.0...9.00.00.00.07.013.013.09.00.00.0
40.00.00.01.011.00.00.00.00.00.0...0.00.00.00.00.02.016.04.00.00.0
\n", "

5 rows × 64 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 9 ... 54 55 56 \\\n", "0 0.0 0.0 5.0 13.0 9.0 1.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "1 0.0 0.0 0.0 12.0 13.0 5.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "2 0.0 0.0 0.0 4.0 15.0 12.0 0.0 0.0 0.0 0.0 ... 5.0 0.0 0.0 \n", "3 0.0 0.0 7.0 15.0 13.0 1.0 0.0 0.0 0.0 8.0 ... 9.0 0.0 0.0 \n", "4 0.0 0.0 0.0 1.0 11.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "\n", " 57 58 59 60 61 62 63 \n", "0 0.0 6.0 13.0 10.0 0.0 0.0 0.0 \n", "1 0.0 0.0 11.0 16.0 10.0 0.0 0.0 \n", "2 0.0 0.0 3.0 11.0 16.0 9.0 0.0 \n", "3 0.0 7.0 13.0 13.0 9.0 0.0 0.0 \n", "4 0.0 0.0 2.0 16.0 4.0 0.0 0.0 \n", "\n", "[5 rows x 64 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 12, "id": "aded246a-0bfa-4078-9868-03b08694e6da", "metadata": {}, "outputs": [], "source": [ "df.columns = digits.feature_names" ] }, { "cell_type": "code", "execution_count": 13, "id": "3fe7f989-8741-4664-9480-01632c58ad58", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pixel_0_0pixel_0_1pixel_0_2pixel_0_3pixel_0_4pixel_0_5pixel_0_6pixel_0_7pixel_1_0pixel_1_1...pixel_6_6pixel_6_7pixel_7_0pixel_7_1pixel_7_2pixel_7_3pixel_7_4pixel_7_5pixel_7_6pixel_7_7
00.00.05.013.09.01.00.00.00.00.0...0.00.00.00.06.013.010.00.00.00.0
10.00.00.012.013.05.00.00.00.00.0...0.00.00.00.00.011.016.010.00.00.0
20.00.00.04.015.012.00.00.00.00.0...5.00.00.00.00.03.011.016.09.00.0
30.00.07.015.013.01.00.00.00.08.0...9.00.00.00.07.013.013.09.00.00.0
40.00.00.01.011.00.00.00.00.00.0...0.00.00.00.00.02.016.04.00.00.0
\n", "

5 rows × 64 columns

\n", "
" ], "text/plain": [ " pixel_0_0 pixel_0_1 pixel_0_2 pixel_0_3 pixel_0_4 pixel_0_5 \\\n", "0 0.0 0.0 5.0 13.0 9.0 1.0 \n", "1 0.0 0.0 0.0 12.0 13.0 5.0 \n", "2 0.0 0.0 0.0 4.0 15.0 12.0 \n", "3 0.0 0.0 7.0 15.0 13.0 1.0 \n", "4 0.0 0.0 0.0 1.0 11.0 0.0 \n", "\n", " pixel_0_6 pixel_0_7 pixel_1_0 pixel_1_1 ... pixel_6_6 pixel_6_7 \\\n", "0 0.0 0.0 0.0 0.0 ... 0.0 0.0 \n", "1 0.0 0.0 0.0 0.0 ... 0.0 0.0 \n", "2 0.0 0.0 0.0 0.0 ... 5.0 0.0 \n", "3 0.0 0.0 0.0 8.0 ... 9.0 0.0 \n", "4 0.0 0.0 0.0 0.0 ... 0.0 0.0 \n", "\n", " pixel_7_0 pixel_7_1 pixel_7_2 pixel_7_3 pixel_7_4 pixel_7_5 \\\n", "0 0.0 0.0 6.0 13.0 10.0 0.0 \n", "1 0.0 0.0 0.0 11.0 16.0 10.0 \n", "2 0.0 0.0 0.0 3.0 11.0 16.0 \n", "3 0.0 0.0 7.0 13.0 13.0 9.0 \n", "4 0.0 0.0 0.0 2.0 16.0 4.0 \n", "\n", " pixel_7_6 pixel_7_7 \n", "0 0.0 0.0 \n", "1 0.0 0.0 \n", "2 9.0 0.0 \n", "3 0.0 0.0 \n", "4 0.0 0.0 \n", "\n", "[5 rows x 64 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "markdown", "id": "a243ed39-7669-45c6-85e0-acb8183d658a", "metadata": {}, "source": [ "PCA 对特征的尺度敏感，因此在进行 PCA 之前需要先对数据做标准化（StandardScaler：零均值、单位方差）。" ] }, { "cell_type": "code", "execution_count": 14, "id": "ef386108-8407-4453-9eda-5947b6323f38", "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import StandardScaler" ] }, { "cell_type": "code", "execution_count": 15, "id": "b3873b08-537a-479a-a50c-7ad1ea7c763a", "metadata": {}, "outputs": [], "source": [ "scaler = StandardScaler()" ] }, { "cell_type": "code", "execution_count": 16, "id": "7c249404-1e53-48cd-8477-7a7c65c56ace", "metadata": {}, "outputs": [], "source": [ "x = scaler.fit_transform(digits.data)" ] }, { "cell_type": "code", "execution_count": 17, "id": "679a0e5d-add5-418c-bb82-6d5689b3339f", "metadata": {}, "outputs": [], "source": [ "y = digits.target" ] }, { "cell_type": "code", "execution_count": 18, "id": "9998b765-1e81-4829-b27b-7f6a9c2525ad", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection 
import train_test_split" ] }, { "cell_type": "code", "execution_count": 19, "id": "3888eff8-762d-4200-9675-62480a0fb2fb", "metadata": {}, "outputs": [], "source": [ "# Fix the seed so the train/test split (and the reported accuracy) is reproducible.\n", "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)" ] }, { "cell_type": "code", "execution_count": 20, "id": "d62e3fa3-f379-4e43-9377-e8e1fb230215", "metadata": {}, "outputs": [], "source": [ "from sklearn.linear_model import LogisticRegression" ] }, { "cell_type": "code", "execution_count": 21, "id": "61b3fa63-4140-4108-ad23-fffc69cacf93", "metadata": {}, "outputs": [], "source": [ "lr = LogisticRegression(max_iter=1000)" ] }, { "cell_type": "code", "execution_count": 22, "id": "7d36c30c-ced2-4477-8a33-205a67fceba9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
LogisticRegression(max_iter=1000)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "LogisticRegression(max_iter=1000)" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lr.fit(x_train, y_train)" ] }, { "cell_type": "code", "execution_count": 23, "id": "bf4d1fe7-2dc4-4c75-80c8-20fcd288e76d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.9777777777777777" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lr.score(x_test, y_test)" ] }, { "cell_type": "code", "execution_count": 24, "id": "89a258b1-4238-47a0-85bf-3d6077f63cea", "metadata": {}, "outputs": [], "source": [ "from sklearn.decomposition import PCA" ] }, { "cell_type": "code", "execution_count": 25, "id": "e23f323c-14cb-4ab4-a5fa-6d7055a60cf3", "metadata": {}, "outputs": [], "source": [ "pca = PCA(n_components=24)" ] }, { "cell_type": "code", "execution_count": 26, "id": "50537bef-8f5b-47be-a4bf-d0dcb31f168a", "metadata": {}, "outputs": [], "source": [ "x_pca = pca.fit_transform(x)" ] }, { "cell_type": "code", "execution_count": 33, "id": "e66b7942-9ac1-416e-8360-7172b1cdf781", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0.12033916, 0.09561054, 0.08444415, 0.06498408, 0.04860155,\n", " 0.0421412 , 0.03942083, 0.0338938 , 0.02998219, 0.02931999,\n", " 0.02781797, 0.02577048, 0.02275267, 0.02227115, 0.02165206,\n", " 0.01914094, 0.01775203, 0.01637506, 0.01596336, 0.0148891 ,\n", " 0.01347101, 0.01269223, 0.01161657, 0.01049985])" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Must run after the PCA has been fitted: explained_variance_ratio_ only exists on a fitted estimator.\n", "pca.explained_variance_ratio_" ] }, { "cell_type": "code", "execution_count": 27, "id": "aad5e973-076b-4e71-8661-b4824f099bae", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1797, 24)" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x_pca.shape" ] }, { "cell_type": "code", "execution_count": 28, "id": "32ee0c4e-5920-4a77-8069-2b0fea28eded", "metadata": {}, "outputs": [], 
"source": [ "# Fixed random_state so the split of the PCA features and the resulting score are reproducible.\n", "pca_train, pca_test, y_train, y_test = train_test_split(x_pca, y, test_size=0.2, random_state=42)" ] }, { "cell_type": "code", "execution_count": 29, "id": "80612977-9ff9-48a6-823b-ae5e5823e954", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
LogisticRegression(max_iter=1000)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "LogisticRegression(max_iter=1000)" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Train a separate estimator on the PCA features so the baseline `lr`\n", "# (fitted on all 64 standardized pixels) is not overwritten and stays usable.\n", "lr_pca = LogisticRegression(max_iter=1000)\n", "lr_pca.fit(pca_train, y_train)" ] }, { "cell_type": "code", "execution_count": 30, "id": "9a3c49a8-9f66-4672-a832-23ba11d86584", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.9527777777777777" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Accuracy of the model trained on the 24 principal components.\n", "lr_pca.score(pca_test, y_test)" ] }, { "cell_type": "code", "execution_count": null, "id": "3a20054c-2087-41e3-9cc4-2def180d6a74", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.0" } }, "nbformat": 4, "nbformat_minor": 5 }