You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

915 lines
41 KiB
Plaintext

6 months ago
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "e8377817-763a-4c8a-a311-28fd26f1ad7d",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import load_digits"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8632592f-c1f0-4ea6-8b5f-1f3513458741",
"metadata": {},
"outputs": [],
"source": [
"digits = load_digits()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "d27acf66-2b68-4d63-97f3-c2436097cf45",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1797, 64)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"digits.data.shape"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "fee840d3-0e1e-4a48-a436-b94285c1acc5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 0., 0., 5., 13., 9., 1., 0., 0., 0., 0., 13., 15., 10.,\n",
" 15., 5., 0., 0., 3., 15., 2., 0., 11., 8., 0., 0., 4.,\n",
" 12., 0., 0., 8., 8., 0., 0., 5., 8., 0., 0., 9., 8.,\n",
" 0., 0., 4., 11., 0., 1., 12., 7., 0., 0., 2., 14., 5.,\n",
" 10., 12., 0., 0., 0., 0., 6., 13., 10., 0., 0., 0.])"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"digits.data[0]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "feffae4a-a0fc-4f82-a8ba-297e34330ce5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0., 0., 5., 13., 9., 1., 0., 0.],\n",
" [ 0., 0., 13., 15., 10., 15., 5., 0.],\n",
" [ 0., 3., 15., 2., 0., 11., 8., 0.],\n",
" [ 0., 4., 12., 0., 0., 8., 8., 0.],\n",
" [ 0., 5., 8., 0., 0., 9., 8., 0.],\n",
" [ 0., 4., 11., 0., 1., 12., 7., 0.],\n",
" [ 0., 2., 14., 5., 10., 12., 0., 0.],\n",
" [ 0., 0., 6., 13., 10., 0., 0., 0.]])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"digits.data[0].reshape(8,8)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "4fad9e34-b1d2-427e-8bac-494554e58879",
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "d385e526-d0c4-40ad-96ea-eaa916c4dcde",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.image.AxesImage at 0x16e16f2c0d0>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"<Figure size 640x480 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZoAAAGkCAYAAAAIduO+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8WgzjOAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAYoElEQVR4nO3df2yUhR3H8c/R2oNpexak0I7jpygCtoMWCKvOHyCkQSL7oxKCWYXNRXJMsDFx/WewLOPqH1twGyk/xoqJYyDLis4MusKkZJkdpaQJaIJgmRwidG5wV7rkML3bX7utQ9o+R788PNf3K3midz7HfUIqb+5He75kMpkUAABGhrk9AACQ2QgNAMAUoQEAmCI0AABThAYAYIrQAABMERoAgClCAwAwRWgAAKYIDQDAVMaEZsuWLZo4caKGDx+uefPm6dixY25P6tfRo0e1dOlSFRUVyefzaf/+/W5PGpBwOKw5c+YoNzdXBQUFWrZsmU6fPu32rAGpq6tTcXGx8vLylJeXp/nz5+vAgQNuz3KstrZWPp9P69evd3tKvzZu3Cifz9frmDZtmtuzBuTTTz/Vc889p1GjRmnEiBF6+OGHdfz4cbdn9WvixIk3/J77fD6FQiFX9mREaPbu3avq6mpt2LBBJ06cUElJiRYvXqzOzk63p/Wpu7tbJSUl2rJli9tTHGlublYoFFJLS4uampr0xRdfaNGiReru7nZ7Wr/GjRun2tpatbW16fjx43ryySf1zDPP6IMPPnB72oC1trZq27ZtKi4udnvKgM2YMUOfffZZ6vjzn//s9qR+XblyReXl5brrrrt04MABffjhh/rJT36i/Px8t6f1q7W1tdfvd1NTkySpsrLSnUHJDDB37txkKBRKXe7p6UkWFRUlw+Gwi6uckZRsaGhwe0ZaOjs7k5KSzc3Nbk9JS35+fvKXv/yl2zMGpKurKzl16tRkU1NT8rHHHkuuW7fO7Un92rBhQ7KkpMTtGY69+uqryUceecTtGYNi3bp1ySlTpiQTiYQr9+/5RzTXr19XW1ubFi5cmLpu2LBhWrhwod5//30Xlw0d0WhUkjRy5EiXlzjT09OjPXv2qLu7W/Pnz3d7zoCEQiEtWbKk19e7F5w5c0ZFRUWaPHmyVq5cqfPnz7s9qV/vvPOOysrKVFlZqYKCAs2aNUs7duxwe5Zj169f15tvvqnVq1fL5/O5ssHzofn888/V09OjMWPG9Lp+zJgxunTpkkurho5EIqH169ervLxcM2fOdHvOgJw8eVL33HOP/H6/XnzxRTU0NGj69Oluz+rXnj17dOLECYXDYbenODJv3jzt2rVLBw8eVF1dnc6dO6dHH31UXV1dbk/rU0dHh+rq6jR16lQ1NjZqzZo1eumll/TGG2+4Pc2R/fv36+rVq3r++edd25Dt2j0jI4RCIZ06dcoTz7n/x4MPPqj29nZFo1H99re/VVVVlZqbm+/o2EQiEa1bt05NTU0aPny423McqaioSP17cXGx5s2bpwkTJuitt97St7/9bReX9S2RSKisrEybNm2SJM2aNUunTp3S1q1bVVVV5fK6gdu5c6cqKipUVFTk2gbPP6K57777lJWVpcuXL/e6/vLlyxo7dqxLq4aGtWvX6t1339V7772ncePGuT1nwHJycnT//fertLRU4XBYJSUlev31192e1ae2tjZ1dnZq9uzZys7OVnZ2tpqbm/Wzn/1M2dnZ6unpcXvigN1777164IEHdPbsWben9KmwsPCGv3w89NBDnnja7z8++eQTHTp0SN/5zndc3eH50OTk5Ki0tFSHDx9OXZdIJHT48GHPPO/uNclkUmvXrlVDQ4P+9Kc/adKkSW5PuiWJRELxeNztGX1asGCBTp48qfb29tRRVlamlStXqr29XVlZWW5PHLBr167p448/VmFhodtT+lReXn7D2/Y/+ugjTZgwwaVFztXX16ugoEBLlixxdUdGPHVWXV2tqqoqlZWVae7cudq8ebO6u7u1atUqt6f16dq1a73+Vnfu3Dm1t7dr5MiRGj9+vIvL+hYKhbR79269/fbbys3NTb0WFggENGLECJfX9a2mpkYVFRUaP368urq6tHv3bh05ckSNjY1uT+tTbm7uDa+B3X333Ro1atQd/9rYK6+8oqVLl2rChAm6ePGiNmzYoKysLK1YscLtaX16+eWX9fWvf12bNm3Ss88+q2PHjmn79u3avn2729MGJJFIqL6+XlVVVcrOdvmPelfe62bg5z//eXL8+PHJnJyc5Ny5c5MtLS1uT+rXe++9l5R0w1FVVeX2tD592WZJyfr6eren9Wv16tXJCRMmJHNycpKjR49OLliwIPnHP/7R7Vlp8crbm5cvX54sLCxM5uTkJL/61a8mly9fnjx79qzbswbk97//fXLmzJlJv9+fnDZtWnL79u1uTxqwxsbGpKTk6dOn3Z6S9CWTyaQ7iQMADAWef40GAHBnIzQAAFOEBgBgitAAAEwRGgCAKUIDADCVUaGJx+PauHHjHf9d3v/Pq7sl72736m7Ju9u9ulvy7vY7ZXdGfR9NLBZTIBBQNBpVXl6e23MGzKu7Je9u9+puybvbvbpb8u72O2V3Rj2iAQDceQgNAMDUbf9Ja4lEQhcvXlRubu6gf9pbLBbr9U+v8Opuybvbvbpb8u52r+6WvLvdencymVRXV5eKioo0bNjNH7fc9tdoLly4oGAweDvvEgBgKBKJ9PmZVLf9EU1ubu7tvktIWrZsmdsT0rJx40a3J6TtyJEjbk9Ii5d/z69ever2hCGpvz/Xb3toBvvpMgzMXXfd5faEtHj5LyZ3+mfz3Az/j8Kp/r5meDMAAMAUoQEAmCI0AABThAYAYIrQAABMERoAgClCAwAwRWgAAKYIDQDAFKEBAJgiNAAAU4QGAGCK0AAATBEaAIApQgMAMEVoAACm0grNli1bNHHiRA0fPlzz5s3TsWPHBnsXACBDOA7N3r17VV1drQ0bNujEiRMqKSnR4sWL1dnZabEPAOBxjkPz05/+VC+88IJWrVql6dOna+vWrfrKV76iX/3qVxb7AAAe5yg0169fV1tbmxYuXPjfX2DYMC1cuFDvv//+l94mHo8rFov1OgAAQ4ej0Hz++efq6enRmDFjel0/ZswYXbp06UtvEw6HFQgEUkcwGEx/LQDAc8zfdVZTU6NoNJo6IpGI9V0CAO4g2U5Ovu+++5SVlaXLly/3uv7y5csaO3bsl97G7/fL7/envxAA4GmOHtHk5OSotLRUhw8fTl2XSCR0+PBhzZ8/f9DHAQC8z9EjGkmqrq5WVVWVysrKNHfuXG3evFnd3d1atWqVxT4AgMc5Ds3y5cv197//XT/4wQ906dIlfe1rX9PBgwdveIMAAABSGqGRpLVr12rt2rWDvQUAkIH4WWcAAFOEBgBgitAAAEwRGgCAKUIDADBFaAAApggNAMAUoQEAmCI0AABThAYAYIrQAABMERoAgClCAwAwRWgAAKYIDQDAFKEBAJhK64PP4D21tbVuT0jL5MmT3Z6Qtvz8fLcnpOWf//yn2xPS9uyzz7o9IS379u1ze4IpHtEAAEwRGgCAKUIDADBFaAAApggNAMAUoQEAmCI0AABThAYAYIrQAABMERoAgClCAwAwRWgAAKYIDQDAFKEBAJgiNAAAU4QGAGCK0AAATBEaAIApQgMAMOU4NEePHtXSpUtVVFQkn8+n/fv3G8wCAGQKx6Hp7u5WSUmJtmzZYrEHAJBhsp3eoKKiQhUVFRZbAAAZyHFonIrH44rH46nLsVjM+i4BAHcQ8zcDhMNhBQKB1BEMBq3vEgBwBzEPTU1NjaLRaOqIRCLWdwkAuIOYP3Xm9/vl9/ut7wYAcIfi+2gAAKYcP6K5du2azp49m7p87tw5tbe3a+TIkRo/fvygjgMAeJ/j0Bw/flxPPPFE6nJ1dbUkqaqqSrt27Rq0YQCAzOA4NI8//riSyaTFFgBABuI1GgCAKUIDADBFaAAApggNAMAUoQEAmCI0AABThAYAYIrQAABMERoAgClCAwAwRWgAAKYIDQDAFKEBAJgiNAAAU4QGAGCK0AA
"text/plain": [
"<Figure size 480x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.gray()\n",
"plt.matshow(digits.data[0].reshape(8,8))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "fab57c4b-4eeb-47fc-8a7f-ffbbbc778003",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"digits.target[0]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "b640c490-c823-42cd-a1f8-52fe40d3ecc9",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "cf0da7e9-99ea-4341-a483-42a2ae71410f",
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame(digits.data)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "df8b60d8-ac5c-4250-8456-6c2e15f5c7f5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>...</th>\n",
" <th>54</th>\n",
" <th>55</th>\n",
" <th>56</th>\n",
" <th>57</th>\n",
" <th>58</th>\n",
" <th>59</th>\n",
" <th>60</th>\n",
" <th>61</th>\n",
" <th>62</th>\n",
" <th>63</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>5.0</td>\n",
" <td>13.0</td>\n",
" <td>9.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>6.0</td>\n",
" <td>13.0</td>\n",
" <td>10.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>12.0</td>\n",
" <td>13.0</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>11.0</td>\n",
" <td>16.0</td>\n",
" <td>10.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>4.0</td>\n",
" <td>15.0</td>\n",
" <td>12.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3.0</td>\n",
" <td>11.0</td>\n",
" <td>16.0</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>7.0</td>\n",
" <td>15.0</td>\n",
" <td>13.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>8.0</td>\n",
" <td>...</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>7.0</td>\n",
" <td>13.0</td>\n",
" <td>13.0</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>11.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2.0</td>\n",
" <td>16.0</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 64 columns</p>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 5 6 7 8 9 ... 54 55 56 \\\n",
"0 0.0 0.0 5.0 13.0 9.0 1.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 12.0 13.0 5.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 4.0 15.0 12.0 0.0 0.0 0.0 0.0 ... 5.0 0.0 0.0 \n",
"3 0.0 0.0 7.0 15.0 13.0 1.0 0.0 0.0 0.0 8.0 ... 9.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 1.0 11.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"\n",
" 57 58 59 60 61 62 63 \n",
"0 0.0 6.0 13.0 10.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 11.0 16.0 10.0 0.0 0.0 \n",
"2 0.0 0.0 3.0 11.0 16.0 9.0 0.0 \n",
"3 0.0 7.0 13.0 13.0 9.0 0.0 0.0 \n",
"4 0.0 0.0 2.0 16.0 4.0 0.0 0.0 \n",
"\n",
"[5 rows x 64 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "aded246a-0bfa-4078-9868-03b08694e6da",
"metadata": {},
"outputs": [],
"source": [
"df.columns = digits.feature_names"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "3fe7f989-8741-4664-9480-01632c58ad58",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>pixel_0_0</th>\n",
" <th>pixel_0_1</th>\n",
" <th>pixel_0_2</th>\n",
" <th>pixel_0_3</th>\n",
" <th>pixel_0_4</th>\n",
" <th>pixel_0_5</th>\n",
" <th>pixel_0_6</th>\n",
" <th>pixel_0_7</th>\n",
" <th>pixel_1_0</th>\n",
" <th>pixel_1_1</th>\n",
" <th>...</th>\n",
" <th>pixel_6_6</th>\n",
" <th>pixel_6_7</th>\n",
" <th>pixel_7_0</th>\n",
" <th>pixel_7_1</th>\n",
" <th>pixel_7_2</th>\n",
" <th>pixel_7_3</th>\n",
" <th>pixel_7_4</th>\n",
" <th>pixel_7_5</th>\n",
" <th>pixel_7_6</th>\n",
" <th>pixel_7_7</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>5.0</td>\n",
" <td>13.0</td>\n",
" <td>9.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>6.0</td>\n",
" <td>13.0</td>\n",
" <td>10.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>12.0</td>\n",
" <td>13.0</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>11.0</td>\n",
" <td>16.0</td>\n",
" <td>10.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>4.0</td>\n",
" <td>15.0</td>\n",
" <td>12.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3.0</td>\n",
" <td>11.0</td>\n",
" <td>16.0</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>7.0</td>\n",
" <td>15.0</td>\n",
" <td>13.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>8.0</td>\n",
" <td>...</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>7.0</td>\n",
" <td>13.0</td>\n",
" <td>13.0</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>11.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2.0</td>\n",
" <td>16.0</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 64 columns</p>\n",
"</div>"
],
"text/plain": [
" pixel_0_0 pixel_0_1 pixel_0_2 pixel_0_3 pixel_0_4 pixel_0_5 \\\n",
"0 0.0 0.0 5.0 13.0 9.0 1.0 \n",
"1 0.0 0.0 0.0 12.0 13.0 5.0 \n",
"2 0.0 0.0 0.0 4.0 15.0 12.0 \n",
"3 0.0 0.0 7.0 15.0 13.0 1.0 \n",
"4 0.0 0.0 0.0 1.0 11.0 0.0 \n",
"\n",
" pixel_0_6 pixel_0_7 pixel_1_0 pixel_1_1 ... pixel_6_6 pixel_6_7 \\\n",
"0 0.0 0.0 0.0 0.0 ... 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 ... 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 ... 5.0 0.0 \n",
"3 0.0 0.0 0.0 8.0 ... 9.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 ... 0.0 0.0 \n",
"\n",
" pixel_7_0 pixel_7_1 pixel_7_2 pixel_7_3 pixel_7_4 pixel_7_5 \\\n",
"0 0.0 0.0 6.0 13.0 10.0 0.0 \n",
"1 0.0 0.0 0.0 11.0 16.0 10.0 \n",
"2 0.0 0.0 0.0 3.0 11.0 16.0 \n",
"3 0.0 0.0 7.0 13.0 13.0 9.0 \n",
"4 0.0 0.0 0.0 2.0 16.0 4.0 \n",
"\n",
" pixel_7_6 pixel_7_7 \n",
"0 0.0 0.0 \n",
"1 0.0 0.0 \n",
"2 9.0 0.0 \n",
"3 0.0 0.0 \n",
"4 0.0 0.0 \n",
"\n",
"[5 rows x 64 columns]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "markdown",
"id": "a243ed39-7669-45c6-85e0-acb8183d658a",
"metadata": {},
"source": [
"pca之前需要数据归一化"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "ef386108-8407-4453-9eda-5947b6323f38",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import StandardScaler"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "b3873b08-537a-479a-a50c-7ad1ea7c763a",
"metadata": {},
"outputs": [],
"source": [
"scaler = StandardScaler()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "7c249404-1e53-48cd-8477-7a7c65c56ace",
"metadata": {},
"outputs": [],
"source": [
"x = scaler.fit_transform(digits.data)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "679a0e5d-add5-418c-bb82-6d5689b3339f",
"metadata": {},
"outputs": [],
"source": [
"y = digits.target"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "9998b765-1e81-4829-b27b-7f6a9c2525ad",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "3888eff8-762d-4200-9675-62480a0fb2fb",
"metadata": {},
"outputs": [],
"source": [
"x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "d62e3fa3-f379-4e43-9377-e8e1fb230215",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "61b3fa63-4140-4108-ad23-fffc69cacf93",
"metadata": {},
"outputs": [],
"source": [
"lr = LogisticRegression(max_iter=1000)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "7d36c30c-ced2-4477-8a33-205a67fceba9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-r
],
"text/plain": [
"LogisticRegression(max_iter=1000)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lr.fit(x_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "bf4d1fe7-2dc4-4c75-80c8-20fcd288e76d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.9777777777777777"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lr.score(x_test, y_test)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "89a258b1-4238-47a0-85bf-3d6077f63cea",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.decomposition import PCA"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "e23f323c-14cb-4ab4-a5fa-6d7055a60cf3",
"metadata": {},
"outputs": [],
"source": [
"pca = PCA(n_components=24)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "e66b7942-9ac1-416e-8360-7172b1cdf781",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.12033916, 0.09561054, 0.08444415, 0.06498408, 0.04860155,\n",
" 0.0421412 , 0.03942083, 0.0338938 , 0.02998219, 0.02931999,\n",
" 0.02781797, 0.02577048, 0.02275267, 0.02227115, 0.02165206,\n",
" 0.01914094, 0.01775203, 0.01637506, 0.01596336, 0.0148891 ,\n",
" 0.01347101, 0.01269223, 0.01161657, 0.01049985])"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pca.explained_variance_ratio_"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "50537bef-8f5b-47be-a4bf-d0dcb31f168a",
"metadata": {},
"outputs": [],
"source": [
"x_pca = pca.fit_transform(x)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "aad5e973-076b-4e71-8661-b4824f099bae",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1797, 24)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x_pca.shape"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "32ee0c4e-5920-4a77-8069-2b0fea28eded",
"metadata": {},
"outputs": [],
"source": [
"pca_train, pca_test, y_train, y_test = train_test_split(x_pca, y, test_size=0.2)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "80612977-9ff9-48a6-823b-ae5e5823e954",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-container-id-2 {color: black;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-r
],
"text/plain": [
"LogisticRegression(max_iter=1000)"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lr.fit(pca_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "9a3c49a8-9f66-4672-a832-23ba11d86584",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.9527777777777777"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lr.score(pca_test, y_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3a20054c-2087-41e3-9cc4-2def180d6a74",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}