You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

492 lines
26 KiB
Plaintext

6 months ago
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "ae02d6c8",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from sklearn import linear_model"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6729b5ba",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(\"housing_price.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "717fda9f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>area</th>\n",
" <th>price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>86.45</td>\n",
" <td>117.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>91.57</td>\n",
" <td>98.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>85.52</td>\n",
" <td>114.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>103.60</td>\n",
" <td>146.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>105.25</td>\n",
" <td>106.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>99.00</td>\n",
" <td>109.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>87.95</td>\n",
" <td>91.5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" area price\n",
"0 86.45 117.0\n",
"1 91.57 98.0\n",
"2 85.52 114.0\n",
"3 103.60 146.0\n",
"4 105.25 106.0\n",
"5 99.00 109.0\n",
"6 87.95 91.5"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "8bae038f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x193b20e02c8>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAGwCAYAAABPSaTdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAvn0lEQVR4nO3dfVjUdb7/8dcAiqQyBCYwgohpamlGuhEVpclVaGu26JbGZdaans7J0ujGPP3Ubo/dbanlydOte7WWbR2yPbbZmlbiSqQW282WR40UFHSNANFUbj6/P+YwOXEj4sDMfHw+rmsunc/3M595f/zyZV5+78ZhjDECAACwVIi/CwAAAGhPhB0AAGA1wg4AALAaYQcAAFiNsAMAAKxG2AEAAFYj7AAAAKuF+buAQFBfX689e/aoe/fucjgc/i4HAAC0gjFGBw4ckMvlUkhI8/tvCDuS9uzZo8TERH+XAQAA2qC4uFgJCQnNLifsSOrevbsk9z9WZGSkn6sBAACtUVVVpcTERM/neHMIO5Ln0FVkZCRhBwCAIHO8U1A4QRkAAFiNsAMAAKxG2AEAAFYj7AAAAKsRdgAAgNUIOwAAwGqEHQAAYDXCDgAAsBphBwAAWI07KAMAYIO6OikvTyotleLjpfR0KTTU31UFBMIOAADBLjdXmjlTKin5uS0hQVq0SMrK8l9dAYLDWAAABLPcXGnCBO+gI0m7d7vbc3P9U1cAIewAABCs6urce3SMabysoW3WLHe/UxhhBwCAYJWX13iPzrGMkYqL3f1OYYQdAACCVWmpb/tZirADAECwio/3bT9LEXYAAAhW6enuq64cjqaXOxxSYqK73ymMsAMAQLAKDXVfXi41DjwNzxcuPOXvt0PYAQAgmGVlSW+9JfXq5d2ekOBu5z473FQQAICgl5UljRvHHZSbQdgBAMAGoaHSiBH+riIgcRgLAABYjbADAACsRtgBAABWI+wAAACrEXYAAIDVCDsAAMBqhB0AAGA1wg4AALAaYQcAAFiNsAMAAKxG2AEAAFYj7AAAAKsRdgAAgNUIOwAAwGqEHQAAYDXCDgAAsBphBwAAWI2wAwAArEbYAQAAViPsAAAAqxF2AACA1Qg7AADAaoQdAABgNcIOAACwGmEHAABYjbADAACs5tews379eo0dO1Yul0sOh0MrV65stu8tt9wih8OhhQsXerWXl5crOztbkZGRioqK0tSpU1VdXd2+hQMAgKDh17Bz8OBBDR06VEuWLGmx39tvv61PPvlELper0bLs7Gx9/fXXWrNmjVatWqX169dr+vTp7VUyAAAIMmH+fPPRo0dr9OjRLfbZvXu3brvtNr3//vu66qqrvJZ98803Wr16tTZt2qThw4dLkp555hmNGTNGTz75ZJPhCAAAnFoC+pyd+vp6TZ48WXfffbfOOeecRsvz8/MVFRXlCTqSlJGRoZCQEBUUFDQ77pEjR1RVVeX1AAAAdgrosPPYY48pLCxMt99+e5PLy8rK1LNnT6+2sLAwRUdHq6ysrNlxFyxYIKfT6XkkJib6tG4AABA4AjbsbNmyRYsWLdKyZcvkcDh8OvacOXNUWVnpeRQXF/t0fAAAEDgCNuzk5eVp37596t27t8LCwhQWFqadO3fqzjvvVJ8+fSRJcXFx2rdvn9framtrVV5erri4uGbHDg8PV2RkpNcDAADYya8nKLdk8uTJysjI8Gq78sorNXnyZN10002SpLS0NFVUVGjLli0aNmyYJGndunWqr69Xampqh9cMAAACj1/DTnV1tbZv3+55XlRUpMLCQkVHR6t3796KiYnx6t+pUyfFxcVpwIABkqRBgwYpMzNT06ZN09KlS1VTU6MZM2Zo4sSJXIkFAAAk+fkw1ubNm5WSkqKUlBRJUk5OjlJSUjRv3rxWj7F8+XINHDhQo0aN0pgxY3TJJZfo+eefb6+SAQBAkHEYY4y/i/C3qqoqOZ1OVVZWcv4OAABBorWf3w
F7gjIAAIAvEHYAAIDVCDsAAMBqhB0AAGA1wg4AALAaYQcAAFiNsAMAAKxG2AEAAFYj7AAAAKsRdgAAgNUIOwAAwGqEHQAAYDXCDgAAsBphBwAAWI2wAwAArEbYAQAAViPsAAAAqxF2AACA1Qg7AADAaoQdAABgNcIOAACwGmEHAABYjbADAACsRtgBAABWI+wAAACrEXYAAIDVCDsAAMBqhB0AAGA1wg4AALAaYQcAAFiNsAMAAKxG2AEAAFYj7AAAAKsRdgAAgNUIOwAAwGqEHQAAYDXCDgAAsBphBwAAWI2wAwAArEbYAQAAViPsAAAAqxF2AACA1Qg7AADAaoQdAABgNcIOAACwGmEHAABYjbADAACsRtgBAABWI+wAAACrEXYAAIDVCDsAAMBqfg0769ev19ixY+VyueRwOLRy5Uqv5ffff78GDhyorl276vTTT1dGRoYKCgq8+pSXlys7O1uRkZGKiorS1KlTVV1d3YGzAAAAgcyvYefgwYMaOnSolixZ0uTys846S88++6y+/PJLbdiwQX369NEVV1yhf/7zn54+2dnZ+vrrr7VmzRqtWrVK69ev1/Tp0ztqCgAAIMA5jDHG30VIksPh0Ntvv61rrrmm2T5VVVVyOp364IMPNGrUKH3zzTc6++yztWnTJg0fPlyStHr1ao0ZM0YlJSVyuVyteu+GcSsrKxUZGemL6QAAgHbW2s/voDln5+jRo3r++efldDo1dOhQSVJ+fr6ioqI8QUeSMjIyFBIS0uhw17GOHDmiqqoqrwcAALBTwIedVatWqVu3burSpYuefvpprVmzRj169JAklZWVqWfPnl79w8LCFB0drbKysmbHXLBggZxOp+eRmJjYrnMAAAD+E/BhZ+TIkSosLNTGjRuVmZmpa6+9Vvv27TupMefMmaPKykrPo7i42EfVAgCAQBPwYadr167q16+fLrzwQr300ksKCwvTSy+9JEmKi4trFHxqa2tVXl6uuLi4ZscMDw9XZGSk1wMAANgp4MPOL9XX1+vIkSOSpLS0NFVUVGjLli2e5evWrVN9fb1SU1P9VSIAAAggYf588+rqam3fvt3zvKioSIWFhYqOjlZMTIweeeQRXX311YqPj9f+/fu1ZMkS7d69W7/97W8lSYMGDVJmZqamTZumpUuXqqamRjNmzNDEiRNbfSUWAACwm1/DzubNmzVy5EjP85ycHEnSlClTtHTpUn377bf6wx/+oP379ysmJka/+tWvlJeXp3POOcfzmuXLl2vGjBkaNWqUQkJCNH78eC1evLjD5wIAAAJTwNxnx5+4zw4AAMHHuvvsAAAAtAVhBwAAWI2wAwAArEbYAQAAViPsAAAAqxF2AACA1Qg7AADAaoQdAABgNcIOAACwGmEHAABYjbADAACsRtgBAABWI+wAAACrEXYAAIDVCDsAAMBqhB0AAGA1wg4AALAaYQcAAFiNsAMAAKxG2AEAAFYj7AAAAKsRdgAAgNUIOwAAwGqEHQAAYDXCDgAAsBphBwAAWI2wAwAArEbYAQAAViPsAAAAqxF2AACA1Qg7AADAaoQdAABgNcIOAACwGmEHAABYjbADAACsRtgBAABWI+wAAACrEXYAAIDVCDsAAMBqhB0AAGA1wg4AALAaYQcAAFiNsAMAAKxG2AEAAFYj7AAAAKsRdgAAgNUIOwAAwGqEHQAAYDXCDgAAsFqbw86rr76qiy++WC6XSzt37pQkLVy4UO+8847PigMAADhZbQo7zz33nHJycjRmzBhVVFSorq5OkhQVFaWFCxf6sj4AAICT0qaw88wzz+iFF17Qfffdp9DQUE/78OHD9eWXX7Z6nPXr12vs2LFyuVxyOBxauXKlZ1lNTY1mz56tIUOGqGvXrnK5XLrhhhu0Z88erzHKy8uVnZ2tyMhIRUVFaerUqaqurm7LtAAAgIXaFHaKioqUkpLSqD08PFwHDx5s9TgHDx7U0KFDtWTJkkbLDh06pM8++0xz587VZ5
99ptzcXG3dulVXX321V7/s7Gx9/fXXWrNmjVatWqX169dr+vTpJz4pAABgpbC2vCg5OVmFhYVKSkryal+9erUGDRr
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%matplotlib inline\n",
"# Label both axes, then draw price against area as red points.\n",
"plt.ylabel(\"price\")\n",
"plt.xlabel(\"area\")\n",
"plt.scatter(df[\"area\"], df[\"price\"], c='r')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "1dbdd68d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(7, 1)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"area\"]].shape"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "c147b635",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LinearRegression()"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit ordinary least squares: a 2-D (n_samples, 1) area matrix against the 1-D price vector.\n",
"model = linear_model.LinearRegression()\n",
"model.fit(df[[\"area\"]].to_numpy(), df[\"price\"].to_numpy())"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "cd1a7008",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([0.83809367]), 32.70161672527138)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.coef_, model.intercept_"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "d7e74d63",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([115.93269948])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.predict(np.array([[99.31]]))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "4fffcc21",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([115.93269948])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"price = model.coef_ * 99.31 + model.intercept_\n",
"price"
]
},
{
"cell_type": "markdown",
"id": "528624fa",
"metadata": {},
"source": [
"## 1.pickle保存加载模型"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "e29158e1",
"metadata": {},
"outputs": [],
"source": [
"import pickle"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "54dc5fe3",
"metadata": {},
"outputs": [],
"source": [
"with open(\"linear_model.pkl\", 'wb') as f:\n",
" pickle.dump(model, f)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "3871ee93",
"metadata": {},
"outputs": [],
"source": [
"with open(\"linear_model.pkl\", 'rb') as f:\n",
" loaded_model = pickle.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "1dd7555a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([0.83809367]), 32.70161672527138)"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"loaded_model.coef_, loaded_model.intercept_"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "b5177ecc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([108.13004737])"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Predict with the unpickled model (not the in-memory one) to verify the save/load round trip.\n",
"loaded_model.predict(np.array([[90]]))"
]
},
{
"cell_type": "markdown",
"id": "a4e5a9cc",
"metadata": {},
"source": [
"## 2.joblib保存加载模型"
]
},
{
"cell_type": "markdown",
"id": "7ad03fc7",
"metadata": {},
"source": [
"注意：`sklearn.externals.joblib` 自 scikit-learn 0.21 起已弃用，并在 0.23 中移除；请直接 `import joblib`（必要时用 `pip install joblib` 单独安装）。"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d6bec1b5",
"metadata": {},
"outputs": [],
"source": [
"# The copy of joblib bundled under sklearn.externals was deprecated in\n",
"# scikit-learn 0.21 and removed in 0.23. Catch the ImportError and show it,\n",
"# so the failure is demonstrated without stopping Restart & Run All.\n",
"try:\n",
"    from sklearn.externals import joblib\n",
"except ImportError as exc:\n",
"    print(f\"sklearn.externals.joblib is unavailable: {exc}\")"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "23fee355",
"metadata": {},
"outputs": [],
"source": [
"import joblib"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "68b07903",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['joblib_model.pkl']"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"joblib.dump(model, 'joblib_model.pkl')"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "8814bd49",
"metadata": {},
"outputs": [],
"source": [
"jm = joblib.load('joblib_model.pkl')"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "4b6180a7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([0.83809367]), 32.70161672527138)"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"jm.coef_, jm.intercept_"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "42a138b9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([124.89192085])"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"jm.predict([[110]])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dfc90bd0",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}