You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

483 lines
10 KiB
Plaintext

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "d60aff11",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.linear_model import LinearRegression"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "3aafae87",
"metadata": {},
"outputs": [],
"source": [
"# Read the dataset from the CSV file (path is relative to the working directory).\n",
"df = pd.read_csv(filepath_or_buffer=\"multivariable.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "5ecbf4af",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>area</th>\n",
" <th>age</th>\n",
" <th>bashroom</th>\n",
" <th>price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>88.54</td>\n",
" <td>5</td>\n",
" <td>1.0</td>\n",
" <td>118.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>93.36</td>\n",
" <td>8</td>\n",
" <td>1.0</td>\n",
" <td>114.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>98.90</td>\n",
" <td>13</td>\n",
" <td>2.0</td>\n",
" <td>102.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>98.58</td>\n",
" <td>5</td>\n",
" <td>2.0</td>\n",
" <td>118.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>92.26</td>\n",
" <td>5</td>\n",
" <td>2.0</td>\n",
" <td>95.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>88.94</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>118.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>89.57</td>\n",
" <td>14</td>\n",
" <td>NaN</td>\n",
" <td>127.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" area age bashroom price\n",
"0 88.54 5 1.0 118.0\n",
"1 93.36 8 1.0 114.0\n",
"2 98.90 13 2.0 102.0\n",
"3 98.58 5 2.0 118.4\n",
"4 92.26 5 2.0 95.0\n",
"5 88.94 3 1.0 118.0\n",
"6 89.57 14 NaN 127.0"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df  # the data contains NaN values that need to be handled"
]
},
{
"cell_type": "markdown",
"id": "8cab6f22",
"metadata": {},
"source": [
"$$price = ax_1 + bx_2 + cx_3 + m$$\n",
"\n",
"where $x_1$ is `area`, $x_2$ is `age`, $x_3$ is `bashroom`, and $m$ is the intercept."
]
},
{
"cell_type": "markdown",
"id": "4087cc5f",
"metadata": {},
"source": [
"## 1.数据处理"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "cef313ea",
"metadata": {},
"outputs": [],
"source": [
"# Median of the bashroom column; used below to fill the missing entry.\n",
"median = df[\"bashroom\"].median()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "f59d79bc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.5"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"median  # bathroom counts cannot be fractional, so this value must be rounded down"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "f49989d8",
"metadata": {},
"outputs": [],
"source": [
"# Fill only the missing bathroom count. Bathrooms are whole numbers, so the\n",
"# median is rounded down first. Passing a per-column dict keeps this\n",
"# bathroom-derived value from being written into NaNs of any other column.\n",
"df = df.fillna({\"bashroom\": np.floor(median)})"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "ee51756b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>area</th>\n",
" <th>age</th>\n",
" <th>bashroom</th>\n",
" <th>price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>88.54</td>\n",
" <td>5</td>\n",
" <td>1.0</td>\n",
" <td>118.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>93.36</td>\n",
" <td>8</td>\n",
" <td>1.0</td>\n",
" <td>114.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>98.90</td>\n",
" <td>13</td>\n",
" <td>2.0</td>\n",
" <td>102.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>98.58</td>\n",
" <td>5</td>\n",
" <td>2.0</td>\n",
" <td>118.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>92.26</td>\n",
" <td>5</td>\n",
" <td>2.0</td>\n",
" <td>95.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>88.94</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>118.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>89.57</td>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>127.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" area age bashroom price\n",
"0 88.54 5 1.0 118.0\n",
"1 93.36 8 1.0 114.0\n",
"2 98.90 13 2.0 102.0\n",
"3 98.58 5 2.0 118.4\n",
"4 92.26 5 2.0 95.0\n",
"5 88.94 3 1.0 118.0\n",
"6 89.57 14 1.0 127.0"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df  # inspect the frame after the missing value has been filled"
]
},
{
"cell_type": "markdown",
"id": "e41d72fc",
"metadata": {},
"source": [
"## 2.训练模型"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "39295dee",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LinearRegression()"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit a linear regression of price on the three features (area, age, bashroom).\n",
"model = LinearRegression()\n",
"model.fit(X=df[[\"area\", \"age\", \"bashroom\"]].values, y=df[\"price\"].values)"
]
},
{
"cell_type": "markdown",
"id": "c1b812cb",
"metadata": {},
"source": [
"## 3.模型测试"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "636c57cb",
"metadata": {},
"outputs": [],
"source": [
"# Predict the price of one sample: area=105, age=2, bashroom=1\n",
"# (same column order as used when fitting). predict expects a 2-D array.\n",
"pred = model.predict(np.array([105, 2, 1]).reshape(1, -1))"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "4928085a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([134.39931808])"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pred  # predicted price for the sample (area=105, age=2, bashroom=1)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "d6dea269",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 1.03253137, 0.04233053, -20.81194367])"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.coef_  # fitted coefficients, in feature order: area, age, bashroom"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "7cd87a3a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1.032531365235341, 0.042330533476314436, -20.81194367389132)"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the three coefficients (a, b, c in the formula) at full precision.\n",
"tuple(model.coef_)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "a64c1cc5",
"metadata": {},
"outputs": [],
"source": [
"# Reproduce the prediction by hand: price = a*area + b*age + c*bashroom + m.\n",
"price = (\n",
"    model.coef_[0] * 105\n",
"    + model.coef_[1] * 2\n",
"    + model.coef_[2] * 1\n",
"    + model.intercept_\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "c02a4f36",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"134.3993180794738"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"price  # hand-computed price for the same sample"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "862d7fab",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ True])"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Compare with a tolerance rather than ==: the hand-computed sum and\n",
"# model.predict may differ in the last floating-point bits, so exact\n",
"# equality is fragile even when both values are mathematically the same.\n",
"np.isclose(price, pred)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}