You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

405 lines
44 KiB
Plaintext

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "7d1db536",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "76b100fc",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(\"carprices.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "806166f8",
"metadata": {},
"outputs": [],
"source": [
"# Drop the \"Car Model\" column. `df` is rebound from itself, so re-running this\n",
"# cell would raise KeyError once the column is gone; errors=\"ignore\" makes the\n",
"# cell safe to execute more than once.\n",
"df = df.drop(columns=\"Car Model\", errors=\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "1dfdbd5a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Mileage</th>\n",
" <th>Sell Price($)</th>\n",
" <th>Age(yrs)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>69000</td>\n",
" <td>18000</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>35000</td>\n",
" <td>34000</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>57000</td>\n",
" <td>26100</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>22500</td>\n",
" <td>40000</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>46000</td>\n",
" <td>31500</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>59000</td>\n",
" <td>29400</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>52000</td>\n",
" <td>32000</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>72000</td>\n",
" <td>19300</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>91000</td>\n",
" <td>12000</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>67000</td>\n",
" <td>22000</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>83000</td>\n",
" <td>20000</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>79000</td>\n",
" <td>21000</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>59000</td>\n",
" <td>33000</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Mileage Sell Price($) Age(yrs)\n",
"0 69000 18000 6\n",
"1 35000 34000 3\n",
"2 57000 26100 5\n",
"3 22500 40000 2\n",
"4 46000 31500 4\n",
"5 59000 29400 5\n",
"6 52000 32000 5\n",
"7 72000 19300 6\n",
"8 91000 12000 8\n",
"9 67000 22000 6\n",
"10 83000 20000 7\n",
"11 79000 21000 7\n",
"12 59000 33000 5"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "d76d64b6",
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "acdf9d06",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x1bb29b183c8>"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Scatter of mileage against sell price. Axis labels let the figure stand on\n",
"# its own, and the trailing ';' keeps the matplotlib object repr out of the\n",
"# cell output.\n",
"plt.scatter(df[\"Mileage\"], df[\"Sell Price($)\"])\n",
"plt.xlabel(\"Mileage\")\n",
"plt.ylabel(\"Sell Price($)\");"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "1627f829",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x1bb28350d08>"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Scatter of car age against sell price. Axis labels let the figure stand on\n",
"# its own, and the trailing ';' keeps the matplotlib object repr out of the\n",
"# cell output.\n",
"plt.scatter(df[\"Age(yrs)\"], df[\"Sell Price($)\"])\n",
"plt.xlabel(\"Age(yrs)\")\n",
"plt.ylabel(\"Sell Price($)\");"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "932ff8d6",
"metadata": {},
"outputs": [],
"source": [
"# Target: the sell price column.\n",
"y = df[\"Sell Price($)\"]\n",
"# Features: every remaining column of df.\n",
"x = df.drop(columns=\"Sell Price($)\")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "a3714dd8",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "f7263cf1",
"metadata": {},
"outputs": [],
"source": [
"x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=10)  # random_state makes the split identical on every run; without it the split is random each time."
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "48944228",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(10, 3)"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(x_train), len(x_test)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "524b6f13",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import LinearRegression"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "29f0bfaf",
"metadata": {},
"outputs": [],
"source": [
"lr = LinearRegression()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "e60d2dd9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LinearRegression()"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lr.fit(x_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "dcefda82",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([41842.49106079, 22531.68057211, 18423.93325387])"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lr.predict(x_test)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "ecefdb9b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.9224816911971742"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lr.score(x_test, y_test)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}