{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "9c8d8367-9587-4e87-ad42-2debf858dfcb", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "id": "bece23f0-ea8b-450b-b82a-63bddc3c1de7", "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(\"diabetes.csv\")" ] }, { "cell_type": "code", "execution_count": 3, "id": "554e5c95-083e-45fc-9438-b2121ec20b4f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAgeOutcome
061487235033.60.627501
11856629026.60.351310
28183640023.30.672321
318966239428.10.167210
40137403516843.12.288331
\n", "
" ], "text/plain": [ " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n", "0 6 148 72 35 0 33.6 \n", "1 1 85 66 29 0 26.6 \n", "2 8 183 64 0 0 23.3 \n", "3 1 89 66 23 94 28.1 \n", "4 0 137 40 35 168 43.1 \n", "\n", " DiabetesPedigreeFunction Age Outcome \n", "0 0.627 50 1 \n", "1 0.351 31 0 \n", "2 0.672 32 1 \n", "3 0.167 21 0 \n", "4 2.288 33 1 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 4, "id": "735d8795-1dac-4623-be2d-7fb2a2f5049e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Pregnancies 0\n", "Glucose 0\n", "BloodPressure 0\n", "SkinThickness 0\n", "Insulin 0\n", "BMI 0\n", "DiabetesPedigreeFunction 0\n", "Age 0\n", "Outcome 0\n", "dtype: int64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 5, "id": "7a10a77a-d90f-4b0b-a951-d1c7c46202c7", "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import StandardScaler" ] }, { "cell_type": "code", "execution_count": 7, "id": "301f9c12-aeae-45b1-8689-18cc6e00718d", "metadata": {}, "outputs": [], "source": [ "y = df.Outcome\n", "x = df.drop(\"Outcome\", axis=\"columns\")" ] }, { "cell_type": "code", "execution_count": 9, "id": "fa9a0f52-5647-429c-8b76-df87a3243519", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAge
061487235033.60.62750
11856629026.60.35131
28183640023.30.67232
318966239428.10.16721
40137403516843.12.28833
\n", "
" ], "text/plain": [ " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n", "0 6 148 72 35 0 33.6 \n", "1 1 85 66 29 0 26.6 \n", "2 8 183 64 0 0 23.3 \n", "3 1 89 66 23 94 28.1 \n", "4 0 137 40 35 168 43.1 \n", "\n", " DiabetesPedigreeFunction Age \n", "0 0.627 50 \n", "1 0.351 31 \n", "2 0.672 32 \n", "3 0.167 21 \n", "4 2.288 33 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.head()" ] }, { "cell_type": "code", "execution_count": 10, "id": "c7682595-b4fd-4fbc-8fca-e4a9300a4378", "metadata": {}, "outputs": [], "source": [ "scaler = StandardScaler()" ] }, { "cell_type": "code", "execution_count": 11, "id": "f4af8802-d94e-4fa7-a993-bd33e8fd56a5", "metadata": {}, "outputs": [], "source": [ "# NOTE: the models in the later cells are fit on the unscaled x; x_scalered is only inspected in the next cell\n", "x_scalered = scaler.fit_transform(x)" ] }, { "cell_type": "code", "execution_count": 13, "id": "84572647-fa48-4807-b58f-eef4fb7650bc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 0.63994726, 0.84832379, 0.14964075, 0.90726993, -0.69289057,\n", " 0.20401277, 0.46849198, 1.4259954 ],\n", " [-0.84488505, -1.12339636, -0.16054575, 0.53090156, -0.69289057,\n", " -0.68442195, -0.36506078, -0.19067191],\n", " [ 1.23388019, 1.94372388, -0.26394125, -1.28821221, -0.69289057,\n", " -1.10325546, 0.60439732, -0.10558415]])" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x_scalered[:3]" ] }, { "cell_type": "code", "execution_count": 16, "id": "eea39bb8-93dd-4bcd-b509-10cfff7b5fbb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(500, 268)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(y[y==0]), len(y[y==1])  # class counts (negative, positive): moderately imbalanced, roughly 2:1" ] }, { "cell_type": "code", "execution_count": 19, "id": "725d9479-9a33-4d8b-93b5-256ebef8c078", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.536" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(y[y==1])/len(y[y==0])" ] }, { 
"cell_type": "code", "execution_count": 30, "id": "9983a9ff-6aff-4f2e-8d3a-4dfe52c0a580", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "execution_count": 31, "id": "2b049926-9155-40c9-803e-a9f8a75da355", "metadata": {}, "outputs": [], "source": [ "# stratify=y keeps the class ratio identical in the train and test splits;\n", "# random_state pins the shuffle so the split (and every score derived from it) is reproducible\n", "x_train, x_test, y_train, y_test = train_test_split(\n", "    x, y, stratify=y, test_size=0.3, random_state=0\n", ")" ] }, { "cell_type": "code", "execution_count": 32, "id": "d36e5561-5ded-473d-9161-59ee4e2f7755", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.5342857142857143" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(y_train[y_train==1])/len(y_train[y_train==0])" ] }, { "cell_type": "code", "execution_count": 38, "id": "e8c4fdb1-2730-46c5-99f8-d2cb4c6d1d19", "metadata": {}, "outputs": [], "source": [ "from sklearn.tree import DecisionTreeClassifier" ] }, { "cell_type": "code", "execution_count": 39, "id": "f4cdd4a2-b7db-4e0c-bb7e-436796b38f77", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import cross_val_score" ] }, { "cell_type": "code", "execution_count": 41, "id": "62495661-ca1e-43d4-9da7-2591acfcf639", "metadata": {}, "outputs": [], "source": [ "cvs = cross_val_score(DecisionTreeClassifier(), x, y, cv=5)" ] }, { "cell_type": "code", "execution_count": 42, "id": "3d10246e-3e29-4037-a166-10b365381b29", "metadata": {}, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 43, "id": "0be2cea0-bfca-44dd-85d3-85de1e14355e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7110432051608522" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.mean(cvs)" ] }, { "cell_type": "code", "execution_count": 44, "id": "da9f0cde-1b1f-45b2-9d18-50de178bdd65", "metadata": {}, "outputs": [], "source": [ "from sklearn.ensemble import BaggingClassifier" ] }, { "cell_type": "code", 
"execution_count": 45, "id": "0a7f52d2-f648-4062-946b-f178823bb29d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
BaggingClassifier(estimator=DecisionTreeClassifier(), max_samples=0.8,\n",
       "                  n_estimators=100, oob_score=True, random_state=0)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "BaggingClassifier(estimator=DecisionTreeClassifier(), max_samples=0.8,\n", " n_estimators=100, oob_score=True, random_state=0)" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bagg = BaggingClassifier(\n", "    estimator=DecisionTreeClassifier(),\n", "    n_estimators=100,\n", "    max_samples=0.8,\n", "    oob_score=True,\n", "    random_state=0,\n", ")\n", "bagg.fit(x_train, y_train)" ] }, { "cell_type": "code", "execution_count": 46, "id": "0e94db07-919f-4911-8c37-0516649628f8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7392923649906891" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bagg.oob_score_" ] }, { "cell_type": "code", "execution_count": 47, "id": "6ed41070-4666-4d84-b5c7-3be05809aec9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7662337662337663" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bagg.score(x_test, y_test)" ] }, { "cell_type": "code", "execution_count": 49, "id": "75149b99-951c-4566-b039-ee7142886a97", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7578728461081402" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Use a separate, unfitted estimator for cross-validation so the fitted `bagg`\n", "# (and its oob_score_) from the cells above is not overwritten.\n", "bagg_cv = BaggingClassifier(\n", "    estimator=DecisionTreeClassifier(),\n", "    n_estimators=100,\n", "    max_samples=0.8,\n", "    oob_score=True,\n", "    random_state=0,\n", ")\n", "np.mean(cross_val_score(bagg_cv, x, y, cv=5))" ] }, { "cell_type": "code", "execution_count": 50, "id": "fdec9f7e-4b83-462f-ae41-0aeb1a20da7a", "metadata": {}, "outputs": [], "source": [ "from sklearn.ensemble import RandomForestClassifier" ] }, { "cell_type": "code", "execution_count": 55, "id": "1ca4c92b-d7c1-4c5e-b2fa-f5331ca4ccd5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7657074951192598" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"# A random forest is bagged decision trees plus a random feature subset considered at every split\n", "cross_val_score(RandomForestClassifier(), x, y, cv=5).mean()" ] }, { "cell_type": "code", "execution_count": null, "id": "08f53b7a-a348-4e12-83e1-3d04d43d7907", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.0" } }, "nbformat": 4, "nbformat_minor": 5 }