{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "760ad04a-fb17-473b-8d72-abe818112c00",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8ff304f9-7c15-4c17-8dc6-5e75be63d940",
"metadata": {},
"outputs": [],
"source": [
"# Read the Titanic passenger table from a CSV in the current working directory.\n",
"df = pd.read_csv(filepath_or_buffer=\"titanic.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "fdd98246-09d5-4d2b-afb2-cd4c964215d3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" PassengerId | \n",
" Name | \n",
" Pclass | \n",
" Sex | \n",
" Age | \n",
" SibSp | \n",
" Parch | \n",
" Ticket | \n",
" Fare | \n",
" Cabin | \n",
" Embarked | \n",
" Survived | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" Braund, Mr. Owen Harris | \n",
" 3 | \n",
" male | \n",
" 22.0 | \n",
" 1 | \n",
" 0 | \n",
" A/5 21171 | \n",
" 7.2500 | \n",
" NaN | \n",
" S | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
" 1 | \n",
" female | \n",
" 38.0 | \n",
" 1 | \n",
" 0 | \n",
" PC 17599 | \n",
" 71.2833 | \n",
" C85 | \n",
" C | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" Heikkinen, Miss. Laina | \n",
" 3 | \n",
" female | \n",
" 26.0 | \n",
" 0 | \n",
" 0 | \n",
" STON/O2. 3101282 | \n",
" 7.9250 | \n",
" NaN | \n",
" S | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
" 1 | \n",
" female | \n",
" 35.0 | \n",
" 1 | \n",
" 0 | \n",
" 113803 | \n",
" 53.1000 | \n",
" C123 | \n",
" S | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" Allen, Mr. William Henry | \n",
" 3 | \n",
" male | \n",
" 35.0 | \n",
" 0 | \n",
" 0 | \n",
" 373450 | \n",
" 8.0500 | \n",
" NaN | \n",
" S | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" PassengerId Name Pclass \\\n",
"0 1 Braund, Mr. Owen Harris 3 \n",
"1 2 Cumings, Mrs. John Bradley (Florence Briggs Th... 1 \n",
"2 3 Heikkinen, Miss. Laina 3 \n",
"3 4 Futrelle, Mrs. Jacques Heath (Lily May Peel) 1 \n",
"4 5 Allen, Mr. William Henry 3 \n",
"\n",
" Sex Age SibSp Parch Ticket Fare Cabin Embarked \\\n",
"0 male 22.0 1 0 A/5 21171 7.2500 NaN S \n",
"1 female 38.0 1 0 PC 17599 71.2833 C85 C \n",
"2 female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S \n",
"3 female 35.0 1 0 113803 53.1000 C123 S \n",
"4 male 35.0 0 0 373450 8.0500 NaN S \n",
"\n",
" Survived \n",
"0 0 \n",
"1 1 \n",
"2 1 \n",
"3 1 \n",
"4 0 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the freshly loaded table.\n",
"df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "53d859fe-700a-4794-8fc1-eb1097b9f8a9",
"metadata": {},
"outputs": [],
"source": [
"# Keep only the columns later used as model inputs plus the Survived target.\n",
"# Rebinding `df` (instead of inplace=True) together with errors=\"ignore\"\n",
"# keeps this cell safe to re-run after the columns have already been removed.\n",
"df = df.drop(\n",
"    columns=[\"PassengerId\", \"Name\", \"SibSp\", \"Ticket\", \"Cabin\", \"Embarked\"],\n",
"    errors=\"ignore\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ec9a94d6-2750-4e73-ad95-0b36a4a2f026",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pclass | \n",
" Sex | \n",
" Age | \n",
" Parch | \n",
" Fare | \n",
" Survived | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3 | \n",
" male | \n",
" 22.0 | \n",
" 0 | \n",
" 7.2500 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" female | \n",
" 38.0 | \n",
" 0 | \n",
" 71.2833 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" female | \n",
" 26.0 | \n",
" 0 | \n",
" 7.9250 | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" female | \n",
" 35.0 | \n",
" 0 | \n",
" 53.1000 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" 3 | \n",
" male | \n",
" 35.0 | \n",
" 0 | \n",
" 8.0500 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pclass Sex Age Parch Fare Survived\n",
"0 3 male 22.0 0 7.2500 0\n",
"1 1 female 38.0 0 71.2833 1\n",
"2 3 female 26.0 0 7.9250 1\n",
"3 1 female 35.0 0 53.1000 1\n",
"4 3 male 35.0 0 8.0500 0"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Check that only the retained columns are left.\n",
"df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "6541545a-13f5-4d57-bb08-7c95818b90a8",
"metadata": {},
"outputs": [],
"source": [
"# One-hot encode Sex into float indicator columns, one per category.\n",
"dummy = pd.get_dummies(df[\"Sex\"], dtype=float)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "3cad29ec-9221-4e9e-bf8d-de8c6645b2ea",
"metadata": {},
"outputs": [],
"source": [
"# Append the indicator columns to the right of the existing columns.\n",
"df = pd.concat([df, dummy], axis=\"columns\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "cb141724-1db3-4d18-948a-87e530de6038",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pclass | \n",
" Sex | \n",
" Age | \n",
" Parch | \n",
" Fare | \n",
" Survived | \n",
" female | \n",
" male | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3 | \n",
" male | \n",
" 22.0 | \n",
" 0 | \n",
" 7.2500 | \n",
" 0 | \n",
" 0.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" female | \n",
" 38.0 | \n",
" 0 | \n",
" 71.2833 | \n",
" 1 | \n",
" 1.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" female | \n",
" 26.0 | \n",
" 0 | \n",
" 7.9250 | \n",
" 1 | \n",
" 1.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" female | \n",
" 35.0 | \n",
" 0 | \n",
" 53.1000 | \n",
" 1 | \n",
" 1.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 3 | \n",
" male | \n",
" 35.0 | \n",
" 0 | \n",
" 8.0500 | \n",
" 0 | \n",
" 0.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pclass Sex Age Parch Fare Survived female male\n",
"0 3 male 22.0 0 7.2500 0 0.0 1.0\n",
"1 1 female 38.0 0 71.2833 1 1.0 0.0\n",
"2 3 female 26.0 0 7.9250 1 1.0 0.0\n",
"3 1 female 35.0 0 53.1000 1 1.0 0.0\n",
"4 3 male 35.0 0 8.0500 0 0.0 1.0"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect the table with the new indicator columns attached.\n",
"df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "f80f8d46-eb88-4d81-9fda-6e1bbc987d14",
"metadata": {},
"outputs": [],
"source": [
"# The text Sex column is redundant once its indicator columns exist.\n",
"df = df.drop(columns=[\"Sex\"])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "40f9e39e-4f53-4ef6-a9ab-2973dc1f59bd",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pclass | \n",
" Age | \n",
" Parch | \n",
" Fare | \n",
" Survived | \n",
" female | \n",
" male | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3 | \n",
" 22.0 | \n",
" 0 | \n",
" 7.2500 | \n",
" 0 | \n",
" 0.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 38.0 | \n",
" 0 | \n",
" 71.2833 | \n",
" 1 | \n",
" 1.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 26.0 | \n",
" 0 | \n",
" 7.9250 | \n",
" 1 | \n",
" 1.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" 35.0 | \n",
" 0 | \n",
" 53.1000 | \n",
" 1 | \n",
" 1.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 3 | \n",
" 35.0 | \n",
" 0 | \n",
" 8.0500 | \n",
" 0 | \n",
" 0.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pclass Age Parch Fare Survived female male\n",
"0 3 22.0 0 7.2500 0 0.0 1.0\n",
"1 1 38.0 0 71.2833 1 1.0 0.0\n",
"2 3 26.0 0 7.9250 1 1.0 0.0\n",
"3 1 35.0 0 53.1000 1 1.0 0.0\n",
"4 3 35.0 0 8.0500 0 0.0 1.0"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Final look at the fully numeric table before splitting off the target.\n",
"df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "9765398d-8761-4857-864d-ee2bb381db4f",
"metadata": {},
"outputs": [],
"source": [
"# Target vector: the Survived column.\n",
"y = df[\"Survived\"]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "2a25bb9b-bd32-4e9f-b3a1-eebb9827150f",
"metadata": {},
"outputs": [],
"source": [
"# Feature matrix: every column except the target.\n",
"x = df.drop(columns=[\"Survived\"])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "d6578f9c-ecb7-4c8d-9655-0f38dff4732d",
"metadata": {},
"outputs": [],
"source": [
"# Replace missing ages with the mean of the observed ages.\n",
"# NOTE(review): the mean is computed over all rows before the train/test\n",
"# split, so held-out rows influence the imputed value; confirm this is acceptable.\n",
"x[\"Age\"] = x[\"Age\"].fillna(x[\"Age\"].mean())"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "f61eb7dd-32b4-4f73-930f-5f35e703b262",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "35140d75-f92b-4431-93d3-6ce6c3506ae9",
"metadata": {},
"outputs": [],
"source": [
"# Hold out 20% of the rows for evaluation. A fixed random_state makes the\n",
"# split, and therefore every score and prediction below, reproducible on\n",
"# Restart & Run All.\n",
"x_train, x_test, y_train, y_test = train_test_split(\n",
"    x, y, test_size=0.2, random_state=42\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "85fdca0d-289a-410f-a27f-b0ad1dac7d6b",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.naive_bayes import GaussianNB"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "4b84bfda-b2e9-40b2-a1f1-edacc83df97e",
"metadata": {},
"outputs": [],
"source": [
"# Gaussian naive Bayes with default settings; priors=None spells out the\n",
"# default, under which class priors are taken from the training data.\n",
"gnb = GaussianNB(priors=None)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "7359396a-3920-44fd-9787-7526eaa104cc",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"GaussianNB()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
],
"text/plain": [
"GaussianNB()"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit on the training split only. fit() returns the estimator itself,\n",
"# which is what the cell displays.\n",
"gnb.fit(X=x_train, y=y_train)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "2b043bdd-481e-4d1b-bc6d-05fe754b34b4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.7877094972067039"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean accuracy on the held-out split.\n",
"gnb.score(X=x_test, y=y_test)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "218dd605-da4f-4794-aec4-3a605bb6aece",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"50 0\n",
"627 1\n",
"359 1\n",
"711 0\n",
"249 0\n",
"393 1\n",
"44 1\n",
"80 0\n",
"849 1\n",
"826 0\n",
"Name: Survived, dtype: int64"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# True labels of the first ten held-out rows (positional slice).\n",
"y_test.iloc[:10]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "a27d97ec-2163-4d5f-bc07-b4538715fe7a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[9.79369123e-01, 2.06308775e-02],\n",
" [1.89602239e-03, 9.98103978e-01],\n",
" [7.12732233e-02, 9.28726777e-01],\n",
" [9.10542388e-01, 8.94576121e-02],\n",
" [9.75459216e-01, 2.45407840e-02],\n",
" [2.71273089e-04, 9.99728727e-01],\n",
" [6.33906625e-02, 9.36609338e-01],\n",
" [9.88859325e-01, 1.11406748e-02],\n",
" [1.22867429e-03, 9.98771326e-01],\n",
" [9.80998751e-01, 1.90012495e-02]])"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Per-class probabilities for the first ten held-out rows (positional slice);\n",
"# columns follow the order of gnb.classes_.\n",
"gnb.predict_proba(x_test.iloc[:10])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4d6e364f-7e4a-4259-b101-795bf2f54635",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}