{ "cells": [ { "cell_type": "markdown", "id": "titanic-gnb-overview", "metadata": {}, "source": [ "# Titanic survival prediction with Gaussian Naive Bayes\n", "\n", "Loads `titanic.csv`, drops the columns that are not used as features, one-hot encodes `Sex`, fills missing `Age` values with the column mean, holds out 20% of the rows for testing, and fits and evaluates a `GaussianNB` classifier." ] },
{ "cell_type": "code", "execution_count": 1, "id": "760ad04a-fb17-473b-8d72-abe818112c00", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] },
{ "cell_type": "code", "execution_count": 2, "id": "8ff304f9-7c15-4c17-8dc6-5e75be63d940", "metadata": {}, "outputs": [], "source": [ "# Read the passenger table; the path is relative to the working directory.\n", "df = pd.read_csv(\"titanic.csv\")" ] },
{ "cell_type": "code", "execution_count": 3, "id": "fdd98246-09d5-4d2b-afb2-cd4c964215d3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdNamePclassSexAgeSibSpParchTicketFareCabinEmbarkedSurvived
01Braund, Mr. Owen Harris3male22.010A/5 211717.2500NaNS0
12Cumings, Mrs. John Bradley (Florence Briggs Th...1female38.010PC 1759971.2833C85C1
23Heikkinen, Miss. Laina3female26.000STON/O2. 31012827.9250NaNS1
34Futrelle, Mrs. Jacques Heath (Lily May Peel)1female35.01011380353.1000C123S1
45Allen, Mr. William Henry3male35.0003734508.0500NaNS0
\n", "
" ], "text/plain": [ " PassengerId Name Pclass \\\n", "0 1 Braund, Mr. Owen Harris 3 \n", "1 2 Cumings, Mrs. John Bradley (Florence Briggs Th... 1 \n", "2 3 Heikkinen, Miss. Laina 3 \n", "3 4 Futrelle, Mrs. Jacques Heath (Lily May Peel) 1 \n", "4 5 Allen, Mr. William Henry 3 \n", "\n", " Sex Age SibSp Parch Ticket Fare Cabin Embarked \\\n", "0 male 22.0 1 0 A/5 21171 7.2500 NaN S \n", "1 female 38.0 1 0 PC 17599 71.2833 C85 C \n", "2 female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S \n", "3 female 35.0 1 0 113803 53.1000 C123 S \n", "4 male 35.0 0 0 373450 8.0500 NaN S \n", "\n", " Survived \n", "0 0 \n", "1 1 \n", "2 1 \n", "3 1 \n", "4 0 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] },
{ "cell_type": "code", "execution_count": 4, "id": "53d859fe-700a-4794-8fc1-eb1097b9f8a9", "metadata": {}, "outputs": [], "source": [ "# Drop the columns that are not used as model features.\n", "# The result is reassigned instead of mutating df with inplace=True.\n", "df = df.drop(columns=[\"PassengerId\", \"Name\", \"SibSp\", \"Ticket\", \"Cabin\", \"Embarked\"])" ] },
{ "cell_type": "code", "execution_count": 5, "id": "ec9a94d6-2750-4e73-ad95-0b36a4a2f026", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PclassSexAgeParchFareSurvived
03male22.007.25000
11female38.0071.28331
23female26.007.92501
31female35.0053.10001
43male35.008.05000
\n", "
" ], "text/plain": [ " Pclass Sex Age Parch Fare Survived\n", "0 3 male 22.0 0 7.2500 0\n", "1 1 female 38.0 0 71.2833 1\n", "2 3 female 26.0 0 7.9250 1\n", "3 1 female 35.0 0 53.1000 1\n", "4 3 male 35.0 0 8.0500 0" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] },
{ "cell_type": "code", "execution_count": 6, "id": "6541545a-13f5-4d57-bb08-7c95818b90a8", "metadata": {}, "outputs": [], "source": [ "# One-hot encode Sex into float indicator columns, one per category.\n", "dummy = pd.get_dummies(df[\"Sex\"], dtype=float)" ] },
{ "cell_type": "code", "execution_count": 7, "id": "3cad29ec-9221-4e9e-bf8d-de8c6645b2ea", "metadata": {}, "outputs": [], "source": [ "# join() refuses overlapping column names, so re-running this cell by\n", "# accident raises instead of silently appending duplicate indicator columns.\n", "df = df.join(dummy)" ] },
{ "cell_type": "code", "execution_count": 8, "id": "cb141724-1db3-4d18-948a-87e530de6038", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PclassSexAgeParchFareSurvivedfemalemale
03male22.007.250000.01.0
11female38.0071.283311.00.0
23female26.007.925011.00.0
31female35.0053.100011.00.0
43male35.008.050000.01.0
\n", "
" ], "text/plain": [ " Pclass Sex Age Parch Fare Survived female male\n", "0 3 male 22.0 0 7.2500 0 0.0 1.0\n", "1 1 female 38.0 0 71.2833 1 1.0 0.0\n", "2 3 female 26.0 0 7.9250 1 1.0 0.0\n", "3 1 female 35.0 0 53.1000 1 1.0 0.0\n", "4 3 male 35.0 0 8.0500 0 0.0 1.0" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] },
{ "cell_type": "code", "execution_count": 9, "id": "f80f8d46-eb88-4d81-9fda-6e1bbc987d14", "metadata": {}, "outputs": [], "source": [ "# Remove the original text column now that df carries its indicator columns.\n", "df = df.drop(columns=[\"Sex\"])" ] },
{ "cell_type": "code", "execution_count": 10, "id": "40f9e39e-4f53-4ef6-a9ab-2973dc1f59bd", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PclassAgeParchFareSurvivedfemalemale
0322.007.250000.01.0
1138.0071.283311.00.0
2326.007.925011.00.0
3135.0053.100011.00.0
4335.008.050000.01.0
\n", "
" ], "text/plain": [ " Pclass Age Parch Fare Survived female male\n", "0 3 22.0 0 7.2500 0 0.0 1.0\n", "1 1 38.0 0 71.2833 1 1.0 0.0\n", "2 3 26.0 0 7.9250 1 1.0 0.0\n", "3 1 35.0 0 53.1000 1 1.0 0.0\n", "4 3 35.0 0 8.0500 0 0.0 1.0" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] },
{ "cell_type": "code", "execution_count": 11, "id": "9765398d-8761-4857-864d-ee2bb381db4f", "metadata": {}, "outputs": [], "source": [ "# Target: the Survived column.\n", "y = df[\"Survived\"]" ] },
{ "cell_type": "code", "execution_count": 12, "id": "2a25bb9b-bd32-4e9f-b3a1-eebb9827150f", "metadata": {}, "outputs": [], "source": [ "# Features: every column except the target.\n", "x = df.drop(columns=\"Survived\")" ] },
{ "cell_type": "code", "execution_count": 13, "id": "d6578f9c-ecb7-4c8d-9655-0f38dff4732d", "metadata": {}, "outputs": [], "source": [ "# Fill missing ages with the mean age (item assignment rather than\n", "# attribute-style column assignment).\n", "# NOTE: the mean is computed over all rows, i.e. before the train/test split.\n", "x[\"Age\"] = x[\"Age\"].fillna(x[\"Age\"].mean())" ] },
{ "cell_type": "code", "execution_count": 14, "id": "f61eb7dd-32b4-4f73-930f-5f35e703b262", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split" ] },
{ "cell_type": "code", "execution_count": 15, "id": "35140d75-f92b-4431-93d3-6ce6c3506ae9", "metadata": {}, "outputs": [], "source": [ "# Hold out 20% of the rows for testing. random_state is fixed so the split,\n", "# and therefore the reported score, is the same on every run.\n", "x_train, x_test, y_train, y_test = train_test_split(\n", "    x, y, test_size=0.2, random_state=42\n", ")" ] },
{ "cell_type": "code", "execution_count": 16, "id": "85fdca0d-289a-410f-a27f-b0ad1dac7d6b", "metadata": {}, "outputs": [], "source": [ "from sklearn.naive_bayes import GaussianNB" ] },
{ "cell_type": "code", "execution_count": 17, "id": "4b84bfda-b2e9-40b2-a1f1-edacc83df97e", "metadata": {}, "outputs": [], "source": [ "gnb = GaussianNB()" ] },
{ "cell_type": "code", "execution_count": 18, "id": "7359396a-3920-44fd-9787-7526eaa104cc", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
GaussianNB()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "GaussianNB()" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Fit the classifier on the training split.\n", "gnb.fit(x_train, y_train)" ] },
{ "cell_type": "code", "execution_count": 19, "id": "2b043bdd-481e-4d1b-bc6d-05fe754b34b4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7877094972067039" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Mean accuracy on the held-out test split.\n", "gnb.score(x_test, y_test)" ] },
{ "cell_type": "code", "execution_count": 21, "id": "a27d97ec-2163-4d5f-bc07-b4538715fe7a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[9.79369123e-01, 2.06308775e-02],\n", " [1.89602239e-03, 9.98103978e-01],\n", " [7.12732233e-02, 9.28726777e-01],\n", " [9.10542388e-01, 8.94576121e-02],\n", " [9.75459216e-01, 2.45407840e-02],\n", " [2.71273089e-04, 9.99728727e-01],\n", " [6.33906625e-02, 9.36609338e-01],\n", " [9.88859325e-01, 1.11406748e-02],\n", " [1.22867429e-03, 9.98771326e-01],\n", " [9.80998751e-01, 1.90012495e-02]])" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Predicted class probabilities for the first ten test rows\n", "# (one column per class, in the order of gnb.classes_).\n", "gnb.predict_proba(x_test.head(10))" ] },
{ "cell_type": "code", "execution_count": 24, "id": "218dd605-da4f-4794-aec4-3a605bb6aece", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "50 0\n", "627 1\n", "359 1\n", "711 0\n", "249 0\n", "393 1\n", "44 1\n", "80 0\n", "849 1\n", "826 0\n", "Name: Survived, dtype: int64" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# True labels of the same ten rows, for comparison with the probabilities.\n", "y_test.head(10)" ] } ],
"metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": 
"ipython3", "version": "3.8.0" } }, "nbformat": 4, "nbformat_minor": 5 }