{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "4be81fd7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GenderHeightWeightIndex
0Male174964
1Male189872
2Female1851104
3Female1951043
4Male149613
\n", "
# %%
import pandas as pd
import numpy as np

# Keep the frame exactly as read from disk under its own name, so the
# labelling cell below never has to mutate it and can be re-run safely.
data_raw = pd.read_csv("obese.csv")
data_raw.head()

# %%
# Rows whose Index is at least this value are labelled obese (1), others 0.
OBESE_MIN_INDEX = 4

# Build `data` as a new frame rather than editing in place: the previous
# in-place assignment + drop(inplace=True) removed `Index` from the only copy
# of the data, so running this cell a second time failed on the missing
# column and the head displayed by the loading cell no longer matched `data`.
data = (
    data_raw
    .assign(obese=(data_raw["Index"] >= OBESE_MIN_INDEX).astype("int"))
    .drop(columns="Index")
)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GenderHeightWeightobese
0Male174961
1Male189870
2Female1851101
3Female1951040
4Male149610
\n", "
# %%
data.head()

# %%
# Each count is the number of rows at or above the weight cut-off whose
# `obese` flag is 0, i.e. rows a "Weight >= cut-off" rule would wrongly flag.
print(
    " Misclassified when cutting at 100kg:",
    data.loc[(data['Weight'] >= 100) & (data['obese'] == 0), :].shape[0], "\n",
    "Misclassified when cutting at 80kg:",
    data.loc[(data['Weight'] >= 80) & (data['obese'] == 0), :].shape[0]
)

# %%
def gini_impurity(y):
    '''
    Given a Pandas Series, it calculates the Gini Impurity.

    y: variable with which calculate Gini Impurity.

    Returns the impurity as a float: 1 minus the sum of the squared
    proportions of each distinct value in y. An empty Series returns 0.0.

    Raises TypeError if y is not a Pandas Series.
    '''
    if not isinstance(y, pd.Series):
        # Raise a real exception class. Raising a bare string is itself an
        # error in Python 3, which hid this message behind an unrelated
        # "exceptions must derive from BaseException" TypeError.
        raise TypeError('Object must be a Pandas Series.')

    n_rows = y.shape[0]
    if n_rows == 0:
        # No observations means nothing is mixed; without this guard the
        # empty sum below would yield 1.0, i.e. "maximally impure".
        return 0.0

    # value_counts() leaves out missing values while n_rows counts every row,
    # so proportions are taken over all rows, including missing ones.
    p = y.value_counts() / n_rows

    # float() hands back a plain Python float instead of a NumPy scalar.
    return float(1 - np.sum(p**2))

# %%
gini_impurity(data.Gender)