machine_learning/multinomialNB_classifier_15/multinomialNB_classifier.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Category</th>\n",
       "      <th>Message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ham</td>\n",
       "      <td>Go until jurong point, crazy.. Available only ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ham</td>\n",
       "      <td>Ok lar... Joking wif u oni...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>spam</td>\n",
       "      <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ham</td>\n",
       "      <td>U dun say so early hor... U c already then say...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>ham</td>\n",
       "      <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Category                                            Message\n",
       "0      ham  Go until jurong point, crazy.. Available only ...\n",
       "1      ham                      Ok lar... Joking wif u oni...\n",
       "2     spam  Free entry in 2 a wkly comp to win FA Cup fina...\n",
       "3      ham  U dun say so early hor... U c already then say...\n",
       "4      ham  Nah I don't think he goes to usf, he lives aro..."
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv(\"spam.csv\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"4\" halign=\"left\">Message</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>unique</th>\n",
       "      <th>top</th>\n",
       "      <th>freq</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Category</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ham</th>\n",
       "      <td>4825</td>\n",
       "      <td>4516</td>\n",
       "      <td>Sorry, I'll call later</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>spam</th>\n",
       "      <td>747</td>\n",
       "      <td>641</td>\n",
       "      <td>Please call our customer service representativ...</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Message                                                            \\\n",
       "           count unique                                                top   \n",
       "Category                                                                     \n",
       "ham         4825   4516                             Sorry, I'll call later   \n",
       "spam         747    641  Please call our customer service representativ...   \n",
       "\n",
       "               \n",
       "         freq  \n",
       "Category       \n",
       "ham        30  \n",
       "spam        4  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('Category').describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Category</th>\n",
       "      <th>Message</th>\n",
       "      <th>spam</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ham</td>\n",
       "      <td>Go until jurong point, crazy.. Available only ...</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ham</td>\n",
       "      <td>Ok lar... Joking wif u oni...</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>spam</td>\n",
       "      <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ham</td>\n",
       "      <td>U dun say so early hor... U c already then say...</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>ham</td>\n",
       "      <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Category                                            Message  spam\n",
       "0      ham  Go until jurong point, crazy.. Available only ...     0\n",
       "1      ham                      Ok lar... Joking wif u oni...     0\n",
       "2     spam  Free entry in 2 a wkly comp to win FA Cup fina...     1\n",
       "3      ham  U dun say so early hor... U c already then say...     0\n",
       "4      ham  Nah I don't think he goes to usf, he lives aro...     0"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['spam']=df['Category'].apply(lambda x: 1 if x=='spam' else 0)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(df.Message,df.spam)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0, 0, 0, ..., 0, 0, 0],\n",
       "       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "v = CountVectorizer()\n",
    "X_train_count = v.fit_transform(X_train.values)\n",
    "X_train_count.toarray()[:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.naive_bayes import MultinomialNB\n",
    "model = MultinomialNB()\n",
    "model.fit(X_train_count,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 1], dtype=int64)"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "emails = [\n",
    "    'Hey mohan, can we get together to watch footbal game tomorrow?',\n",
    "    'Upto 20% discount on parking, exclusive offer just for you. Dont miss this reward!'\n",
    "]\n",
    "emails_count = v.transform(emails)\n",
    "model.predict(emails_count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9827709978463748"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_test_count = v.transform(X_test)\n",
    "model.score(X_test_count, y_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Sklearn Pipeline**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.pipeline import Pipeline\n",
    "clf = Pipeline([\n",
    "    ('vectorizer', CountVectorizer()),\n",
    "    ('nb', MultinomialNB())\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Pipeline(memory=None,\n",
       "     steps=[('vectorizer', CountVectorizer(analyzer='word', binary=False, decode_error='strict',\n",
       "        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',\n",
       "        lowercase=True, max_df=1.0, max_features=None, min_df=1,\n",
       "        ngram_range=(1, 1), preprocessor=None, stop_words=None,\n",
       "        strip_accents=None, token_pattern='(?u)\\\\b\\\\w\\\\w+\\\\b',\n",
       "        tokenizer=None, vocabulary=None)), ('nb', MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))])"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9827709978463748"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf.score(X_test,y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 1], dtype=int64)"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf.predict(emails)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
sklearn 6 months ago			`{`
			`"cells": [`
			`{`
			`"cell_type": "code",`
			`"execution_count": 1,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"import pandas as pd"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 2,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/html": [`
			`"<div>\n",`
			`"<style scoped>\n",`
			`" .dataframe tbody tr th:only-of-type {\n",`
			`" vertical-align: middle;\n",`
			`" }\n",`
			`"\n",`
			`" .dataframe tbody tr th {\n",`
			`" vertical-align: top;\n",`
			`" }\n",`
			`"\n",`
			`" .dataframe thead th {\n",`
			`" text-align: right;\n",`
			`" }\n",`
			`"</style>\n",`
			`"<table border=\"1\" class=\"dataframe\">\n",`
			`" <thead>\n",`
			`" <tr style=\"text-align: right;\">\n",`
			`" <th></th>\n",`
			`" <th>Category</th>\n",`
			`" <th>Message</th>\n",`
			`" </tr>\n",`
			`" </thead>\n",`
			`" <tbody>\n",`
			`" <tr>\n",`
			`" <th>0</th>\n",`
			`" <td>ham</td>\n",`
			`" <td>Go until jurong point, crazy.. Available only ...</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>1</th>\n",`
			`" <td>ham</td>\n",`
			`" <td>Ok lar... Joking wif u oni...</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>2</th>\n",`
			`" <td>spam</td>\n",`
			`" <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>3</th>\n",`
			`" <td>ham</td>\n",`
			`" <td>U dun say so early hor... U c already then say...</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>4</th>\n",`
			`" <td>ham</td>\n",`
			`" <td>Nah I don't think he goes to usf, he lives aro...</td>\n",`
			`" </tr>\n",`
			`" </tbody>\n",`
			`"</table>\n",`
			`"</div>"`
			`],`
			`"text/plain": [`
			`" Category Message\n",`
			`"0 ham Go until jurong point, crazy.. Available only ...\n",`
			`"1 ham Ok lar... Joking wif u oni...\n",`
			`"2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n",`
			`"3 ham U dun say so early hor... U c already then say...\n",`
			`"4 ham Nah I don't think he goes to usf, he lives aro..."`
			`]`
			`},`
			`"execution_count": 2,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"df = pd.read_csv(\"spam.csv\")\n",`
			`"df.head()"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 3,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/html": [`
			`"<div>\n",`
			`"<style scoped>\n",`
			`" .dataframe tbody tr th:only-of-type {\n",`
			`" vertical-align: middle;\n",`
			`" }\n",`
			`"\n",`
			`" .dataframe tbody tr th {\n",`
			`" vertical-align: top;\n",`
			`" }\n",`
			`"\n",`
			`" .dataframe thead tr th {\n",`
			`" text-align: left;\n",`
			`" }\n",`
			`"\n",`
			`" .dataframe thead tr:last-of-type th {\n",`
			`" text-align: right;\n",`
			`" }\n",`
			`"</style>\n",`
			`"<table border=\"1\" class=\"dataframe\">\n",`
			`" <thead>\n",`
			`" <tr>\n",`
			`" <th></th>\n",`
			`" <th colspan=\"4\" halign=\"left\">Message</th>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th></th>\n",`
			`" <th>count</th>\n",`
			`" <th>unique</th>\n",`
			`" <th>top</th>\n",`
			`" <th>freq</th>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>Category</th>\n",`
			`" <th></th>\n",`
			`" <th></th>\n",`
			`" <th></th>\n",`
			`" <th></th>\n",`
			`" </tr>\n",`
			`" </thead>\n",`
			`" <tbody>\n",`
			`" <tr>\n",`
			`" <th>ham</th>\n",`
			`" <td>4825</td>\n",`
			`" <td>4516</td>\n",`
			`" <td>Sorry, I'll call later</td>\n",`
			`" <td>30</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>spam</th>\n",`
			`" <td>747</td>\n",`
			`" <td>641</td>\n",`
			`" <td>Please call our customer service representativ...</td>\n",`
			`" <td>4</td>\n",`
			`" </tr>\n",`
			`" </tbody>\n",`
			`"</table>\n",`
			`"</div>"`
			`],`
			`"text/plain": [`
			`" Message \\\n",`
			`" count unique top \n",`
			`"Category \n",`
			`"ham 4825 4516 Sorry, I'll call later \n",`
			`"spam 747 641 Please call our customer service representativ... \n",`
			`"\n",`
			`" \n",`
			`" freq \n",`
			`"Category \n",`
			`"ham 30 \n",`
			`"spam 4 "`
			`]`
			`},`
			`"execution_count": 3,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"df.groupby('Category').describe()"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 4,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/html": [`
			`"<div>\n",`
			`"<style scoped>\n",`
			`" .dataframe tbody tr th:only-of-type {\n",`
			`" vertical-align: middle;\n",`
			`" }\n",`
			`"\n",`
			`" .dataframe tbody tr th {\n",`
			`" vertical-align: top;\n",`
			`" }\n",`
			`"\n",`
			`" .dataframe thead th {\n",`
			`" text-align: right;\n",`
			`" }\n",`
			`"</style>\n",`
			`"<table border=\"1\" class=\"dataframe\">\n",`
			`" <thead>\n",`
			`" <tr style=\"text-align: right;\">\n",`
			`" <th></th>\n",`
			`" <th>Category</th>\n",`
			`" <th>Message</th>\n",`
			`" <th>spam</th>\n",`
			`" </tr>\n",`
			`" </thead>\n",`
			`" <tbody>\n",`
			`" <tr>\n",`
			`" <th>0</th>\n",`
			`" <td>ham</td>\n",`
			`" <td>Go until jurong point, crazy.. Available only ...</td>\n",`
			`" <td>0</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>1</th>\n",`
			`" <td>ham</td>\n",`
			`" <td>Ok lar... Joking wif u oni...</td>\n",`
			`" <td>0</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>2</th>\n",`
			`" <td>spam</td>\n",`
			`" <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",`
			`" <td>1</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>3</th>\n",`
			`" <td>ham</td>\n",`
			`" <td>U dun say so early hor... U c already then say...</td>\n",`
			`" <td>0</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>4</th>\n",`
			`" <td>ham</td>\n",`
			`" <td>Nah I don't think he goes to usf, he lives aro...</td>\n",`
			`" <td>0</td>\n",`
			`" </tr>\n",`
			`" </tbody>\n",`
			`"</table>\n",`
			`"</div>"`
			`],`
			`"text/plain": [`
			`" Category Message spam\n",`
			`"0 ham Go until jurong point, crazy.. Available only ... 0\n",`
			`"1 ham Ok lar... Joking wif u oni... 0\n",`
			`"2 spam Free entry in 2 a wkly comp to win FA Cup fina... 1\n",`
			`"3 ham U dun say so early hor... U c already then say... 0\n",`
			`"4 ham Nah I don't think he goes to usf, he lives aro... 0"`
			`]`
			`},`
			`"execution_count": 4,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"df['spam']=df['Category'].apply(lambda x: 1 if x=='spam' else 0)\n",`
			`"df.head()"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 7,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"from sklearn.model_selection import train_test_split\n",`
			`"X_train, X_test, y_train, y_test = train_test_split(df.Message,df.spam)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 31,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"array([[0, 0, 0, ..., 0, 0, 0],\n",`
			`" [0, 0, 0, ..., 0, 0, 0]], dtype=int64)"`
			`]`
			`},`
			`"execution_count": 31,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"from sklearn.feature_extraction.text import CountVectorizer\n",`
			`"v = CountVectorizer()\n",`
			`"X_train_count = v.fit_transform(X_train.values)\n",`
			`"X_train_count.toarray()[:2]"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 23,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)"`
			`]`
			`},`
			`"execution_count": 23,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"from sklearn.naive_bayes import MultinomialNB\n",`
			`"model = MultinomialNB()\n",`
			`"model.fit(X_train_count,y_train)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 37,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"array([0, 1], dtype=int64)"`
			`]`
			`},`
			`"execution_count": 37,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"emails = [\n",`
			`" 'Hey mohan, can we get together to watch footbal game tomorrow?',\n",`
			`" 'Upto 20% discount on parking, exclusive offer just for you. Dont miss this reward!'\n",`
			`"]\n",`
			`"emails_count = v.transform(emails)\n",`
			`"model.predict(emails_count)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 38,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"0.9827709978463748"`
			`]`
			`},`
			`"execution_count": 38,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"X_test_count = v.transform(X_test)\n",`
			`"model.score(X_test_count, y_test)"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"Sklearn Pipeline"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 39,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"from sklearn.pipeline import Pipeline\n",`
			`"clf = Pipeline([\n",`
			`" ('vectorizer', CountVectorizer()),\n",`
			`" ('nb', MultinomialNB())\n",`
			`"])"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 40,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"Pipeline(memory=None,\n",`
			`" steps=[('vectorizer', CountVectorizer(analyzer='word', binary=False, decode_error='strict',\n",`
			`" dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',\n",`
			`" lowercase=True, max_df=1.0, max_features=None, min_df=1,\n",`
			`" ngram_range=(1, 1), preprocessor=None, stop_words=None,\n",`
			`" strip_accents=None, token_pattern='(?u)\\\\b\\\\w\\\\w+\\\\b',\n",`
			`" tokenizer=None, vocabulary=None)), ('nb', MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))])"`
			`]`
			`},`
			`"execution_count": 40,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"clf.fit(X_train, y_train)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 41,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"0.9827709978463748"`
			`]`
			`},`
			`"execution_count": 41,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"clf.score(X_test,y_test)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 42,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"array([0, 1], dtype=int64)"`
			`]`
			`},`
			`"execution_count": 42,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"clf.predict(emails)"`
			`]`
			`}`
			`],`
			`"metadata": {`
			`"kernelspec": {`
			`"display_name": "Python 3",`
			`"language": "python",`
			`"name": "python3"`
			`},`
			`"language_info": {`
			`"codemirror_mode": {`
			`"name": "ipython",`
			`"version": 3`
			`},`
			`"file_extension": ".py",`
			`"mimetype": "text/x-python",`
			`"name": "python",`
			`"nbconvert_exporter": "python",`
			`"pygments_lexer": "ipython3",`
			`"version": "3.7.3"`
			`}`
			`},`
			`"nbformat": 4,`
			`"nbformat_minor": 2`
			`}`