Skip to content

Commit 7008d09

Browse files
committed
Restaurant review
1 parent 876c7a3 commit 7008d09

File tree

1 file changed

+359
-0
lines changed

1 file changed

+359
-0
lines changed
Lines changed: 359 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,359 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 16,
6+
"id": "peaceful-legend",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"import nltk\n",
11+
"import pandas as pd\n",
12+
"from nltk.stem import WordNetLemmatizer \n",
13+
"lemmatizer = WordNetLemmatizer()"
14+
]
15+
},
16+
{
17+
"cell_type": "code",
18+
"execution_count": 2,
19+
"id": "featured-strategy",
20+
"metadata": {},
21+
"outputs": [
22+
{
23+
"data": {
24+
"text/html": [
25+
"<div>\n",
26+
"<style scoped>\n",
27+
" .dataframe tbody tr th:only-of-type {\n",
28+
" vertical-align: middle;\n",
29+
" }\n",
30+
"\n",
31+
" .dataframe tbody tr th {\n",
32+
" vertical-align: top;\n",
33+
" }\n",
34+
"\n",
35+
" .dataframe thead th {\n",
36+
" text-align: right;\n",
37+
" }\n",
38+
"</style>\n",
39+
"<table border=\"1\" class=\"dataframe\">\n",
40+
" <thead>\n",
41+
" <tr style=\"text-align: right;\">\n",
42+
" <th></th>\n",
43+
" <th>Review</th>\n",
44+
" <th>Liked</th>\n",
45+
" </tr>\n",
46+
" </thead>\n",
47+
" <tbody>\n",
48+
" <tr>\n",
49+
" <th>0</th>\n",
50+
" <td>Wow... Loved this place.</td>\n",
51+
" <td>1</td>\n",
52+
" </tr>\n",
53+
" <tr>\n",
54+
" <th>1</th>\n",
55+
" <td>Crust is not good.</td>\n",
56+
" <td>0</td>\n",
57+
" </tr>\n",
58+
" <tr>\n",
59+
" <th>2</th>\n",
60+
" <td>Not tasty and the texture was just nasty.</td>\n",
61+
" <td>0</td>\n",
62+
" </tr>\n",
63+
" <tr>\n",
64+
" <th>3</th>\n",
65+
" <td>Stopped by during the late May bank holiday of...</td>\n",
66+
" <td>1</td>\n",
67+
" </tr>\n",
68+
" <tr>\n",
69+
" <th>4</th>\n",
70+
" <td>The selection on the menu was great and so wer...</td>\n",
71+
" <td>1</td>\n",
72+
" </tr>\n",
73+
" </tbody>\n",
74+
"</table>\n",
75+
"</div>"
76+
],
77+
"text/plain": [
78+
" Review Liked\n",
79+
"0 Wow... Loved this place. 1\n",
80+
"1 Crust is not good. 0\n",
81+
"2 Not tasty and the texture was just nasty. 0\n",
82+
"3 Stopped by during the late May bank holiday of... 1\n",
83+
"4 The selection on the menu was great and so wer... 1"
84+
]
85+
},
86+
"execution_count": 2,
87+
"metadata": {},
88+
"output_type": "execute_result"
89+
}
90+
],
91+
"source": [
92+
"df = pd.read_csv(\"Restaurant_Reviews.tsv\",sep='\\t')\n",
93+
"df.head()"
94+
]
95+
},
96+
{
97+
"cell_type": "code",
98+
"execution_count": 3,
99+
"id": "colonial-cassette",
100+
"metadata": {},
101+
"outputs": [
102+
{
103+
"data": {
104+
"text/plain": [
105+
"'Wow... Loved this place.'"
106+
]
107+
},
108+
"execution_count": 3,
109+
"metadata": {},
110+
"output_type": "execute_result"
111+
}
112+
],
113+
"source": [
114+
"df.iloc[0]['Review']"
115+
]
116+
},
117+
{
118+
"cell_type": "code",
119+
"execution_count": 4,
120+
"id": "owned-adelaide",
121+
"metadata": {},
122+
"outputs": [],
123+
"source": [
124+
"import re\n",
125+
"from nltk.stem import PorterStemmer\n",
126+
"from nltk.corpus import stopwords\n",
127+
"ps = PorterStemmer()\n",
128+
"\n",
129+
"corpus=[]\n",
130+
"for i in range(0,len(df)):\n",
131+
" Review = re.sub('[^a-zA-Z]',' ',df['Review'][i])\n",
132+
" Review = Review.lower()\n",
133+
" Review = Review.split()\n",
134+
" Review = [ps.stem(word) for word in Review if word not in set(stopwords.words('english'))]\n",
135+
" Review = ' '.join(Review)\n",
136+
" corpus.append(Review)"
137+
]
138+
},
139+
{
140+
"cell_type": "code",
141+
"execution_count": 5,
142+
"id": "ambient-backing",
143+
"metadata": {},
144+
"outputs": [
145+
{
146+
"data": {
147+
"text/plain": [
148+
"'wow love place'"
149+
]
150+
},
151+
"execution_count": 5,
152+
"metadata": {},
153+
"output_type": "execute_result"
154+
}
155+
],
156+
"source": [
157+
"corpus[0]"
158+
]
159+
},
160+
{
161+
"cell_type": "code",
162+
"execution_count": 6,
163+
"id": "brave-start",
164+
"metadata": {},
165+
"outputs": [
166+
{
167+
"data": {
168+
"text/plain": [
169+
"Review 0\n",
170+
"Liked 0\n",
171+
"dtype: int64"
172+
]
173+
},
174+
"execution_count": 6,
175+
"metadata": {},
176+
"output_type": "execute_result"
177+
}
178+
],
179+
"source": [
180+
"df.isnull().sum()"
181+
]
182+
},
183+
{
184+
"cell_type": "code",
185+
"execution_count": 7,
186+
"id": "cloudy-enterprise",
187+
"metadata": {},
188+
"outputs": [],
189+
"source": [
190+
"X = df['Review']\n",
191+
"y = df['Liked']"
192+
]
193+
},
194+
{
195+
"cell_type": "code",
196+
"execution_count": 8,
197+
"id": "intellectual-latino",
198+
"metadata": {},
199+
"outputs": [
200+
{
201+
"data": {
202+
"text/plain": [
203+
"0 50.0\n",
204+
"1 50.0\n",
205+
"Name: Liked, dtype: float64"
206+
]
207+
},
208+
"execution_count": 8,
209+
"metadata": {},
210+
"output_type": "execute_result"
211+
}
212+
],
213+
"source": [
214+
"y.value_counts(1)*100"
215+
]
216+
},
217+
{
218+
"cell_type": "code",
219+
"execution_count": 9,
220+
"id": "returning-cartridge",
221+
"metadata": {},
222+
"outputs": [],
223+
"source": [
224+
"# Train Test Split\n",
225+
"from sklearn.model_selection import train_test_split\n",
226+
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 0)"
227+
]
228+
},
229+
{
230+
"cell_type": "code",
231+
"execution_count": 10,
232+
"id": "curious-modem",
233+
"metadata": {},
234+
"outputs": [],
235+
"source": [
236+
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
237+
"vectorizer = TfidfVectorizer()\n",
238+
"X_train_vect = vectorizer.fit_transform(X_train)\n",
239+
"X_test_vect = vectorizer.transform(X_test)"
240+
]
241+
},
242+
{
243+
"cell_type": "code",
244+
"execution_count": 11,
245+
"id": "ordered-consumption",
246+
"metadata": {},
247+
"outputs": [
248+
{
249+
"data": {
250+
"text/plain": [
251+
"LogisticRegression()"
252+
]
253+
},
254+
"execution_count": 11,
255+
"metadata": {},
256+
"output_type": "execute_result"
257+
}
258+
],
259+
"source": [
260+
"from sklearn.linear_model import LogisticRegression\n",
261+
"clf = LogisticRegression(solver='lbfgs')\n",
262+
"clf.fit(X_train_vect,y_train)"
263+
]
264+
},
265+
{
266+
"cell_type": "code",
267+
"execution_count": 12,
268+
"id": "prepared-acrobat",
269+
"metadata": {},
270+
"outputs": [
271+
{
272+
"name": "stdout",
273+
"output_type": "stream",
274+
"text": [
275+
" precision recall f1-score support\n",
276+
"\n",
277+
" 0 0.75 0.85 0.80 143\n",
278+
" 1 0.84 0.75 0.79 157\n",
279+
"\n",
280+
" accuracy 0.79 300\n",
281+
" macro avg 0.80 0.80 0.79 300\n",
282+
"weighted avg 0.80 0.79 0.79 300\n",
283+
"\n"
284+
]
285+
}
286+
],
287+
"source": [
288+
"from sklearn.metrics import accuracy_score, classification_report\n",
289+
"y_pred = clf.predict(X_test_vect)\n",
290+
"accuracy_score(y_test,y_pred)\n",
291+
"print(classification_report(y_test,y_pred))"
292+
]
293+
},
294+
{
295+
"cell_type": "code",
296+
"execution_count": 19,
297+
"id": "universal-moore",
298+
"metadata": {},
299+
"outputs": [
300+
{
301+
"name": "stdout",
302+
"output_type": "stream",
303+
"text": [
304+
"This is Positive Review\n"
305+
]
306+
}
307+
],
308+
"source": [
309+
"test = \"this resturnt is good\" \n",
310+
"\n",
311+
"a = re.sub('[^a-zA-Z]',' ',test)\n",
312+
"a = a.lower()\n",
313+
"a = a.split()\n",
314+
"a = [lemmatizer.lemmatize(word) for word in a ]\n",
315+
"a = ' '.join(a)\n",
316+
"\n",
317+
"\n",
318+
"example_counts = vectorizer.transform([a])\n",
319+
"\n",
320+
"prediction =clf.predict(example_counts)\n",
321+
"prediction[0]\n",
322+
"\n",
323+
"if prediction[0]==0:\n",
324+
" print(\"This is Negative Review\")\n",
325+
"elif prediction[0]==1:\n",
326+
" print(\"This is Positive Review\")"
327+
]
328+
},
329+
{
330+
"cell_type": "code",
331+
"execution_count": null,
332+
"id": "colored-investment",
333+
"metadata": {},
334+
"outputs": [],
335+
"source": []
336+
}
337+
],
338+
"metadata": {
339+
"kernelspec": {
340+
"display_name": "Python 3",
341+
"language": "python",
342+
"name": "python3"
343+
},
344+
"language_info": {
345+
"codemirror_mode": {
346+
"name": "ipython",
347+
"version": 3
348+
},
349+
"file_extension": ".py",
350+
"mimetype": "text/x-python",
351+
"name": "python",
352+
"nbconvert_exporter": "python",
353+
"pygments_lexer": "ipython3",
354+
"version": "3.7.6"
355+
}
356+
},
357+
"nbformat": 4,
358+
"nbformat_minor": 5
359+
}

0 commit comments

Comments
 (0)