Skip to content

Commit 0f64f5d

Browse files
author
rishabhiitbhu
committed
script written, code refactoring
1 parent c8ca2d8 commit 0f64f5d

File tree

10 files changed

+281
-322
lines changed

10 files changed

+281
-322
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ __pycache__/
99
*.jpeg
1010
# Distribution / packaging
1111
.Python
12+
visualize.ipynb
1213
proxy/
1314
resuts.txt
1415
others/

WhatsApp_img_notes_extractor/behind_the_scenes/__init__.py

Whitespace-only changes.
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"## Import dependencies"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": null,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"from keras.preprocessing.image import *\n",
17+
"import numpy as np\n",
18+
"import matplotlib.pyplot as plt\n",
19+
"import os\n",
20+
"import random\n",
21+
"from glob import glob\n",
22+
"from model import CNN_model\n",
23+
"%matplotlib inline"
24+
]
25+
},
26+
{
27+
"cell_type": "markdown",
28+
"metadata": {},
29+
"source": [
30+
"## Model"
31+
]
32+
},
33+
{
34+
"cell_type": "code",
35+
"execution_count": null,
36+
"metadata": {
37+
"collapsed": true
38+
},
39+
"outputs": [],
40+
"source": [
41+
"model = CNN_model() # model architecture defined in model.py\n",
42+
"# load trained weights\n",
43+
"model.load_weights('weights.h5')"
44+
]
45+
},
46+
{
47+
"cell_type": "markdown",
48+
"metadata": {},
49+
"source": [
50+
"## Check model performance on random images"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": null,
56+
"metadata": {},
57+
"outputs": [],
58+
"source": [
59+
"img_path = random.choice(glob('WhatsApp Images/*'))\n",
60+
"img = load_img(img_path, target_size=(124, 124, 3)) # this is a PIL image\n",
61+
"x = img_to_array(img) / 255.0\n",
62+
"y = model.predict(np.expand_dims(x, axis=0))\n",
63+
"print(np.squeeze(y) > 0.5)\n",
64+
"img"
65+
]
66+
},
67+
{
68+
"cell_type": "markdown",
69+
"metadata": {},
70+
"source": [
71+
"## Prediction"
72+
]
73+
},
74+
{
75+
"cell_type": "code",
76+
"execution_count": null,
77+
"metadata": {
78+
"collapsed": true
79+
},
80+
"outputs": [],
81+
"source": [
82+
"def predict(file_path):\n",
83+
" '''\n",
84+
" predict whether file is a notes image\n",
85+
" '''\n",
86+
" img = load_img(file_path, target_size=(124, 124, 3))\n",
87+
" x = img_to_array(img) / 255. \n",
88+
" y = model.predict(np.expand_dims(x, axis=0))\n",
89+
" return np.squeeze(y) > 0.5"
90+
]
91+
},
92+
{
93+
"cell_type": "code",
94+
"execution_count": null,
95+
"metadata": {
96+
"collapsed": true
97+
},
98+
"outputs": [],
99+
"source": [
100+
"# create 'notes' folder to store extracted notes images\n",
101+
"if not os.path.exists('notes'):\n",
102+
" os.mkdir('notes')"
103+
]
104+
},
105+
{
106+
"cell_type": "code",
107+
"execution_count": null,
108+
"metadata": {
109+
"collapsed": true
110+
},
111+
"outputs": [],
112+
"source": [
113+
"# extract notes from WhatsApp Images folder\n",
114+
"\n",
115+
"files = glob('WhatsApp Images/*.*') + glob('WhatsApp Images/Sent/*.*')\n",
116+
"\n",
117+
"for file_path in files:\n",
118+
" if predict(file_path): # check if the file is one of the notes\n",
119+
" file_name = file_path.split('/')[-1] # get file name\n",
120+
" os.rename(file_path, 'notes/' + file_name) # move the file to 'notes' folder"
121+
]
122+
},
123+
{
124+
"cell_type": "code",
125+
"execution_count": null,
126+
"metadata": {
127+
"collapsed": true
128+
},
129+
"outputs": [],
130+
"source": []
131+
}
132+
],
133+
"metadata": {
134+
"kernelspec": {
135+
"display_name": "Python [conda env:ML]",
136+
"language": "python",
137+
"name": "conda-env-ML-py"
138+
},
139+
"language_info": {
140+
"codemirror_mode": {
141+
"name": "ipython",
142+
"version": 3
143+
},
144+
"file_extension": ".py",
145+
"mimetype": "text/x-python",
146+
"name": "python",
147+
"nbconvert_exporter": "python",
148+
"pygments_lexer": "ipython3",
149+
"version": "3.5.4"
150+
},
151+
"widgets": {
152+
"state": {},
153+
"version": "1.1.2"
154+
}
155+
},
156+
"nbformat": 4,
157+
"nbformat_minor": 2
158+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import keras
2+
from keras.models import *
3+
from keras.layers import *
4+
from keras.preprocessing.image import *
5+
import numpy as np
6+
7+
# ## Define Model
8+
def CNN_model(input_shape=(124, 124, 3)):
    """Build and compile a small CNN for binary image classification.

    Architecture: three Conv2D + ReLU + MaxPool blocks (32, 32, 64
    filters), then Flatten -> Dense(64) + ReLU -> Dropout(0.5) ->
    Dense(1) + sigmoid.

    Parameters
    ----------
    input_shape : tuple, default (124, 124, 3)
        Shape of the input images. The default matches the
        (124, 124, 3) size used by the training/visualization
        notebooks, so existing callers of ``CNN_model()`` are
        unaffected.

    Returns
    -------
    A compiled Keras ``Sequential`` model (``binary_crossentropy``
    loss, ``adam`` optimizer, ``accuracy`` metric).
    """
    model = Sequential()

    # Conv block 1 — receives the input shape
    model.add(Conv2D(32, (3, 3), input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Conv block 2
    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Conv block 3
    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Classifier head
    model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))  # regularization to reduce overfitting
    model.add(Dense(1))
    model.add(Activation('sigmoid'))  # single sigmoid unit -> probability for binary label

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model

Whatsapp_notes_extractor/train.ipynb renamed to WhatsApp_img_notes_extractor/behind_the_scenes/train.ipynb

Lines changed: 16 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -9,25 +9,17 @@
99
},
1010
{
1111
"cell_type": "code",
12-
"execution_count": 17,
13-
"metadata": {
14-
"collapsed": true
15-
},
12+
"execution_count": null,
13+
"metadata": {},
1614
"outputs": [],
1715
"source": [
18-
"import keras\n",
19-
"from keras.models import *\n",
20-
"from keras.layers import *\n",
21-
"from keras.optimizers import *\n",
2216
"from keras.preprocessing.image import *\n",
23-
"from keras.callbacks import EarlyStopping\n",
24-
"from tqdm import tqdm\n",
25-
"from sklearn.model_selection import train_test_split\n",
2617
"import numpy as np\n",
27-
"import pandas as pd\n",
2818
"import matplotlib.pyplot as plt\n",
2919
"import os\n",
3020
"import random\n",
21+
"from glob import glob\n",
22+
"from model import CNN_model\n",
3123
"%matplotlib inline"
3224
]
3325
},
@@ -40,17 +32,9 @@
4032
},
4133
{
4234
"cell_type": "code",
43-
"execution_count": 3,
35+
"execution_count": null,
4436
"metadata": {},
45-
"outputs": [
46-
{
47-
"name": "stdout",
48-
"output_type": "stream",
49-
"text": [
50-
"Found 734 images belonging to 2 classes.\n"
51-
]
52-
}
53-
],
37+
"outputs": [],
5438
"source": [
5539
"batch_size = 4\n",
5640
"\n",
@@ -74,65 +58,14 @@
7458
},
7559
{
7660
"cell_type": "code",
77-
"execution_count": 11,
61+
"execution_count": null,
7862
"metadata": {},
79-
"outputs": [
80-
{
81-
"data": {
82-
"text/plain": [
83-
"((4, 124, 124, 3), (4,))"
84-
]
85-
},
86-
"execution_count": 11,
87-
"metadata": {},
88-
"output_type": "execute_result"
89-
}
90-
],
63+
"outputs": [],
9164
"source": [
9265
"x, y = next(train_generator)\n",
9366
"x.shape, y.shape"
9467
]
9568
},
96-
{
97-
"cell_type": "markdown",
98-
"metadata": {},
99-
"source": [
100-
"## Build Model"
101-
]
102-
},
103-
{
104-
"cell_type": "code",
105-
"execution_count": 43,
106-
"metadata": {
107-
"collapsed": true
108-
},
109-
"outputs": [],
110-
"source": [
111-
"model = Sequential()\n",
112-
"model.add(Conv2D(32, (3, 3), input_shape=(124, 124, 3)))\n",
113-
"model.add(Activation('relu'))\n",
114-
"model.add(MaxPooling2D(pool_size=(2, 2)))\n",
115-
"\n",
116-
"model.add(Conv2D(32, (3, 3)))\n",
117-
"model.add(Activation('relu'))\n",
118-
"model.add(MaxPooling2D(pool_size=(2, 2)))\n",
119-
"\n",
120-
"model.add(Conv2D(64, (3, 3)))\n",
121-
"model.add(Activation('relu'))\n",
122-
"model.add(MaxPooling2D(pool_size=(2, 2)))\n",
123-
"\n",
124-
"model.add(Flatten()) # this converts our 3D feature maps to 1D feature vectors\n",
125-
"model.add(Dense(64))\n",
126-
"model.add(Activation('relu'))\n",
127-
"model.add(Dropout(0.5))\n",
128-
"model.add(Dense(1))\n",
129-
"model.add(Activation('sigmoid'))\n",
130-
"\n",
131-
"model.compile(loss='binary_crossentropy',\n",
132-
" optimizer='adam',\n",
133-
" metrics=['accuracy'])"
134-
]
135-
},
13669
{
13770
"cell_type": "markdown",
13871
"metadata": {},
@@ -142,27 +75,12 @@
14275
},
14376
{
14477
"cell_type": "code",
145-
"execution_count": 44,
78+
"execution_count": null,
14679
"metadata": {},
147-
"outputs": [
148-
{
149-
"name": "stdout",
150-
"output_type": "stream",
151-
"text": [
152-
"Epoch 1/5\n",
153-
"500/500 [==============================] - 190s 381ms/step - loss: 0.4161 - acc: 0.8125\n",
154-
"Epoch 2/5\n",
155-
"500/500 [==============================] - 125s 249ms/step - loss: 0.3499 - acc: 0.8150\n",
156-
"Epoch 3/5\n",
157-
"500/500 [==============================] - 164s 328ms/step - loss: 0.3263 - acc: 0.8190\n",
158-
"Epoch 4/5\n",
159-
"500/500 [==============================] - 105s 210ms/step - loss: 0.3191 - acc: 0.8410\n",
160-
"Epoch 5/5\n",
161-
"500/500 [==============================] - 100s 201ms/step - loss: 0.3118 - acc: 0.8280\n"
162-
]
163-
}
164-
],
80+
"outputs": [],
16581
"source": [
82+
"model = CNN_model()\n",
83+
"\n",
16684
"model.fit_generator(\n",
16785
" train_generator,\n",
16886
" steps_per_epoch=2000 // batch_size,\n",
@@ -179,23 +97,9 @@
17997
},
18098
{
18199
"cell_type": "code",
182-
"execution_count": 70,
100+
"execution_count": null,
183101
"metadata": {},
184-
"outputs": [
185-
{
186-
"data": {
187-
"text/plain": [
188-
"array([[ True],\n",
189-
" [ True],\n",
190-
" [ True],\n",
191-
" [False]], dtype=bool)"
192-
]
193-
},
194-
"execution_count": 70,
195-
"metadata": {},
196-
"output_type": "execute_result"
197-
}
198-
],
102+
"outputs": [],
199103
"source": [
200104
"x, y = next(train_generator)\n",
201105
"y = y.reshape(len(y), 1)\n",
@@ -208,10 +112,8 @@
208112
},
209113
{
210114
"cell_type": "code",
211-
"execution_count": 47,
212-
"metadata": {
213-
"collapsed": true
214-
},
115+
"execution_count": null,
116+
"metadata": {},
215117
"outputs": [],
216118
"source": [
217119
"img_path = random.choice(glob('data/1/*'))\n",

0 commit comments

Comments
 (0)