Skip to content

Commit 0f64f5d

Browse files
author
rishabhiitbhu
committed
script written, code refactoring
1 parent c8ca2d8 commit 0f64f5d

File tree

10 files changed

+281
-322
lines changed

10 files changed

+281
-322
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ __pycache__/
99
*.jpeg
1010
# Distribution / packaging
1111
.Python
12+
visualize.ipynb
1213
proxy/
1314
resuts.txt
1415
others/

WhatsApp_img_notes_extractor/behind_the_scenes/__init__.py

Whitespace-only changes.
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"## Import dependencies"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": null,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"from keras.preprocessing.image import *\n",
17+
"import numpy as np\n",
18+
"import matplotlib.pyplot as plt\n",
19+
"import os\n",
20+
"import random\n",
21+
"from glob import glob\n",
22+
"from model import CNN_model\n",
23+
"%matplotlib inline"
24+
]
25+
},
26+
{
27+
"cell_type": "markdown",
28+
"metadata": {},
29+
"source": [
30+
"## Model"
31+
]
32+
},
33+
{
34+
"cell_type": "code",
35+
"execution_count": null,
36+
"metadata": {
37+
"collapsed": true
38+
},
39+
"outputs": [],
40+
"source": [
41+
"model = CNN_model() # model architecture defined in model.py\n",
42+
"# load trained weights\n",
43+
"model.load_weights('weights.h5')"
44+
]
45+
},
46+
{
47+
"cell_type": "markdown",
48+
"metadata": {},
49+
"source": [
50+
"## Check model performance on random images"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": null,
56+
"metadata": {},
57+
"outputs": [],
58+
"source": [
59+
"img_path = random.choice(glob('WhatsApp Images/*'))\n",
60+
"img = load_img(img_path, target_size=(124, 124, 3)) # this is a PIL image\n",
61+
"x = img_to_array(img) / 255.0\n",
62+
"y = model.predict(np.expand_dims(x, axis=0))\n",
63+
"print(np.squeeze(y) > 0.5)\n",
64+
"img"
65+
]
66+
},
67+
{
68+
"cell_type": "markdown",
69+
"metadata": {},
70+
"source": [
71+
"## Prediction"
72+
]
73+
},
74+
{
75+
"cell_type": "code",
76+
"execution_count": null,
77+
"metadata": {
78+
"collapsed": true
79+
},
80+
"outputs": [],
81+
"source": [
82+
"def predict(file_path):\n",
83+
" '''\n",
84+
" predict whether file is a notes image\n",
85+
" '''\n",
86+
" img = load_img(file_path, target_size=(124, 124, 3))\n",
87+
" x = img_to_array(img) / 255. \n",
88+
" y = model.predict(np.expand_dims(x, axis=0))\n",
89+
" return np.squeeze(y) > 0.5"
90+
]
91+
},
92+
{
93+
"cell_type": "code",
94+
"execution_count": null,
95+
"metadata": {
96+
"collapsed": true
97+
},
98+
"outputs": [],
99+
"source": [
100+
"# create 'notes' folder to store extracted notes images\n",
101+
"if not os.path.exists('notes'):\n",
102+
" os.mkdir('notes')"
103+
]
104+
},
105+
{
106+
"cell_type": "code",
107+
"execution_count": null,
108+
"metadata": {
109+
"collapsed": true
110+
},
111+
"outputs": [],
112+
"source": [
113+
"# extract notes from WhatsApp Images folder\n",
114+
"\n",
115+
"files = glob('WhatsApp Images/*.*') + glob('WhatsApp Images/Sent/*.*')\n",
116+
"\n",
117+
"for file_path in files:\n",
118+
" if predict(file_path): # check if the file is one of the notes\n",
119+
" file_name = file_path.split('/')[-1] # get file name\n",
120+
" os.rename(file_path, 'notes/' + file_name) # move the file to 'notes' folder"
121+
]
122+
},
123+
{
124+
"cell_type": "code",
125+
"execution_count": null,
126+
"metadata": {
127+
"collapsed": true
128+
},
129+
"outputs": [],
130+
"source": []
131+
}
132+
],
133+
"metadata": {
134+
"kernelspec": {
135+
"display_name": "Python [conda env:ML]",
136+
"language": "python",
137+
"name": "conda-env-ML-py"
138+
},
139+
"language_info": {
140+
"codemirror_mode": {
141+
"name": "ipython",
142+
"version": 3
143+
},
144+
"file_extension": ".py",
145+
"mimetype": "text/x-python",
146+
"name": "python",
147+
"nbconvert_exporter": "python",
148+
"pygments_lexer": "ipython3",
149+
"version": "3.5.4"
150+
},
151+
"widgets": {
152+
"state": {},
153+
"version": "1.1.2"
154+
}
155+
},
156+
"nbformat": 4,
157+
"nbformat_minor": 2
158+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import keras
2+
from keras.models import *
3+
from keras.layers import *
4+
from keras.preprocessing.image import *
5+
import numpy as np
6+
7+
# ## Define Model
8+
def CNN_model(input_shape=(124, 124, 3)):
    """Build and compile a small CNN for binary image classification.

    Architecture: three Conv2D + ReLU + MaxPool blocks (32, 32, 64
    filters), then Flatten -> Dense(64) + ReLU -> Dropout(0.5) ->
    Dense(1) + sigmoid.

    Parameters
    ----------
    input_shape : tuple, default (124, 124, 3)
        Shape of the input images. The default matches the
        (124, 124, 3) size used by the training/visualization
        notebooks, so existing callers of ``CNN_model()`` are
        unaffected.

    Returns
    -------
    A compiled Keras ``Sequential`` model (``binary_crossentropy``
    loss, ``adam`` optimizer, ``accuracy`` metric).
    """
    model = Sequential()

    # Conv block 1 — receives the input shape
    model.add(Conv2D(32, (3, 3), input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Conv block 2
    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Conv block 3
    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Classifier head
    model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))  # regularization to reduce overfitting
    model.add(Dense(1))
    model.add(Activation('sigmoid'))  # single sigmoid unit -> probability for binary label

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model

Whatsapp_notes_extractor/train.ipynb renamed to WhatsApp_img_notes_extractor/behind_the_scenes/train.ipynb

Lines changed: 16 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -9,25 +9,17 @@
99
},
1010
{
1111
"cell_type": "code",
12-
"execution_count": 17,
13-
"metadata": {
14-
"collapsed": true
15-
},
12+
"execution_count": null,
13+
"metadata": {},
1614
"outputs": [],
1715
"source": [
18-
"import keras\n",
19-
"from keras.models import *\n",
20-
"from keras.layers import *\n",
21-
"from keras.optimizers import *\n",
2216
"from keras.preprocessing.image import *\n",
23-
"from keras.callbacks import EarlyStopping\n",
24-
"from tqdm import tqdm\n",
25-
"from sklearn.model_selection import train_test_split\n",
2617
"import numpy as np\n",
27-
"import pandas as pd\n",
2818
"import matplotlib.pyplot as plt\n",
2919
"import os\n",
3020
"import random\n",
21+
"from glob import glob\n",
22+
"from model import CNN_model\n",
3123
"%matplotlib inline"
3224
]
3325
},
@@ -40,17 +32,9 @@
4032
},
4133
{
4234
"cell_type": "code",
43-
"execution_count": 3,
35+
"execution_count": null,
4436
"metadata": {},
45-
"outputs": [
46-
{
47-
"name": "stdout",
48-
"output_type": "stream",
49-
"text": [
50-
"Found 734 images belonging to 2 classes.\n"
51-
]
52-
}
53-
],
37+
"outputs": [],
5438
"source": [
5539
"batch_size = 4\n",
5640
"\n",
@@ -74,65 +58,14 @@
7458
},
7559
{
7660
"cell_type": "code",
77-
"execution_count": 11,
61+
"execution_count": null,
7862
"metadata": {},
79-
"outputs": [
80-
{
81-
"data": {
82-
"text/plain": [
83-
"((4, 124, 124, 3), (4,))"
84-
]
85-
},
86-
"execution_count": 11,
87-
"metadata": {},
88-
"output_type": "execute_result"
89-
}
90-
],
63+
"outputs": [],
9164
"source": [
9265
"x, y = next(train_generator)\n",
9366
"x.shape, y.shape"
9467
]
9568
},
96-
{
97-
"cell_type": "markdown",
98-
"metadata": {},
99-
"source": [
100-
"## Build Model"
101-
]
102-
},
103-
{
104-
"cell_type": "code",
105-
"execution_count": 43,
106-
"metadata": {
107-
"collapsed": true
108-
},
109-
"outputs": [],
110-
"source": [
111-
"model = Sequential()\n",
112-
"model.add(Conv2D(32, (3, 3), input_shape=(124, 124, 3)))\n",
113-
"model.add(Activation('relu'))\n",
114-
"model.add(MaxPooling2D(pool_size=(2, 2)))\n",
115-
"\n",
116-
"model.add(Conv2D(32, (3, 3)))\n",
117-
"model.add(Activation('relu'))\n",
118-
"model.add(MaxPooling2D(pool_size=(2, 2)))\n",
119-
"\n",
120-
"model.add(Conv2D(64, (3, 3)))\n",
121-
"model.add(Activation('relu'))\n",
122-
"model.add(MaxPooling2D(pool_size=(2, 2)))\n",
123-
"\n",
124-
"model.add(Flatten()) # this converts our 3D feature maps to 1D feature vectors\n",
125-
"model.add(Dense(64))\n",
126-
"model.add(Activation('relu'))\n",
127-
"model.add(Dropout(0.5))\n",
128-
"model.add(Dense(1))\n",
129-
"model.add(Activation('sigmoid'))\n",
130-
"\n",
131-
"model.compile(loss='binary_crossentropy',\n",
132-
" optimizer='adam',\n",
133-
" metrics=['accuracy'])"
134-
]
135-
},
13669
{
13770
"cell_type": "markdown",
13871
"metadata": {},
@@ -142,27 +75,12 @@
14275
},
14376
{
14477
"cell_type": "code",
145-
"execution_count": 44,
78+
"execution_count": null,
14679
"metadata": {},
147-
"outputs": [
148-
{
149-
"name": "stdout",
150-
"output_type": "stream",
151-
"text": [
152-
"Epoch 1/5\n",
153-
"500/500 [==============================] - 190s 381ms/step - loss: 0.4161 - acc: 0.8125\n",
154-
"Epoch 2/5\n",
155-
"500/500 [==============================] - 125s 249ms/step - loss: 0.3499 - acc: 0.8150\n",
156-
"Epoch 3/5\n",
157-
"500/500 [==============================] - 164s 328ms/step - loss: 0.3263 - acc: 0.8190\n",
158-
"Epoch 4/5\n",
159-
"500/500 [==============================] - 105s 210ms/step - loss: 0.3191 - acc: 0.8410\n",
160-
"Epoch 5/5\n",
161-
"500/500 [==============================] - 100s 201ms/step - loss: 0.3118 - acc: 0.8280\n"
162-
]
163-
}
164-
],
80+
"outputs": [],
16581
"source": [
82+
"model = CNN_model()\n",
83+
"\n",
16684
"model.fit_generator(\n",
16785
" train_generator,\n",
16886
" steps_per_epoch=2000 // batch_size,\n",
@@ -179,23 +97,9 @@
17997
},
18098
{
18199
"cell_type": "code",
182-
"execution_count": 70,
100+
"execution_count": null,
183101
"metadata": {},
184-
"outputs": [
185-
{
186-
"data": {
187-
"text/plain": [
188-
"array([[ True],\n",
189-
" [ True],\n",
190-
" [ True],\n",
191-
" [False]], dtype=bool)"
192-
]
193-
},
194-
"execution_count": 70,
195-
"metadata": {},
196-
"output_type": "execute_result"
197-
}
198-
],
102+
"outputs": [],
199103
"source": [
200104
"x, y = next(train_generator)\n",
201105
"y = y.reshape(len(y), 1)\n",
@@ -208,10 +112,8 @@
208112
},
209113
{
210114
"cell_type": "code",
211-
"execution_count": 47,
212-
"metadata": {
213-
"collapsed": true
214-
},
115+
"execution_count": null,
116+
"metadata": {},
215117
"outputs": [],
216118
"source": [
217119
"img_path = random.choice(glob('data/1/*'))\n",

0 commit comments

Comments
 (0)