|
90 | 90 | "height": 224 |
91 | 91 | }, |
92 | 92 | "id": "gjdumhTTgkHj", |
93 | | - "outputId": "ba2eeab4-14e8-4c86-f169-13abfbfc4d6e" |
| 93 | + "outputId": "2b468ded-ae47-4c98-ef71-61dca67f21c3" |
94 | 94 | }, |
95 | 95 | "source": [ |
96 | 96 | "import pandas as pd\n", |
|
732 | 732 | "base_uri": "https://localhost:8080/" |
733 | 733 | }, |
734 | 734 | "id": "X6NbjA1egkHo", |
735 | | - "outputId": "52c847c9-987a-47ad-b806-2ef4a94a72f4" |
| 735 | + "outputId": "08e6e404-49a4-4ef0-d326-e445412c1d19" |
736 | 736 | }, |
737 | 737 | "source": [ |
738 | 738 | "X.shape" |
|
767 | 767 | "base_uri": "https://localhost:8080/" |
768 | 768 | }, |
769 | 769 | "id": "MmCfqXnGgkHo", |
770 | | - "outputId": "ec4233f8-611a-4bfa-f952-c3a91a8e17d7" |
| 770 | + "outputId": "6f1ed09e-e3e9-443d-982e-0440ca097f19" |
771 | 771 | }, |
772 | 772 | "source": [ |
773 | 773 | "from sklearn.decomposition import NMF\n", |
|
835 | 835 | "height": 556 |
836 | 836 | }, |
837 | 837 | "id": "EBiAR0AdgkHp", |
838 | | - "outputId": "441578f2-25a9-42ea-9c24-0e4fca466221" |
| 838 | + "outputId": "319bfc85-c5f3-4b8b-cb6b-a6665ac0af42" |
839 | 839 | }, |
840 | 840 | "source": [ |
841 | 841 | "import seaborn as sns\n", |
|
861 | 861 | "output_type": "execute_result", |
862 | 862 | "data": { |
863 | 863 | "text/plain": [ |
864 | | - "<matplotlib.axes._subplots.AxesSubplot at 0x7f9fb8a62b90>" |
| 864 | + "<matplotlib.axes._subplots.AxesSubplot at 0x7f772c530790>" |
865 | 865 | ] |
866 | 866 | }, |
867 | 867 | "metadata": {}, |
|
1001 | 1001 | "height": 1000 |
1002 | 1002 | }, |
1003 | 1003 | "id": "xDQMiAkWgkHq", |
1004 | | - "outputId": "92e7a2c2-c6fb-4aa5-e95b-210b0911690d" |
| 1004 | + "outputId": "9d5111dd-040c-4a13-8245-84826ba93f0d" |
1005 | 1005 | }, |
1006 | 1006 | "source": [ |
1007 | 1007 | "plot_top_words(model, req_cols, n_top_words, \"MF Based Embedding of Frequent Customers\")" |
|
1036 | 1036 | "base_uri": "https://localhost:8080/" |
1037 | 1037 | }, |
1038 | 1038 | "id": "E3ToNbWogkHr", |
1039 | | - "outputId": "11bea8cf-d670-443b-fbf0-1228a06a7f29" |
| 1039 | + "outputId": "56c4a453-e015-400a-b4de-a0f8ec28a572" |
1040 | 1040 | }, |
1041 | 1041 | "source": [ |
1042 | 1042 | "for k,v in topic_features.items():\n", |
|
1102 | 1102 | "base_uri": "https://localhost:8080/" |
1103 | 1103 | }, |
1104 | 1104 | "id": "F-GCl7y8gkHr", |
1105 | | - "outputId": "5fa6352e-3ced-4cb3-aeee-03a73e68bcdf" |
| 1105 | + "outputId": "4adb97c7-c9b8-4304-f1a0-b85f0142d739" |
1106 | 1106 | }, |
1107 | 1107 | "source": [ |
1108 | 1108 | "W.shape" |
|
1139 | 1139 | "base_uri": "https://localhost:8080/" |
1140 | 1140 | }, |
1141 | 1141 | "id": "Q9n2uF5ZgkHr", |
1142 | | - "outputId": "79f5eb45-db91-47a0-b5c5-93c8ea184add" |
| 1142 | + "outputId": "c2fc9eb0-ae93-400f-b79e-0a757ba203b4" |
1143 | 1143 | }, |
1144 | 1144 | "source": [ |
1145 | 1145 | "import hdbscan\n", |
|
1182 | 1182 | "base_uri": "https://localhost:8080/" |
1183 | 1183 | }, |
1184 | 1184 | "id": "4aOhW1I1gkHs", |
1185 | | - "outputId": "6102cb7c-7105-40f0-df1b-38e6d4b33614" |
| 1185 | + "outputId": "dda764d8-f59d-45cd-dbde-94ec26cb3f46" |
1186 | 1186 | }, |
1187 | 1187 | "source": [ |
1188 | 1188 | "np.unique(clusterer.labels_)" |
|
1208 | 1208 | "base_uri": "https://localhost:8080/" |
1209 | 1209 | }, |
1210 | 1210 | "id": "PwNJesFwe7IT", |
1211 | | - "outputId": "abf58a4f-5011-4aac-c104-c71204c78c72" |
| 1211 | + "outputId": "e130dbe9-fec0-4a5d-daf4-339bd4d8c7a4" |
1212 | 1212 | }, |
1213 | 1213 | "source": [ |
1214 | 1214 | "clusterer.labels_.shape" |
|
1234 | 1234 | "base_uri": "https://localhost:8080/" |
1235 | 1235 | }, |
1236 | 1236 | "id": "WPTzfAm-gkHs", |
1237 | | - "outputId": "a0c5863e-584c-4270-b667-44569ff148a4" |
| 1237 | + "outputId": "88758446-5cd5-40e9-af9f-821cc1e75350" |
1238 | 1238 | }, |
1239 | 1239 | "source": [ |
1240 | 1240 | "W.shape" |
|
1307 | 1307 | "height": 502 |
1308 | 1308 | }, |
1309 | 1309 | "id": "4M8ufspxgkHt", |
1310 | | - "outputId": "5e6be679-82e3-4cb2-ae42-77a41c060099" |
| 1310 | + "outputId": "1e6c9db7-db39-4acb-ea2e-738dc4bac1a4" |
1311 | 1311 | }, |
1312 | 1312 | "source": [ |
1313 | 1313 | "cluster_counts = dft[\"cluster\"].value_counts()\n", |
|
1319 | 1319 | "output_type": "execute_result", |
1320 | 1320 | "data": { |
1321 | 1321 | "text/plain": [ |
1322 | | - "<matplotlib.axes._subplots.AxesSubplot at 0x7f9f9a88cd10>" |
| 1322 | + "<matplotlib.axes._subplots.AxesSubplot at 0x7f77256c8110>" |
1323 | 1323 | ] |
1324 | 1324 | }, |
1325 | 1325 | "metadata": {}, |
|
1372 | 1372 | "height": 266 |
1373 | 1373 | }, |
1374 | 1374 | "id": "4zYpWZUk7TOJ", |
1375 | | - "outputId": "f3e93a71-d84b-455b-e044-574eb8dbe398" |
| 1375 | + "outputId": "e7b7eb75-d726-4ed1-fcae-8a8b81b8b72d" |
1376 | 1376 | }, |
1377 | 1377 | "source": [ |
1378 | 1378 | "colsneeded = [\"AIG_\"+ str(i) for i in range(5)]\n", |
|
1495 | 1495 | "height": 235 |
1496 | 1496 | }, |
1497 | 1497 | "id": "7_BcGoK64xLE", |
1498 | | - "outputId": "66a1f507-c44d-4162-8b21-43d80852a066" |
| 1498 | + "outputId": "c79980fa-cb30-4118-f1d5-7525fb297ca1" |
1499 | 1499 | }, |
1500 | 1500 | "source": [ |
1501 | 1501 | "from sklearn import preprocessing\n", |
|
1611 | 1611 | "height": 503 |
1612 | 1612 | }, |
1613 | 1613 | "id": "OCalzY0qcwyx", |
1614 | | - "outputId": "3597408e-b4b7-46e8-83e5-72d6b041970e" |
| 1614 | + "outputId": "02851c60-b912-4169-d3a8-b9847fdc3eae" |
1615 | 1615 | }, |
1616 | 1616 | "source": [ |
1617 | 1617 | "import seaborn as sns\n", |
|
1625 | 1625 | "output_type": "execute_result", |
1626 | 1626 | "data": { |
1627 | 1627 | "text/plain": [ |
1628 | | - "<matplotlib.axes._subplots.AxesSubplot at 0x7f9f99462110>" |
| 1628 | + "<matplotlib.axes._subplots.AxesSubplot at 0x7f772565a810>" |
1629 | 1629 | ] |
1630 | 1630 | }, |
1631 | 1631 | "metadata": {}, |
|
1660 | 1660 | "height": 556 |
1661 | 1661 | }, |
1662 | 1662 | "id": "mB9Pe3CJgkHt", |
1663 | | - "outputId": "a0596278-a6fe-43ff-fd43-f8eeda733317" |
| 1663 | + "outputId": "34a5cf99-b698-435a-ae75-174c502d8105" |
1664 | 1664 | }, |
1665 | 1665 | "source": [ |
1666 | 1666 | "plt.rcParams['figure.figsize'] = [11, 8]\n", |
|
1683 | 1683 | "output_type": "execute_result", |
1684 | 1684 | "data": { |
1685 | 1685 | "text/plain": [ |
1686 | | - "<matplotlib.axes._subplots.AxesSubplot at 0x7f9f9886a510>" |
| 1686 | + "<matplotlib.axes._subplots.AxesSubplot at 0x7f7725532190>" |
1687 | 1687 | ] |
1688 | 1688 | }, |
1689 | 1689 | "metadata": {}, |
|
1718 | 1718 | "base_uri": "https://localhost:8080/" |
1719 | 1719 | }, |
1720 | 1720 | "id": "m0Lo8WDsgkHt", |
1721 | | - "outputId": "f111f549-c96d-4dac-9578-9f21261fe361" |
| 1721 | + "outputId": "9dec839b-a885-4959-cb3d-0145ea2f1c43" |
1722 | 1722 | }, |
1723 | 1723 | "source": [ |
1724 | 1724 | "\n", |
|
1777 | 1777 | " 'DB_service_port': 8529,\n", |
1778 | 1778 | " 'arangodb_replication_factor': 3,\n", |
1779 | 1779 | " 'conn_protocol': 'https',\n", |
1780 | | - " 'dbName': 'MLnhbcqq5p2uffy78m792gw',\n", |
1781 | | - " 'password': 'MLutpuycscsnr9srnluavgco',\n", |
1782 | | - " 'username': 'MLqxvued4g83cfrg0jck23'},\n", |
| 1780 | + " 'dbName': 'MLd3glwsq6jyss6f4abo5btf',\n", |
| 1781 | + " 'password': 'MLl1li9rhakopyltuqdho4wc',\n", |
| 1782 | + " 'username': 'MLz1qsetxx367d83agoc4zo'},\n", |
1783 | 1783 | " 'mlgraph': {'graphname': 'enterprise_ml_graph'}}" |
1784 | 1784 | ] |
1785 | 1785 | }, |
|
1951 | 1951 | "base_uri": "https://localhost:8080/" |
1952 | 1952 | }, |
1953 | 1953 | "id": "NDFgef8T6Z3O", |
1954 | | - "outputId": "4229c829-d0c9-445f-e4aa-b120eefac0ae" |
| 1954 | + "outputId": "d8e5748b-25b4-41ed-9ec5-eba7981c7270" |
1955 | 1955 | }, |
1956 | 1956 | "source": [ |
1957 | 1957 | "df.shape[0]" |
|
1986 | 1986 | "base_uri": "https://localhost:8080/" |
1987 | 1987 | }, |
1988 | 1988 | "id": "Daz665EvCU3H", |
1989 | | - "outputId": "56697884-5aea-415e-df15-a56c35826593" |
| 1989 | + "outputId": "a606a453-41d1-4d70-daf8-326eb367c539" |
1990 | 1990 | }, |
1991 | 1991 | "source": [ |
1992 | 1992 | "import json\n", |
|
2026 | 2026 | "output_type": "stream", |
2027 | 2027 | "text": [ |
2028 | 2028 | "CPU times: user 3 µs, sys: 0 ns, total: 3 µs\n", |
2029 | | - "Wall time: 5.96 µs\n", |
| 2029 | + "Wall time: 5.72 µs\n", |
2030 | 2030 | "Inserting batch 1\n", |
2031 | 2031 | "Inserting batch 2\n", |
2032 | 2032 | "Inserting batch 3\n", |
|
2062 | 2062 | "base_uri": "https://localhost:8080/" |
2063 | 2063 | }, |
2064 | 2064 | "id": "OOapu6CQL41p", |
2065 | | - "outputId": "61b4cd24-8448-43ca-8115-a8445945a375" |
| 2065 | + "outputId": "286a94d4-2105-4205-a3f5-d568df79e835" |
2066 | 2066 | }, |
2067 | 2067 | "source": [ |
2068 | 2068 | "import json\n", |
|
2098 | 2098 | { |
2099 | 2099 | "output_type": "stream", |
2100 | 2100 | "text": [ |
2101 | | - "CPU times: user 3 µs, sys: 1 µs, total: 4 µs\n", |
2102 | | - "Wall time: 6.68 µs\n", |
| 2101 | + "CPU times: user 3 µs, sys: 0 ns, total: 3 µs\n", |
| 2102 | + "Wall time: 6.2 µs\n", |
2103 | 2103 | "Inserting batch 1\n", |
2104 | 2104 | "Inserting batch 2\n", |
2105 | 2105 | "Inserting batch the last batch!\n" |
|
2124 | 2124 | "base_uri": "https://localhost:8080/" |
2125 | 2125 | }, |
2126 | 2126 | "id": "X_17W43u59kq", |
2127 | | - "outputId": "2c098d75-3918-41a5-dd6e-0322ef299cec" |
| 2127 | + "outputId": "c9d782f5-f7c0-4281-bbe5-fb2c1216678c" |
2128 | 2128 | }, |
2129 | 2129 | "source": [ |
2130 | 2130 | "import json\n", |
|
2161 | 2161 | { |
2162 | 2162 | "output_type": "stream", |
2163 | 2163 | "text": [ |
2164 | | - "CPU times: user 0 ns, sys: 2 µs, total: 2 µs\n", |
2165 | | - "Wall time: 4.53 µs\n", |
| 2164 | + "CPU times: user 4 µs, sys: 1 µs, total: 5 µs\n", |
| 2165 | + "Wall time: 8.34 µs\n", |
2166 | 2166 | "Inserting batch the last batch!\n" |
2167 | 2167 | ], |
2168 | 2168 | "name": "stdout" |
|
2175 | 2175 | "id": "iyrR2icdt3Th" |
2176 | 2176 | }, |
2177 | 2177 | "source": [ |
2178 | | - "## Query to find customers similar to a customer" |
| 2178 | + "## Query to find customers similar to a customer\n", |
| 2179 | + "\n", |
| 2180 | + "### AQL Query (can be run through the web UI queries tab)\n", |
| 2181 | + "The following query can be used to find customers similar to a customer\n", |
| 2182 | + "```\n/*locate specific user*/\n", |
| 2183 | + "LET uemb = (\n", |
| 2184 | + "FOR u in Customers\n", |
| 2185 | + " FILTER u._id == \"Customers/12748\"\n", |
| 2186 | + " FOR j in RANGE(0,4)\n", |
| 2187 | + " RETURN TO_NUMBER(NTH(u.mf_emb,j))\n", |
| 2188 | + " )\n", |
| 2189 | + "\n", |
| 2190 | + "/*calculate distance from user to all other users*/\n", |
| 2191 | + "LET dau = (\n", |
| 2192 | + " FOR v in Customers\n", |
| 2193 | + " /* Limit to users that have an embedding*/\n", |
| 2194 | + " FILTER HAS(v, \"mf_emb\")\n", |
| 2195 | + " LET dv = (SQRT(SUM(\n", |
| 2196 | + " \n", |
| 2197 | + " FOR i in RANGE(0,4)\n", |
| 2198 | + " LET di = TO_NUMBER(NTH(uemb, i)) - TO_NUMBER(NTH(v.mf_emb, i))\n", |
| 2199 | + " RETURN POW(di,2)\n", |
| 2200 | + " )))\n", |
| 2201 | + " RETURN {\"user\": v._id, \"dist\": dv}\n", |
| 2202 | + " )\n", |
| 2203 | + "/*sort results*/ \n", |
| 2204 | + "FOR du in dau\n", |
| 2205 | + " SORT du.dist\n", |
| 2206 | + " RETURN {\"user\": du.user, \"dist\": du.dist}\n", |
| 2207 | + "```\n" |
2179 | 2208 | ] |
2180 | 2209 | }, |
2181 | 2210 | { |
|
2193 | 2222 | "id": "LCS6POAauAIE" |
2194 | 2223 | }, |
2195 | 2224 | "source": [ |
2196 | | - "## Query to find items similar to an item" |
| 2225 | + "## Query to find items similar to an item\n", |
| 2226 | + "\n", |
| 2227 | + "### AQL Query (can be run through the web UI queries tab)\n", |
| 2228 | + "The following query can be used to find items similar to an item\n", |
| 2229 | + "\n", |
| 2230 | + "```\n", |
| 2231 | + "LET uemb = (\n", |
| 2232 | + "FOR u in Items\n", |
| 2233 | + " FILTER u._id == \"Items/22045\"\n", |
| 2234 | + " FOR j in RANGE(0,4)\n", |
| 2235 | + " RETURN TO_NUMBER(NTH(u.mf_emb,j))\n", |
| 2236 | + " )\n", |
| 2237 | + "\n", |
| 2238 | + "/*calculate distance from item to all other items*/\n", |
| 2239 | + "LET dau = (\n", |
| 2240 | + " FOR v in Items\n", |
| 2241 | + " /* Limit to items that have an embedding*/\n", |
| 2242 | + " FILTER HAS(v, \"mf_emb\")\n", |
| 2243 | + " LET dv = (SQRT(SUM(\n", |
| 2244 | + " \n", |
| 2245 | + " FOR i in RANGE(0,4)\n", |
| 2246 | + " LET di = TO_NUMBER(NTH(uemb, i)) - TO_NUMBER(NTH(v.mf_emb, i))\n", |
| 2247 | + " RETURN POW(di,2)\n", |
| 2248 | + " )))\n", |
| 2249 | + " RETURN {\"item\": v._id, \"dist\": dv}\n", |
| 2250 | + " )\n", |
| 2251 | + "/*sort results*/\nFOR d in dau\n    SORT d.dist\n    RETURN {\"item\": d.item, \"dist\": d.dist}\n```" |
2197 | 2252 | ] |
2198 | 2253 | }, |
2199 | 2254 | { |
|
2211 | 2266 | "id": "FCPJHoaquEP3" |
2212 | 2267 | }, |
2213 | 2268 | "source": [ |
2214 | | - "## Query to find customers in a cluster" |
| 2269 | + "## Query to find customers in a cluster\n", |
| 2270 | + "\n", |
| 2271 | + "### AQL Query (can be run through the web UI queries tab)\n", |
| 2272 | + "The following query can be used to find customers in a cluster\n", |
| 2273 | + "\n", |
| 2274 | + "```\n", |
| 2275 | + "FOR c in Customers\n", |
| 2276 | + " FILTER c.cluster == '4'\n", |
| 2277 | + " RETURN c \n", |
| 2278 | + "\n", |
| 2279 | + "```" |
2215 | 2280 | ] |
2216 | 2281 | }, |
2217 | 2282 | { |
|
0 commit comments