Skip to content

Commit 9b71752

Browse files
author
rajiv.sambasivan@gmail.com
committed
Added queries for MF knowledge graph.
1 parent 4a6689e commit 9b71752

File tree

1 file changed

+101
-36
lines changed

1 file changed

+101
-36
lines changed

notebooks/Matrix_Factorization_Freq_Shoppers.ipynb

Lines changed: 101 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@
9090
"height": 224
9191
},
9292
"id": "gjdumhTTgkHj",
93-
"outputId": "ba2eeab4-14e8-4c86-f169-13abfbfc4d6e"
93+
"outputId": "2b468ded-ae47-4c98-ef71-61dca67f21c3"
9494
},
9595
"source": [
9696
"import pandas as pd\n",
@@ -732,7 +732,7 @@
732732
"base_uri": "https://localhost:8080/"
733733
},
734734
"id": "X6NbjA1egkHo",
735-
"outputId": "52c847c9-987a-47ad-b806-2ef4a94a72f4"
735+
"outputId": "08e6e404-49a4-4ef0-d326-e445412c1d19"
736736
},
737737
"source": [
738738
"X.shape"
@@ -767,7 +767,7 @@
767767
"base_uri": "https://localhost:8080/"
768768
},
769769
"id": "MmCfqXnGgkHo",
770-
"outputId": "ec4233f8-611a-4bfa-f952-c3a91a8e17d7"
770+
"outputId": "6f1ed09e-e3e9-443d-982e-0440ca097f19"
771771
},
772772
"source": [
773773
"from sklearn.decomposition import NMF\n",
@@ -835,7 +835,7 @@
835835
"height": 556
836836
},
837837
"id": "EBiAR0AdgkHp",
838-
"outputId": "441578f2-25a9-42ea-9c24-0e4fca466221"
838+
"outputId": "319bfc85-c5f3-4b8b-cb6b-a6665ac0af42"
839839
},
840840
"source": [
841841
"import seaborn as sns\n",
@@ -861,7 +861,7 @@
861861
"output_type": "execute_result",
862862
"data": {
863863
"text/plain": [
864-
"<matplotlib.axes._subplots.AxesSubplot at 0x7f9fb8a62b90>"
864+
"<matplotlib.axes._subplots.AxesSubplot at 0x7f772c530790>"
865865
]
866866
},
867867
"metadata": {},
@@ -1001,7 +1001,7 @@
10011001
"height": 1000
10021002
},
10031003
"id": "xDQMiAkWgkHq",
1004-
"outputId": "92e7a2c2-c6fb-4aa5-e95b-210b0911690d"
1004+
"outputId": "9d5111dd-040c-4a13-8245-84826ba93f0d"
10051005
},
10061006
"source": [
10071007
"plot_top_words(model, req_cols, n_top_words, \"MF Based Embedding of Frequent Customers\")"
@@ -1036,7 +1036,7 @@
10361036
"base_uri": "https://localhost:8080/"
10371037
},
10381038
"id": "E3ToNbWogkHr",
1039-
"outputId": "11bea8cf-d670-443b-fbf0-1228a06a7f29"
1039+
"outputId": "56c4a453-e015-400a-b4de-a0f8ec28a572"
10401040
},
10411041
"source": [
10421042
"for k,v in topic_features.items():\n",
@@ -1102,7 +1102,7 @@
11021102
"base_uri": "https://localhost:8080/"
11031103
},
11041104
"id": "F-GCl7y8gkHr",
1105-
"outputId": "5fa6352e-3ced-4cb3-aeee-03a73e68bcdf"
1105+
"outputId": "4adb97c7-c9b8-4304-f1a0-b85f0142d739"
11061106
},
11071107
"source": [
11081108
"W.shape"
@@ -1139,7 +1139,7 @@
11391139
"base_uri": "https://localhost:8080/"
11401140
},
11411141
"id": "Q9n2uF5ZgkHr",
1142-
"outputId": "79f5eb45-db91-47a0-b5c5-93c8ea184add"
1142+
"outputId": "c2fc9eb0-ae93-400f-b79e-0a757ba203b4"
11431143
},
11441144
"source": [
11451145
"import hdbscan\n",
@@ -1182,7 +1182,7 @@
11821182
"base_uri": "https://localhost:8080/"
11831183
},
11841184
"id": "4aOhW1I1gkHs",
1185-
"outputId": "6102cb7c-7105-40f0-df1b-38e6d4b33614"
1185+
"outputId": "dda764d8-f59d-45cd-dbde-94ec26cb3f46"
11861186
},
11871187
"source": [
11881188
"np.unique(clusterer.labels_)"
@@ -1208,7 +1208,7 @@
12081208
"base_uri": "https://localhost:8080/"
12091209
},
12101210
"id": "PwNJesFwe7IT",
1211-
"outputId": "abf58a4f-5011-4aac-c104-c71204c78c72"
1211+
"outputId": "e130dbe9-fec0-4a5d-daf4-339bd4d8c7a4"
12121212
},
12131213
"source": [
12141214
"clusterer.labels_.shape"
@@ -1234,7 +1234,7 @@
12341234
"base_uri": "https://localhost:8080/"
12351235
},
12361236
"id": "WPTzfAm-gkHs",
1237-
"outputId": "a0c5863e-584c-4270-b667-44569ff148a4"
1237+
"outputId": "88758446-5cd5-40e9-af9f-821cc1e75350"
12381238
},
12391239
"source": [
12401240
"W.shape"
@@ -1307,7 +1307,7 @@
13071307
"height": 502
13081308
},
13091309
"id": "4M8ufspxgkHt",
1310-
"outputId": "5e6be679-82e3-4cb2-ae42-77a41c060099"
1310+
"outputId": "1e6c9db7-db39-4acb-ea2e-738dc4bac1a4"
13111311
},
13121312
"source": [
13131313
"cluster_counts = dft[\"cluster\"].value_counts()\n",
@@ -1319,7 +1319,7 @@
13191319
"output_type": "execute_result",
13201320
"data": {
13211321
"text/plain": [
1322-
"<matplotlib.axes._subplots.AxesSubplot at 0x7f9f9a88cd10>"
1322+
"<matplotlib.axes._subplots.AxesSubplot at 0x7f77256c8110>"
13231323
]
13241324
},
13251325
"metadata": {},
@@ -1372,7 +1372,7 @@
13721372
"height": 266
13731373
},
13741374
"id": "4zYpWZUk7TOJ",
1375-
"outputId": "f3e93a71-d84b-455b-e044-574eb8dbe398"
1375+
"outputId": "e7b7eb75-d726-4ed1-fcae-8a8b81b8b72d"
13761376
},
13771377
"source": [
13781378
"colsneeded = [\"AIG_\"+ str(i) for i in range(5)]\n",
@@ -1495,7 +1495,7 @@
14951495
"height": 235
14961496
},
14971497
"id": "7_BcGoK64xLE",
1498-
"outputId": "66a1f507-c44d-4162-8b21-43d80852a066"
1498+
"outputId": "c79980fa-cb30-4118-f1d5-7525fb297ca1"
14991499
},
15001500
"source": [
15011501
"from sklearn import preprocessing\n",
@@ -1611,7 +1611,7 @@
16111611
"height": 503
16121612
},
16131613
"id": "OCalzY0qcwyx",
1614-
"outputId": "3597408e-b4b7-46e8-83e5-72d6b041970e"
1614+
"outputId": "02851c60-b912-4169-d3a8-b9847fdc3eae"
16151615
},
16161616
"source": [
16171617
"import seaborn as sns\n",
@@ -1625,7 +1625,7 @@
16251625
"output_type": "execute_result",
16261626
"data": {
16271627
"text/plain": [
1628-
"<matplotlib.axes._subplots.AxesSubplot at 0x7f9f99462110>"
1628+
"<matplotlib.axes._subplots.AxesSubplot at 0x7f772565a810>"
16291629
]
16301630
},
16311631
"metadata": {},
@@ -1660,7 +1660,7 @@
16601660
"height": 556
16611661
},
16621662
"id": "mB9Pe3CJgkHt",
1663-
"outputId": "a0596278-a6fe-43ff-fd43-f8eeda733317"
1663+
"outputId": "34a5cf99-b698-435a-ae75-174c502d8105"
16641664
},
16651665
"source": [
16661666
"plt.rcParams['figure.figsize'] = [11, 8]\n",
@@ -1683,7 +1683,7 @@
16831683
"output_type": "execute_result",
16841684
"data": {
16851685
"text/plain": [
1686-
"<matplotlib.axes._subplots.AxesSubplot at 0x7f9f9886a510>"
1686+
"<matplotlib.axes._subplots.AxesSubplot at 0x7f7725532190>"
16871687
]
16881688
},
16891689
"metadata": {},
@@ -1718,7 +1718,7 @@
17181718
"base_uri": "https://localhost:8080/"
17191719
},
17201720
"id": "m0Lo8WDsgkHt",
1721-
"outputId": "f111f549-c96d-4dac-9578-9f21261fe361"
1721+
"outputId": "9dec839b-a885-4959-cb3d-0145ea2f1c43"
17221722
},
17231723
"source": [
17241724
"\n",
@@ -1777,9 +1777,9 @@
17771777
" 'DB_service_port': 8529,\n",
17781778
" 'arangodb_replication_factor': 3,\n",
17791779
" 'conn_protocol': 'https',\n",
1780-
" 'dbName': 'MLnhbcqq5p2uffy78m792gw',\n",
1781-
" 'password': 'MLutpuycscsnr9srnluavgco',\n",
1782-
" 'username': 'MLqxvued4g83cfrg0jck23'},\n",
1780+
" 'dbName': 'MLd3glwsq6jyss6f4abo5btf',\n",
1781+
" 'password': 'MLl1li9rhakopyltuqdho4wc',\n",
1782+
" 'username': 'MLz1qsetxx367d83agoc4zo'},\n",
17831783
" 'mlgraph': {'graphname': 'enterprise_ml_graph'}}"
17841784
]
17851785
},
@@ -1951,7 +1951,7 @@
19511951
"base_uri": "https://localhost:8080/"
19521952
},
19531953
"id": "NDFgef8T6Z3O",
1954-
"outputId": "4229c829-d0c9-445f-e4aa-b120eefac0ae"
1954+
"outputId": "d8e5748b-25b4-41ed-9ec5-eba7981c7270"
19551955
},
19561956
"source": [
19571957
"df.shape[0]"
@@ -1986,7 +1986,7 @@
19861986
"base_uri": "https://localhost:8080/"
19871987
},
19881988
"id": "Daz665EvCU3H",
1989-
"outputId": "56697884-5aea-415e-df15-a56c35826593"
1989+
"outputId": "a606a453-41d1-4d70-daf8-326eb367c539"
19901990
},
19911991
"source": [
19921992
"import json\n",
@@ -2026,7 +2026,7 @@
20262026
"output_type": "stream",
20272027
"text": [
20282028
"CPU times: user 3 µs, sys: 0 ns, total: 3 µs\n",
2029-
"Wall time: 5.96 µs\n",
2029+
"Wall time: 5.72 µs\n",
20302030
"Inserting batch 1\n",
20312031
"Inserting batch 2\n",
20322032
"Inserting batch 3\n",
@@ -2062,7 +2062,7 @@
20622062
"base_uri": "https://localhost:8080/"
20632063
},
20642064
"id": "OOapu6CQL41p",
2065-
"outputId": "61b4cd24-8448-43ca-8115-a8445945a375"
2065+
"outputId": "286a94d4-2105-4205-a3f5-d568df79e835"
20662066
},
20672067
"source": [
20682068
"import json\n",
@@ -2098,8 +2098,8 @@
20982098
{
20992099
"output_type": "stream",
21002100
"text": [
2101-
"CPU times: user 3 µs, sys: 1 µs, total: 4 µs\n",
2102-
"Wall time: 6.68 µs\n",
2101+
"CPU times: user 3 µs, sys: 0 ns, total: 3 µs\n",
2102+
"Wall time: 6.2 µs\n",
21032103
"Inserting batch 1\n",
21042104
"Inserting batch 2\n",
21052105
"Inserting batch the last batch!\n"
@@ -2124,7 +2124,7 @@
21242124
"base_uri": "https://localhost:8080/"
21252125
},
21262126
"id": "X_17W43u59kq",
2127-
"outputId": "2c098d75-3918-41a5-dd6e-0322ef299cec"
2127+
"outputId": "c9d782f5-f7c0-4281-bbe5-fb2c1216678c"
21282128
},
21292129
"source": [
21302130
"import json\n",
@@ -2161,8 +2161,8 @@
21612161
{
21622162
"output_type": "stream",
21632163
"text": [
2164-
"CPU times: user 0 ns, sys: 2 µs, total: 2 µs\n",
2165-
"Wall time: 4.53 µs\n",
2164+
"CPU times: user 4 µs, sys: 1 µs, total: 5 µs\n",
2165+
"Wall time: 8.34 µs\n",
21662166
"Inserting batch the last batch!\n"
21672167
],
21682168
"name": "stdout"
@@ -2175,7 +2175,36 @@
21752175
"id": "iyrR2icdt3Th"
21762176
},
21772177
"source": [
2178-
"## Query to find customers similar to a customer"
2178+
"## Query to find customers similar to a customer\n",
2179+
"\n",
2180+
"### AQL Query (can be run through the web UI queries tab)\n",
2181+
"The following query can be used to find customers similar to a customer\n",
2182+
"```\n/*locate specific user*/\n",
2183+
"LET uemb = (\n",
2184+
"FOR u in Customers\n",
2185+
" FILTER u._id == \"Customers/12748\"\n",
2186+
" FOR j in RANGE(0,4)\n",
2187+
" RETURN TO_NUMBER(NTH(u.mf_emb,j))\n",
2188+
" )\n",
2189+
"\n",
2190+
"/*calculate distance from user to all other users*/\n",
2191+
"LET dau = (\n",
2192+
" FOR v in Customers\n",
2193+
" /* Limit to users that have an embedding*/\n",
2194+
" FILTER HAS(v, \"mf_emb\")\n",
2195+
" LET dv = (SQRT(SUM(\n",
2196+
" \n",
2197+
" FOR i in RANGE(0,4)\n",
2198+
" LET di = TO_NUMBER(NTH(uemb, i)) - TO_NUMBER(NTH(v.mf_emb, i))\n",
2199+
" RETURN POW(di,2)\n",
2200+
" )))\n",
2201+
" RETURN {\"user\": v._id, \"dist\": dv}\n",
2202+
" )\n",
2203+
"/*sort results*/ \n",
2204+
"FOR du in dau\n",
2205+
" SORT du.dist\n",
2206+
" RETURN {\"user\": du.user, \"dist\": du.dist}\n",
2207+
"```\n"
21792208
]
21802209
},
21812210
{
@@ -2193,7 +2222,33 @@
21932222
"id": "LCS6POAauAIE"
21942223
},
21952224
"source": [
2196-
"## Query to find items similar to an item"
2225+
"## Query to find items similar to an item\n",
2226+
"\n",
2227+
"### AQL Query (can be run through the web UI queries tab)\n",
2228+
"The following query can be used to find items similar to an item\n",
2229+
"\n",
2230+
"```\n",
2231+
"LET uemb = (\n",
2232+
"FOR u in Items\n",
2233+
" FILTER u._id == \"Items/22045\"\n",
2234+
" FOR j in RANGE(0,4)\n",
2235+
" RETURN TO_NUMBER(NTH(u.mf_emb,j))\n",
2236+
" )\n",
2237+
"\n",
2238+
"/*calculate distance from item to all other items*/\n",
2239+
"LET dau = (\n",
2240+
" FOR v in Items\n",
2241+
" /* Limit to items that have an embedding*/\n",
2242+
" FILTER HAS(v, \"mf_emb\")\n",
2243+
" LET dv = (SQRT(SUM(\n",
2244+
" \n",
2245+
" FOR i in RANGE(0,4)\n",
2246+
" LET di = TO_NUMBER(NTH(uemb, i)) - TO_NUMBER(NTH(v.mf_emb, i))\n",
2247+
" RETURN POW(di,2)\n",
2248+
" )))\n",
2249+
" RETURN {\"item\": v._id, \"dist\": dv}\n",
2250+
" )\n",
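2251+
"/*sort results*/\nFOR ditem in dau\n    SORT ditem.dist\n    RETURN {\"item\": ditem.item, \"dist\": ditem.dist}\n```"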
21972252
]
21982253
},
21992254
{
@@ -2211,7 +2266,17 @@
22112266
"id": "FCPJHoaquEP3"
22122267
},
22132268
"source": [
2214-
"## Query to find customers in a cluster"
2269+
"## Query to find customers in a cluster\n",
2270+
"\n",
2271+
"### AQL Query (can be run through the web UI queries tab)\n",
2272+
"The following query can be used to find customers in a cluster\n",
2273+
"\n",
2274+
"```\n",
2275+
"for c in Customers\n",
2276+
" FILTER c.cluster == '4'\n",
2277+
" RETURN c \n",
2278+
"\n",
2279+
"```"
22152280
]
22162281
},
22172282
{

0 commit comments

Comments
 (0)