1111
1212
# Shared Elasticsearch client, created lazily by get_client_es().
_es_client = None


def get_client_es():
    """Return the shared Elasticsearch client, creating it on first use.

    On the first call the connection settings are read from
    ``../config.yml`` (resolved against the current working directory),
    which must provide the ``cloud_id`` and ``api_key`` keys. Later calls
    return the same client instead of re-reading the file and building a
    new client per request.

    Raises:
        OSError: if the config file cannot be opened.
        KeyError: if ``cloud_id`` or ``api_key`` is missing from the config.
    """
    global _es_client
    if _es_client is None:
        # Explicit encoding so parsing does not depend on the platform locale.
        with open("../config.yml", "r", encoding="utf-8") as file:
            config = yaml.safe_load(file)
        _es_client = Elasticsearch(
            cloud_id=config["cloud_id"],
            api_key=config["api_key"],
        )
    return _es_client
2017
2118
_EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Loaded lazily by _get_embedding_model() and then reused across calls.
_embedding_model = None


def _get_embedding_model():
    """Return the shared SentenceTransformer instance, loading it on first use."""
    global _embedding_model
    if _embedding_model is None:
        _embedding_model = SentenceTransformer(_EMBEDDING_MODEL_NAME)
    return _embedding_model


def get_text_vector(sentences):
    """Encode ``sentences`` with the all-MiniLM-L6-v2 sentence-transformer model.

    The model is instantiated once and reused; constructing it on every call
    (as was done previously) repeats the model load for each request.

    Args:
        sentences: Value passed straight through to ``model.encode``.

    Returns:
        Whatever ``model.encode(sentences)`` returns (the embeddings).
    """
    return _get_embedding_model().encode(sentences)
2623
2724
2825def build_query (term = None , categories = None , product_types = None , brands = None ):
29- must_query = [{"match_all" : {}}] if not term else [{
30- "multi_match" : {
31- "query" : term ,
32- "fields" : ["name" , "category" , "description" ]
33- }
34- }]
26+ must_query = (
27+ [{"match_all" : {}}]
28+ if not term
29+ else [
30+ {
31+ "multi_match" : {
32+ "query" : term ,
33+ "fields" : ["name" , "category" , "description" ],
34+ }
35+ }
36+ ]
37+ )
3538
3639 filters = []
3740 if categories :
@@ -42,17 +45,23 @@ def build_query(term=None, categories=None, product_types=None, brands=None):
4245 filters .append ({"terms" : {"brand.keyword" : brands }})
4346
4447 return {
45- "_source" : ["id" , "brand" , "name" , "price" , "currency" , "image_link" , "category" , "tag_list" ],
46- "query" : {
47- "bool" : {
48- "must" : must_query ,
49- "filter" : filters
50- }
51- }
48+ "_source" : [
49+ "id" ,
50+ "brand" ,
51+ "name" ,
52+ "price" ,
53+ "currency" ,
54+ "image_link" ,
55+ "category" ,
56+ "tag_list" ,
57+ ],
58+ "query" : {"bool" : {"must" : must_query , "filter" : filters }},
5259 }
5360
5461
55- def build_hybrid_query (term = None , categories = None , product_types = None , brands = None , hybrid = False ):
62+ def build_hybrid_query (
63+ term = None , categories = None , product_types = None , brands = None , hybrid = False
64+ ):
5665 # Standard query
5766 organic_query = build_query (term , categories , product_types , brands )
5867
@@ -65,81 +74,79 @@ def build_hybrid_query(term=None, categories=None, product_types=None, brands=No
6574 "retriever" : {
6675 "rrf" : {
6776 "retrievers" : [
68- {
69- "standard" : {
70- "query" : organic_query ['query' ]
71- }
72- },
77+ {"standard" : {"query" : organic_query ["query" ]}},
7378 {
7479 "knn" : {
7580 "field" : "description_embeddings" ,
7681 "query_vector" : vector ,
7782 "k" : 5 ,
7883 "num_candidates" : 20 ,
79- "filter" : {
80- "bool" : {
81- "filter" : []
82- }
83- }
84+ "filter" : {"bool" : {"filter" : []}},
8485 }
85- }
86+ },
8687 ],
8788 "rank_window_size" : 20 ,
88- "rank_constant" : 5
89+ "rank_constant" : 5 ,
8990 }
9091 },
91- "_source" : organic_query [' _source' ]
92+ "_source" : organic_query [" _source" ],
9293 }
9394
9495 if categories :
95- query [' retriever' ][ ' rrf' ][ ' retrievers' ][1 ][' knn' ][ ' filter' ][ ' bool' ][ 'filter' ]. append ({
96- "terms" : { "category" : categories }
97- })
96+ query [" retriever" ][ " rrf" ][ " retrievers" ][1 ][" knn" ][ " filter" ][ " bool" ][
97+ "filter"
98+ ]. append ({ "terms" : { "category" : categories } })
9899 if product_types :
99- query [' retriever' ][ ' rrf' ][ ' retrievers' ][1 ][' knn' ][ ' filter' ][ ' bool' ][ 'filter' ]. append ({
100- "terms" : { "product_type" : product_types }
101- })
100+ query [" retriever" ][ " rrf" ][ " retrievers" ][1 ][" knn" ][ " filter" ][ " bool" ][
101+ "filter"
102+ ]. append ({ "terms" : { "product_type" : product_types } })
102103 if brands :
103- query [' retriever' ][ ' rrf' ][ ' retrievers' ][1 ][' knn' ][ ' filter' ][ ' bool' ][ 'filter' ]. append ({
104- "terms" : { "brand.keyword" : brands }
105- })
104+ query [" retriever" ][ " rrf" ][ " retrievers" ][1 ][" knn" ][ " filter" ][ " bool" ][
105+ "filter"
106+ ]. append ({ "terms" : { "brand.keyword" : brands } })
106107 else :
107108 query = organic_query
108109
109110 return query
110111
111112
def search_products(
    term,
    categories=None,
    product_types=None,
    brands=None,
    promote_products=None,
    hybrid=False,
):
    """Search the ``products-catalog`` index and return simplified product dicts.

    Args:
        term: Search text, forwarded to ``build_hybrid_query``.
        categories: Optional category filter values.
        product_types: Optional product-type filter values.
        brands: Optional brand filter values.
        promote_products: Optional document ids to pin via a ``pinned``
            query. ``None`` or an empty list disables pinning. Ignored when
            ``hybrid`` is true.
        hybrid: Forwarded to ``build_hybrid_query``; when true, pinning is
            skipped.

    Returns:
        A list (at most 20 entries, the requested ``size``) of dicts with the
        keys ``id``, ``brand``, ``name``, ``price``, ``currency``,
        ``image_link``, ``category`` and ``tags``.
    """
    query = build_hybrid_query(term, categories, product_types, brands, hybrid)

    # Pinning wraps the organic query, so it needs a top-level "query" key.
    # NOTE(review): the hybrid body appears to be retriever-based, which is
    # presumably why pinning is skipped for it - confirm.
    if promote_products and not hybrid:
        query = {
            "query": {
                "pinned": {"ids": promote_products, "organic": query["query"]}
            },
            "_source": query["_source"],
        }

    print(query)
    response = get_client_es().search(index="products-catalog", body=query, size=20)

    results = []
    for hit in response["hits"]["hits"]:
        source = hit["_source"]
        print(f"Product Name: {source['name']}, Score: {hit['_score']}")

        results.append(
            {
                "id": source["id"],
                "brand": source["brand"],
                "name": source["name"],
                "price": source["price"],
                # Fall back to USD when the currency is missing or empty.
                "currency": source.get("currency") or "USD",
                "image_link": source["image_link"],
                "category": source["category"],
                "tags": source.get("tag_list", []),
            }
        )

    return results
145152
@@ -149,51 +156,55 @@ def get_facets_data(term, categories=None, product_types=None, brands=None):
149156 query ["aggs" ] = {
150157 "product_types" : {"terms" : {"field" : "product_type" }},
151158 "categories" : {"terms" : {"field" : "category" }},
152- "brands" : {"terms" : {"field" : "brand.keyword" }}
159+ "brands" : {"terms" : {"field" : "brand.keyword" }},
153160 }
154161 response = get_client_es ().search (index = "products-catalog" , body = query , size = 0 )
155162
156163 return {
157164 "product_types" : [
158- {"product_type" : bucket [' key' ], "count" : bucket [' doc_count' ]}
159- for bucket in response [' aggregations' ][ ' product_types' ][ ' buckets' ]
165+ {"product_type" : bucket [" key" ], "count" : bucket [" doc_count" ]}
166+ for bucket in response [" aggregations" ][ " product_types" ][ " buckets" ]
160167 ],
161168 "categories" : [
162- {"category" : bucket [' key' ], "count" : bucket [' doc_count' ]}
163- for bucket in response [' aggregations' ][ ' categories' ][ ' buckets' ]
169+ {"category" : bucket [" key" ], "count" : bucket [" doc_count" ]}
170+ for bucket in response [" aggregations" ][ " categories" ][ " buckets" ]
164171 ],
165172 "brands" : [
166- {"brand" : bucket [' key' ], "count" : bucket [' doc_count' ]}
167- for bucket in response [' aggregations' ][ ' brands' ][ ' buckets' ]
168- ]
173+ {"brand" : bucket [" key" ], "count" : bucket [" doc_count" ]}
174+ for bucket in response [" aggregations" ][ " brands" ][ " buckets" ]
175+ ],
169176 }
170177
171178
@app.route("/api/products/search", methods=["GET"])
def search():
    """Handle ``GET /api/products/search`` and return matching products as JSON.

    Reads ``query``, the ``selectedCategories[]`` / ``selectedProductTypes[]``
    / ``selectedBrands[]`` lists and the ``hybrid`` flag from the query
    string. ``hybrid`` is true only when its value equals ``"true"``
    case-insensitively.
    """
    params = request.args
    term = params.get("query")
    selected_categories = params.getlist("selectedCategories[]")
    selected_product_types = params.getlist("selectedProductTypes[]")
    selected_brands = params.getlist("selectedBrands[]")
    use_hybrid = params.get("hybrid", "False").lower() == "true"

    products = search_products(
        term,
        categories=selected_categories,
        product_types=selected_product_types,
        brands=selected_brands,
        promote_products=promote_products_free_gluten,
        hybrid=use_hybrid,
    )
    return jsonify(products)
184195
185196
@app.route("/api/products/facets", methods=["GET"])
def facets():
    """Handle ``GET /api/products/facets`` and return facet data as JSON.

    Reads ``query`` and the ``selectedCategories[]`` /
    ``selectedProductTypes[]`` / ``selectedBrands[]`` lists from the query
    string and passes them to ``get_facets_data``.
    """
    params = request.args
    facet_counts = get_facets_data(
        params.get("query"),
        categories=params.getlist("selectedCategories[]"),
        product_types=params.getlist("selectedProductTypes[]"),
        brands=params.getlist("selectedBrands[]"),
    )
    return jsonify(facet_counts)
196207
197208
if __name__ == "__main__":
    import os

    # Local development entry point. Debug mode stays on by default, as
    # before, but can now be switched off without editing the code by setting
    # FLASK_DEBUG to 0/false/no/off. Flask's debugger must not be exposed in
    # production.
    _debug_flag = os.environ.get("FLASK_DEBUG", "1").strip().lower()
    app.run(debug=_debug_flag not in {"0", "false", "no", "off"})
0 commit comments