1212tfidf = TfidfVectorizer (stop_words = 'english' )
1313
1414metadata ['shortDescription' ] = metadata ['shortDescription' ].fillna ('' )
15-
1615tfidf_matrix = tfidf .fit_transform (metadata ['shortDescription' ])
1716
1817# Using cosine similarity between descriptions.
@@ -27,10 +26,13 @@ def get_recommendations(cliName, cosine_sim=cosine_sim, k=None):
2726 idx = indices [cliName ]
2827
2928 # If the Index object is iterable, it means there is ambiguity because of same program name.
30- # Curently, we are choosing one and moving ahead, but this will be changed in future.
29+ # The same program can belong to different platforms with the same or different functionality,
30+ # so the ambiguity is resolved by picking the entry whose platformId matches the given platform_id.
3131 if hasattr (idx , '__iter__' ):
3232 z = [x for x in idx ]
33- idx = z [1 ]
33+ for indx in z :
34+ if metadata ['platformId' ].iloc [indx ] == platform_id :
35+ idx = indx
3436
3537 # Get the pairwise similarity scores of all programs with that program
3638 sim_scores = list (enumerate (cosine_sim [idx ]))
@@ -44,14 +46,13 @@ def get_recommendations(cliName, cosine_sim=cosine_sim, k=None):
4446 # Get the program indices
4547 program_indices = [i [0 ] for i in sim_scores ]
4648
47- # import pdb; pdb.set_trace()
48-
4949 # Return the top k or 5 most similar programs
5050 names = metadata ['cliName' ].iloc [program_indices ].tolist ()
5151 description = metadata ['shortDescription' ].iloc [program_indices ].tolist ()
52- recs = {}
53-
52+
53+ recs = dict ()
54+ recs ["recommendations" ] = []
5455 for x , y in zip (names , description ):
55- recs [x ] = y
56+ recs ["recommendations" ]. append ({ "cliName" : x , "shortDescription" : y })
5657
5758 return recs
0 commit comments