55
66
77def cover (
8- relevances : np .ndarray ,
98 embeddings : np .ndarray ,
9+ scores : np .ndarray ,
1010 k : int ,
1111 theta : float = 0.5 ,
1212 gamma : float = 0.5 ,
@@ -19,8 +19,8 @@ def cover(
1919 This strategy chooses `k` items by combining pure relevance with
2020 diversity-driven coverage using a concave submodular formulation.
2121
22- :param relevances: 1D array of relevance scores for each item.
2322 :param embeddings: 2D array of shape (n_samples, n_features).
23+ :param scores: 1D array of relevance scores for each item.
2424 :param k: Number of items to select.
2525 :param theta: Trade-off between relevance and coverage in [0, 1].
2626 1.0 = pure relevance, 0.0 = pure coverage.
@@ -31,47 +31,55 @@ def cover(
3131 :raises ValueError: If theta is not in [0, 1].
3232 :raises ValueError: If gamma is not in (0, 1].
3333 """
34+ # Validate parameters
3435 if not (0.0 <= float (theta ) <= 1.0 ):
3536 raise ValueError ("theta must be in [0, 1]" )
3637 if not (0.0 < float (gamma ) <= 1.0 ):
3738 raise ValueError ("gamma must be in (0, 1]" )
3839
39- relevance_scores , feature_matrix , top_k , early_exit = prepare_inputs (relevances , embeddings , k )
40+ # Prepare inputs
41+ relevance_scores , feature_matrix , top_k , early_exit = prepare_inputs (scores , embeddings , k )
4042 if early_exit :
43+ # Nothing to select: return empty arrays
4144 return np .empty (0 , np .int32 ), np .empty (0 , np .float32 )
4245
4346 if metric == Metric .COSINE and normalize :
47+ # Normalize feature vectors to unit length for cosine similarity
4448 feature_matrix = normalize_rows (feature_matrix )
4549
46- # Pure relevance: short-circuit
4750 if float (theta ) == 1.0 :
51+ # Pure relevance: select top-k by relevance scores
4852 topk = np .argsort (- relevance_scores )[:top_k ].astype (np .int32 )
4953 gains = relevance_scores [topk ].astype (np .float32 , copy = False )
5054 return topk , gains
5155
52- # Nonnegative similarities for coverage to avoid concave-power NaNs
56+ # Compute non-negative similarities for coverage to avoid concave-power NaNs
5357 similarity_matrix = pairwise_similarity (feature_matrix , metric )
54- transposed_similarity = similarity_matrix .T
58+ transposed_similarity_matrix = similarity_matrix .T
5559
56- n = similarity_matrix . shape [ 0 ]
57- accumulated_coverage = np .zeros (n , dtype = np .float32 )
58- selected_mask = np .zeros (n , dtype = bool )
60+ # Initialize selection state
61+ accumulated_coverage = np .zeros (similarity_matrix . shape [ 0 ] , dtype = np .float32 )
62+ selected_mask = np .zeros (similarity_matrix . shape [ 0 ] , dtype = bool )
5963 selected_indices = np .empty (top_k , dtype = np .int32 )
6064 marginal_gains = np .empty (top_k , dtype = np .float32 )
6165
62- for t in range (top_k ):
66+ for step in range (top_k ):
67+ # Compute coverage gains using concave transformation
6368 concave_before = np .power (accumulated_coverage , gamma )
64- concave_after = np .power (transposed_similarity + accumulated_coverage [None , :], gamma )
69+ concave_after = np .power (transposed_similarity_matrix + accumulated_coverage [None , :], gamma )
6570 coverage_gains = (concave_after - concave_before [None , :]).sum (axis = 1 )
6671
72+ # Combine relevance and coverage gains
6773 candidate_scores = theta * relevance_scores + (1.0 - theta ) * coverage_gains
6874 candidate_scores [selected_mask ] = - np .inf
6975
70- chosen = int (np .argmax (candidate_scores ))
71- selected_indices [t ] = chosen
72- marginal_gains [t ] = float (candidate_scores [chosen ])
73- selected_mask [chosen ] = True
76+ # Select item with highest combined score
77+ best_index = int (np .argmax (candidate_scores ))
78+ selected_indices [step ] = best_index
79+ marginal_gains [step ] = float (candidate_scores [best_index ])
80+ selected_mask [best_index ] = True
7481
75- accumulated_coverage += similarity_matrix [:, chosen ]
82+ # Update accumulated coverage
83+ accumulated_coverage += similarity_matrix [:, best_index ]
7684
7785 return selected_indices , marginal_gains
0 commit comments