Skip to content

Commit 8fdda45

Browse files
authored
feat: Aligned diversity parameter across strategies (Pringled#8)
* Renamed parameter to diversity * Renamed parameter to diversity * Renamed parameter to diversity and inversed diversity logic
1 parent 4aca465 commit 8fdda45

File tree

9 files changed

+86
-67
lines changed

9 files changed

+86
-67
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,13 @@ from pyversity import diversify, Strategy
3535
embeddings = np.random.randn(100, 256)
3636
scores = np.random.rand(100)
3737

38-
# Diversify with with a chosen strategy (in this case MMR)
38+
# Diversify with a chosen strategy (in this case MMR) and a diversity of 0.5 (balanced)
3939
diversified_result = diversify(
4040
embeddings=embeddings,
4141
scores=scores,
4242
k=10,
4343
strategy=Strategy.MMR,
44+
diversity=0.5
4445
)
4546
# Get the indices of the diversified result
4647
diversified_indices = diversified_result.indices

src/pyversity/datatypes.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,13 @@ class DiversificationResult:
3030
indices: Diversified item indices.
3131
marginal_gains: Marginal gains/relevance scores for the diversified items.
3232
strategy: Diversification strategy used.
33+
diversity: Diversity parameter used in the strategy.
3334
parameters: Additional parameters used in the strategy.
3435
3536
"""
3637

3738
indices: np.ndarray
3839
marginal_gains: np.ndarray
3940
strategy: Strategy
40-
parameters: dict
41+
diversity: float
42+
parameters: dict | None = None

src/pyversity/pyversity.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ def diversify(
1111
scores: np.ndarray,
1212
k: int,
1313
strategy: Strategy = Strategy.MMR,
14+
diversity: float = 0.5,
1415
**kwargs: Any,
1516
) -> DiversificationResult:
1617
"""
@@ -21,17 +22,18 @@ def diversify(
2122
:param k: The number of items to select for the diversified result.
2223
:param strategy: The diversification strategy to apply.
2324
Supported strategies are: 'mmr' (default), 'msd', 'cover', and 'dpp'.
25+
:param diversity: Diversity parameter. Higher values prioritize diversity and lower values prioritize relevance.
2426
:param **kwargs: Additional keyword arguments passed to the specific strategy function.
2527
:return: A DiversificationResult containing the selected item indices,
2628
their marginal gains, the strategy used, and the parameters.
2729
:raises ValueError: If the provided strategy is not recognized.
2830
"""
2931
if strategy == Strategy.MMR:
30-
return mmr(embeddings, scores, k, **kwargs)
32+
return mmr(embeddings, scores, k, diversity, **kwargs)
3133
if strategy == Strategy.MSD:
32-
return msd(embeddings, scores, k, **kwargs)
34+
return msd(embeddings, scores, k, diversity, **kwargs)
3335
if strategy == Strategy.COVER:
34-
return cover(embeddings, scores, k, **kwargs)
36+
return cover(embeddings, scores, k, diversity, **kwargs)
3537
if strategy == Strategy.DPP:
36-
return dpp(embeddings, scores, k, **kwargs)
38+
return dpp(embeddings, scores, k, diversity, **kwargs)
3739
raise ValueError(f"Unknown strategy: {strategy}")

src/pyversity/strategies/cover.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ def cover(
88
embeddings: np.ndarray,
99
scores: np.ndarray,
1010
k: int,
11-
theta: float = 0.5,
11+
diversity: float = 0.5,
1212
gamma: float = 0.5,
1313
metric: Metric = Metric.COSINE,
1414
normalize: bool = True,
@@ -22,8 +22,8 @@ def cover(
2222
:param embeddings: 2D array of shape (n_samples, n_features).
2323
:param scores: 1D array of relevance scores for each item.
2424
:param k: Number of items to select.
25-
:param theta: Trade-off between relevance and coverage in [0, 1].
26-
1.0 = pure relevance, 0.0 = pure coverage.
25+
:param diversity: Trade-off between relevance and coverage in [0, 1] (internally theta = 1 - diversity).
26+
1.0 = pure coverage, 0.0 = pure relevance.
2727
:param gamma: Concavity parameter in (0, 1]; lower values emphasize diversity.
2828
:param metric: Similarity metric to use. Default is Metric.COSINE.
2929
:param normalize: Whether to normalize embeddings before computing similarity.
@@ -33,17 +33,20 @@ def cover(
3333
:raises ValueError: If gamma is not in (0, 1].
3434
"""
3535
# Validate parameters
36-
if not (0.0 <= float(theta) <= 1.0):
37-
raise ValueError("theta must be in [0, 1]")
36+
if not (0.0 <= float(diversity) <= 1.0):
37+
raise ValueError("diversity must be in [0, 1]")
3838
if not (0.0 < float(gamma) <= 1.0):
3939
raise ValueError("gamma must be in (0, 1]")
4040

4141
params = {
42-
"theta": theta,
4342
"gamma": gamma,
4443
"metric": metric,
4544
}
4645

46+
# Theta parameter for trade-off between relevance and diversity
47+
# This is 1 - diversity to align with common notation
48+
theta = 1.0 - diversity
49+
4750
# Prepare inputs
4851
feature_matrix, relevance_scores, top_k, early_exit = prepare_inputs(embeddings, scores, k)
4952
if early_exit:
@@ -52,6 +55,7 @@ def cover(
5255
indices=np.empty(0, np.int32),
5356
marginal_gains=np.empty(0, np.float32),
5457
strategy=Strategy.COVER,
58+
diversity=diversity,
5559
parameters=params,
5660
)
5761

@@ -67,6 +71,7 @@ def cover(
6771
indices=topk,
6872
marginal_gains=gains,
6973
strategy=Strategy.COVER,
74+
diversity=diversity,
7075
parameters=params,
7176
)
7277

@@ -103,5 +108,6 @@ def cover(
103108
indices=selected_indices,
104109
marginal_gains=marginal_gains,
105110
strategy=Strategy.COVER,
111+
diversity=diversity,
106112
parameters=params,
107113
)

src/pyversity/strategies/dpp.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def dpp(
1616
embeddings: np.ndarray,
1717
scores: np.ndarray,
1818
k: int,
19-
beta: float = 1.0,
19+
diversity: float = 1.0,
2020
) -> DiversificationResult:
2121
"""
2222
Greedy determinantal point process (DPP) selection.
@@ -28,11 +28,15 @@ def dpp(
2828
:param embeddings: 2D array of shape (n_samples, n_features).
2929
:param scores: 1D array of relevance scores for each item.
3030
:param k: Number of items to select.
31-
:param beta: Controls the influence of relevance scores in the DPP kernel.
32-
Higher values increase the emphasis on relevance.
31+
:param diversity: Controls the influence of relevance scores in the DPP kernel (internally beta = 1 - diversity).
32+
Higher values decrease the emphasis on relevance.
3333
:return: A DiversificationResult containing the selected item indices,
3434
their marginal gains, the strategy used, and the parameters.
3535
"""
36+
# Beta parameter to control relevance influence in DPP kernel.
37+
# This is 1 - diversity (the complement, not the reciprocal) to align with common notation.
38+
beta = 1 - diversity
39+
3640
# Prepare inputs
3741
feature_matrix, relevance_scores, top_k, early_exit = prepare_inputs(embeddings, scores, k)
3842
if early_exit:
@@ -41,7 +45,7 @@ def dpp(
4145
indices=np.empty(0, np.int32),
4246
marginal_gains=np.empty(0, np.float32),
4347
strategy=Strategy.DPP,
44-
parameters={"beta": beta},
48+
diversity=diversity,
4549
)
4650
# Normalize feature vectors to unit length for cosine similarity
4751
feature_matrix = normalize_rows(feature_matrix)
@@ -97,5 +101,5 @@ def dpp(
97101
indices=selected_indices[:step],
98102
marginal_gains=marginal_gains[:step],
99103
strategy=Strategy.DPP,
100-
parameters={"beta": beta},
104+
diversity=diversity,
101105
)

src/pyversity/strategies/mmr.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ def mmr(
88
embeddings: np.ndarray,
99
scores: np.ndarray,
1010
k: int,
11-
lambda_param: float = 0.5,
11+
diversity: float = 0.5,
1212
metric: Metric = Metric.COSINE,
1313
normalize: bool = True,
1414
) -> DiversificationResult:
@@ -22,8 +22,8 @@ def mmr(
2222
:param embeddings: 2D array of shape (n_samples, n_features).
2323
:param scores: 1D array of relevance scores for each item.
2424
:param k: Number of items to select.
25-
:param lambda_param: Trade-off parameter in [0, 1].
26-
1.0 = pure relevance, 0.0 = pure diversity.
25+
:param diversity: Trade-off parameter in [0, 1] (internally lambda = 1 - diversity).
26+
1.0 = pure diversity, 0.0 = pure relevance.
2727
:param metric: Similarity metric to use. Default is Metric.COSINE.
2828
:param normalize: Whether to normalize embeddings before computing similarity.
2929
:return: A DiversificationResult containing the selected item indices,
@@ -36,5 +36,5 @@ def mmr(
3636
k=k,
3737
metric=metric,
3838
normalize=normalize,
39-
lambda_param=lambda_param,
39+
diversity=diversity,
4040
)

src/pyversity/strategies/msd.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ def msd(
88
embeddings: np.ndarray,
99
scores: np.ndarray,
1010
k: int,
11-
lambda_param: float = 0.5,
11+
diversity: float = 0.5,
1212
metric: Metric = Metric.COSINE,
1313
normalize: bool = True,
1414
) -> DiversificationResult:
@@ -22,9 +22,8 @@ def msd(
2222
:param embeddings: 2D array of shape (n_samples, n_features).
2323
:param scores: 1D array of relevance scores for each item.
2424
:param k: Number of items to select.
25-
:param lambda_param: Trade-off parameter in [0, 1].
25+
:param diversity: Trade-off parameter in [0, 1] (internally lambda = 1 - diversity).
2626
1.0 = pure diversity, 0.0 = pure relevance.
27-
2827
:param metric: Similarity metric to use. Default is Metric.COSINE.
2928
:param normalize: Whether to normalize embeddings before computing similarity.
3029
:return: A DiversificationResult containing the selected item indices,
@@ -37,5 +36,5 @@ def msd(
3736
k=k,
3837
metric=metric,
3938
normalize=normalize,
40-
lambda_param=lambda_param,
39+
diversity=diversity,
4140
)

src/pyversity/strategies/utils.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def greedy_select(
1414
*,
1515
metric: Metric,
1616
normalize: bool,
17-
lambda_param: float,
17+
diversity: float = 0.5,
1818
) -> DiversificationResult:
1919
"""
2020
Greedy selection for MMR/MSD strategies.
@@ -30,22 +30,25 @@ def greedy_select(
3030
:param k: Number of items to select.
3131
:param metric: Similarity metric to use. Default is Metric.COSINE.
3232
:param normalize: Whether to normalize embeddings before computing similarity.
33-
:param lambda_param: Trade-off parameter in [0, 1].
34-
1.0 = pure relevance, 0.0 = pure diversity.
33+
:param diversity: Trade-off parameter in [0, 1].
34+
1.0 = pure diversity, 0.0 = pure relevance.
3535
:return: A DiversificationResult containing the selected item indices,
3636
their marginal gains, the strategy used, and the parameters.
37-
:raises ValueError: If lambda_param is not in [0, 1].
37+
:raises ValueError: If diversity is not in [0, 1].
3838
:raises ValueError: If input shapes are inconsistent.
3939
"""
4040
# Validate parameters
41-
if not (0.0 <= float(lambda_param) <= 1.0):
42-
raise ValueError("lambda_param must be in [0, 1]")
41+
if not (0.0 <= float(diversity) <= 1.0):
42+
raise ValueError("diversity must be in [0, 1]")
4343

4444
params = {
45-
"lambda_param": lambda_param,
4645
"metric": metric,
4746
}
4847

48+
# Lambda parameter for trade-off between relevance and diversity
49+
# This is 1 - diversity to align with common notation
50+
lambda_param = 1.0 - diversity
51+
4952
# Prepare inputs
5053
feature_matrix, relevance_scores, top_k, early_exit = prepare_inputs(embeddings, scores, k)
5154
if early_exit:
@@ -54,6 +57,7 @@ def greedy_select(
5457
indices=np.empty(0, np.int32),
5558
marginal_gains=np.empty(0, np.float32),
5659
strategy=Strategy.MMR if strategy == "mmr" else Strategy.MSD,
60+
diversity=diversity,
5761
parameters=params,
5862
)
5963

@@ -108,5 +112,6 @@ def greedy_select(
108112
indices=selected_indices,
109113
marginal_gains=marginal_gains,
110114
strategy=Strategy.MMR if strategy == "mmr" else Strategy.MSD,
115+
diversity=diversity,
111116
parameters=params,
112117
)

0 commit comments

Comments
 (0)