Alignment-Lab-AI
diff --git a/‎README.md‎
Lines changed: 2 additions & 1 deletion b/‎README.md‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎src/pyversity/datatypes.py‎
Lines changed: 3 additions & 1 deletion b/‎src/pyversity/datatypes.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎src/pyversity/pyversity.py‎
Lines changed: 6 additions & 4 deletions b/‎src/pyversity/pyversity.py‎
Lines changed: 6 additions & 4 deletions
diff --git a/‎src/pyversity/strategies/cover.py‎
Lines changed: 12 additions & 6 deletions b/‎src/pyversity/strategies/cover.py‎
Lines changed: 12 additions & 6 deletions
diff --git a/‎src/pyversity/strategies/dpp.py‎
Lines changed: 9 additions & 5 deletions b/‎src/pyversity/strategies/dpp.py‎
Lines changed: 9 additions & 5 deletions
diff --git a/‎src/pyversity/strategies/mmr.py‎
Lines changed: 4 additions & 4 deletions b/‎src/pyversity/strategies/mmr.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎src/pyversity/strategies/msd.py‎
Lines changed: 3 additions & 4 deletions b/‎src/pyversity/strategies/msd.py‎
Lines changed: 3 additions & 4 deletions
diff --git a/‎src/pyversity/strategies/utils.py‎
Lines changed: 12 additions & 7 deletions b/‎src/pyversity/strategies/utils.py‎
Lines changed: 12 additions & 7 deletions
@@ -35,12 +35,13 @@ from pyversity import diversify, Strategy
 embeddings = np.random.randn(100, 256)
 scores = np.random.rand(100)
 
-# Diversify with with a chosen strategy (in this case MMR)
+# Diversify with a chosen strategy (in this case MMR) and a diversity of 0.5 (balanced)
 diversified_result = diversify(
  embeddings=embeddings,
  scores=scores,
  k=10,
  strategy=Strategy.MMR,
+ diversity=0.5
 )
 # Get the indices of the diversified result
 diversified_indices = diversified_result.indices
 
@@ -30,11 +30,13 @@ class DiversificationResult:
  indices: Diversified item indices.
  marginal_gains: Marginal gains/relevance scores for the diversified items.
  strategy: Diversification strategy used.
+ diversity: Diversity parameter used in the strategy.
  parameters: Additional parameters used in the strategy.
 
  """
 
  indices: np.ndarray
  marginal_gains: np.ndarray
  strategy: Strategy
- parameters: dict
+ diversity: float
+ parameters: dict | None = None
@@ -11,6 +11,7 @@ def diversify(
  scores: np.ndarray,
  k: int,
  strategy: Strategy = Strategy.MMR,
+ diversity: float = 0.5,
  **kwargs: Any,
 ) -> DiversificationResult:
  """
@@ -21,17 +22,18 @@ def diversify(
  :param k: The number of items to select for the diversified result.
  :param strategy: The diversification strategy to apply.
  Supported strategies are: 'mmr' (default), 'msd', 'cover', and 'dpp'.
+ :param diversity: Diversity parameter. Higher values prioritize diversity and lower values prioritize relevance.
  :param **kwargs: Additional keyword arguments passed to the specific strategy function.
  :return: A DiversificationResult containing the selected item indices,
  their marginal gains, the strategy used, and the parameters.
  :raises ValueError: If the provided strategy is not recognized.
  """
  if strategy == Strategy.MMR:
- return mmr(embeddings, scores, k, **kwargs)
+ return mmr(embeddings, scores, k, diversity, **kwargs)
  if strategy == Strategy.MSD:
- return msd(embeddings, scores, k, **kwargs)
+ return msd(embeddings, scores, k, diversity, **kwargs)
  if strategy == Strategy.COVER:
- return cover(embeddings, scores, k, **kwargs)
+ return cover(embeddings, scores, k, diversity, **kwargs)
  if strategy == Strategy.DPP:
- return dpp(embeddings, scores, k, **kwargs)
+ return dpp(embeddings, scores, k, diversity, **kwargs)
  raise ValueError(f"Unknown strategy: {strategy}")
@@ -8,7 +8,7 @@ def cover(
  embeddings: np.ndarray,
  scores: np.ndarray,
  k: int,
- theta: float = 0.5,
+ diversity: float = 0.5,
  gamma: float = 0.5,
  metric: Metric = Metric.COSINE,
  normalize: bool = True,
@@ -22,8 +22,8 @@ def cover(
  :param embeddings: 2D array of shape (n_samples, n_features).
  :param scores: 1D array of relevance scores for each item.
  :param k: Number of items to select.
- :param theta: Trade-off between relevance and coverage in [0, 1].
- 1.0 = pure relevance, 0.0 = pure coverage.
+ :param diversity: Trade-off between relevance and coverage in [0, 1] (theta = 1 - diversity).
+  1.0 = pure coverage, 0.0 = pure relevance.
  :param gamma: Concavity parameter in (0, 1]; lower values emphasize diversity.
  :param metric: Similarity metric to use. Default is Metric.COSINE.
  :param normalize: Whether to normalize embeddings before computing similarity.
@@ -33,17 +33,20 @@ def cover(
  :raises ValueError: If gamma is not in (0, 1].
  """
  # Validate parameters
- if not (0.0 <= float(theta) <= 1.0):
- raise ValueError("theta must be in [0, 1]")
+ if not (0.0 <= float(diversity) <= 1.0):
+ raise ValueError("diversity must be in [0, 1]")
  if not (0.0 < float(gamma) <= 1.0):
  raise ValueError("gamma must be in (0, 1]")
 
  params = {
- "theta": theta,
  "gamma": gamma,
  "metric": metric,
  }
 
+ # Theta parameter for trade-off between relevance and diversity
+ # This is 1 - diversity to align with common notation
+ theta = 1.0 - diversity
+
  # Prepare inputs
  feature_matrix, relevance_scores, top_k, early_exit = prepare_inputs(embeddings, scores, k)
  if early_exit:
@@ -52,6 +55,7 @@ def cover(
  indices=np.empty(0, np.int32),
  marginal_gains=np.empty(0, np.float32),
  strategy=Strategy.COVER,
+ diversity=diversity,
  parameters=params,
  )
 
@@ -67,6 +71,7 @@ def cover(
  indices=topk,
  marginal_gains=gains,
  strategy=Strategy.COVER,
+ diversity=diversity,
  parameters=params,
  )
 
@@ -103,5 +108,6 @@ def cover(
  indices=selected_indices,
  marginal_gains=marginal_gains,
  strategy=Strategy.COVER,
+ diversity=diversity,
  parameters=params,
  )
@@ -16,7 +16,7 @@ def dpp(
  embeddings: np.ndarray,
  scores: np.ndarray,
  k: int,
- beta: float = 1.0,
+ diversity: float = 1.0,
 ) -> DiversificationResult:
  """
  Greedy determinantal point process (DPP) selection.
@@ -28,11 +28,15 @@ def dpp(
  :param embeddings: 2D array of shape (n_samples, n_features).
  :param scores: 1D array of relevance scores for each item.
  :param k: Number of items to select.
- :param beta: Controls the influence of relevance scores in the DPP kernel.
- Higher values increase the emphasis on relevance.
+ :param diversity: Controls the influence of relevance scores in the DPP kernel (beta = 1 - diversity).
+  Higher values decrease the emphasis on relevance.
  :return: A DiversificationResult containing the selected item indices,
  their marginal gains, the strategy used, and the parameters.
  """
+ # Beta parameter to control relevance influence in DPP kernel.
+ # This is 1 - diversity (the complement, not the reciprocal), so higher diversity lowers beta.
+ beta = 1 - diversity
+
  # Prepare inputs
  feature_matrix, relevance_scores, top_k, early_exit = prepare_inputs(embeddings, scores, k)
  if early_exit:
@@ -41,7 +45,7 @@ def dpp(
  indices=np.empty(0, np.int32),
  marginal_gains=np.empty(0, np.float32),
  strategy=Strategy.DPP,
- parameters={"beta": beta},
+ diversity=diversity,
  )
  # Normalize feature vectors to unit length for cosine similarity
  feature_matrix = normalize_rows(feature_matrix)
@@ -97,5 +101,5 @@ def dpp(
  indices=selected_indices[:step],
  marginal_gains=marginal_gains[:step],
  strategy=Strategy.DPP,
- parameters={"beta": beta},
+ diversity=diversity,
  )
@@ -8,7 +8,7 @@ def mmr(
  embeddings: np.ndarray,
  scores: np.ndarray,
  k: int,
- lambda_param: float = 0.5,
+ diversity: float = 0.5,
  metric: Metric = Metric.COSINE,
  normalize: bool = True,
 ) -> DiversificationResult:
@@ -22,8 +22,8 @@ def mmr(
  :param embeddings: 2D array of shape (n_samples, n_features).
  :param scores: 1D array of relevance scores for each item.
  :param k: Number of items to select.
- :param lambda_param: Trade-off parameter in [0, 1].
- 1.0 = pure relevance, 0.0 = pure diversity.
+ :param diversity: Trade-off parameter in [0, 1] (lambda = 1 - diversity).
+  1.0 = pure diversity, 0.0 = pure relevance.
  :param metric: Similarity metric to use. Default is Metric.COSINE.
  :param normalize: Whether to normalize embeddings before computing similarity.
  :return: A DiversificationResult containing the selected item indices,
@@ -36,5 +36,5 @@ def mmr(
  k=k,
  metric=metric,
  normalize=normalize,
- lambda_param=lambda_param,
+ diversity=diversity,
  )
@@ -8,7 +8,7 @@ def msd(
  embeddings: np.ndarray,
  scores: np.ndarray,
  k: int,
- lambda_param: float = 0.5,
+ diversity: float = 0.5,
  metric: Metric = Metric.COSINE,
  normalize: bool = True,
 ) -> DiversificationResult:
@@ -22,9 +22,8 @@ def msd(
  :param embeddings: 2D array of shape (n_samples, n_features).
  :param scores: 1D array of relevance scores for each item.
  :param k: Number of items to select.
- :param lambda_param: Trade-off parameter in [0, 1].
+ :param diversity: Trade-off parameter in [0, 1] (lambda = 1 - diversity).
  1.0 = pure diversity, 0.0 = pure relevance.
-
  :param metric: Similarity metric to use. Default is Metric.COSINE.
  :param normalize: Whether to normalize embeddings before computing similarity.
  :return: A DiversificationResult containing the selected item indices,
@@ -37,5 +36,5 @@ def msd(
  k=k,
  metric=metric,
  normalize=normalize,
- lambda_param=lambda_param,
+ diversity=diversity,
  )
@@ -14,7 +14,7 @@ def greedy_select(
  *,
  metric: Metric,
  normalize: bool,
- lambda_param: float,
+ diversity: float = 0.5,
 ) -> DiversificationResult:
  """
  Greedy selection for MMR/MSD strategies.
@@ -30,22 +30,25 @@ def greedy_select(
  :param k: Number of items to select.
  :param metric: Similarity metric to use. Default is Metric.COSINE.
  :param normalize: Whether to normalize embeddings before computing similarity.
- :param lambda_param: Trade-off parameter in [0, 1].
- 1.0 = pure relevance, 0.0 = pure diversity.
+ :param diversity: Trade-off parameter in [0, 1].
+ 1.0 = pure diversity, 0.0 = pure relevance.
  :return: A DiversificationResult containing the selected item indices,
  their marginal gains, the strategy used, and the parameters.
- :raises ValueError: If lambda_param is not in [0, 1].
+ :raises ValueError: If diversity is not in [0, 1].
  :raises ValueError: If input shapes are inconsistent.
  """
  # Validate parameters
- if not (0.0 <= float(lambda_param) <= 1.0):
- raise ValueError("lambda_param must be in [0, 1]")
+ if not (0.0 <= float(diversity) <= 1.0):
+ raise ValueError("diversity must be in [0, 1]")
 
  params = {
- "lambda_param": lambda_param,
  "metric": metric,
  }
 
+ # Lambda parameter for trade-off between relevance and diversity
+ # This is 1 - diversity to align with common notation
+ lambda_param = 1.0 - diversity
+
  # Prepare inputs
  feature_matrix, relevance_scores, top_k, early_exit = prepare_inputs(embeddings, scores, k)
  if early_exit:
@@ -54,6 +57,7 @@ def greedy_select(
  indices=np.empty(0, np.int32),
  marginal_gains=np.empty(0, np.float32),
  strategy=Strategy.MMR if strategy == "mmr" else Strategy.MSD,
+ diversity=diversity,
  parameters=params,
  )
 
@@ -108,5 +112,6 @@ def greedy_select(
  indices=selected_indices,
  marginal_gains=marginal_gains,
  strategy=Strategy.MMR if strategy == "mmr" else Strategy.MSD,
+ diversity=diversity,
  parameters=params,
  )