|
| 1 | +from typing import Any, Callable |
| 2 | + |
| 3 | +import numpy as np |
| 4 | +import pytest |
| 5 | +from pyversity import cover, diversify, dpp, mmr, msd |
| 6 | +from pyversity.datatypes import Metric, Strategy |
| 7 | + |
| 8 | + |
def test_mmr() -> None:
    """Exercise the MMR strategy across relevance/diversity trade-offs."""
    # lambda_param=1.0 reduces MMR to plain top-k selection by score.
    identity_emb = np.eye(5, dtype=np.float32)
    relevance = np.array([0.1, 0.9, 0.3, 0.8, 0.2], dtype=np.float32)
    picked, picked_gains = mmr(identity_emb, relevance, k=3, lambda_param=1.0, metric=Metric.COSINE, normalize=True)
    want = np.array([1, 3, 2], dtype=np.int32)
    assert np.array_equal(picked, want)
    assert np.allclose(picked_gains, relevance[want])

    # With a near-duplicate of item 0 present, both diversity-only (0.0)
    # and balanced (0.5) settings must skip it in favor of item 2.
    clustered = np.array([[1.0, 0.0], [0.999, 0.001], [0.0, 1.0]], dtype=np.float32)
    clustered_scores = np.array([1.0, 0.99, 0.98], dtype=np.float32)
    for lam in (0.0, 0.5):
        picked, _ = mmr(clustered, clustered_scores, k=2, lambda_param=lam, metric=Metric.COSINE, normalize=True)
        assert (picked[0], picked[1]) == (0, 2)

    # lambda_param outside [0, 1] must be rejected.
    with pytest.raises(ValueError):
        mmr(np.eye(2, dtype=np.float32), np.array([1.0, 0.5], dtype=np.float32), k=1, lambda_param=-0.1)
| 32 | + |
| 33 | + |
def test_msd() -> None:
    """Exercise the MSD strategy across relevance/diversity trade-offs."""
    # lambda_param=1.0 reduces MSD to plain top-k selection by score.
    identity_emb = np.eye(4, dtype=np.float32)
    relevance = np.array([0.5, 0.2, 0.9, 0.1], dtype=np.float32)
    picked, _ = msd(identity_emb, relevance, k=2, lambda_param=1.0, metric=Metric.COSINE, normalize=True)
    assert np.array_equal(picked, np.array([2, 0], dtype=np.int32))

    # With a near-duplicate of item 0 present, both diversity-only (0.0)
    # and balanced (0.5) settings must skip it in favor of item 2.
    clustered = np.array([[1.0, 0.0], [0.999, 0.001], [0.0, 1.0]], dtype=np.float32)
    clustered_scores = np.array([1.0, 0.99, 0.98], dtype=np.float32)
    for lam in (0.0, 0.5):
        picked, _ = msd(clustered, clustered_scores, k=2, lambda_param=lam, metric=Metric.COSINE, normalize=True)
        assert (picked[0], picked[1]) == (0, 2)

    # lambda_param outside [0, 1] must be rejected.
    with pytest.raises(ValueError):
        msd(np.eye(2, dtype=np.float32), np.array([1.0, 0.5], dtype=np.float32), k=1, lambda_param=1.1)
| 55 | + |
| 56 | + |
def test_cover() -> None:
    """Exercise the COVER strategy's theta/gamma behavior and validation."""
    basis = np.eye(3, dtype=np.float32)
    rel = np.array([0.1, 0.8, 0.3], dtype=np.float32)

    # theta=1.0 means pure relevance: plain top-k by score.
    chosen, chosen_gains = cover(basis, rel, k=2, theta=1.0)
    top_two = np.array([1, 2], dtype=np.int32)
    assert np.array_equal(chosen, top_two)
    assert np.allclose(chosen_gains, rel[top_two])

    # Balanced coverage still leads with the best item; the second pick
    # may be either remaining orthogonal axis.
    chosen, _ = cover(basis, rel, k=2, theta=0.5, gamma=0.5)
    assert chosen[0] == 1
    assert chosen[1] in (0, 2)

    # Out-of-range theta and non-positive gamma are all rejected.
    for bad in ({"theta": -0.01}, {"theta": 1.01}, {"gamma": 0.0}, {"gamma": -0.5}):
        with pytest.raises(ValueError):
            cover(basis, rel, k=2, **bad)
| 81 | + |
| 82 | + |
def test_dpp() -> None:
    """Exercise the DPP strategy across beta settings and the empty case."""
    vecs = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]], dtype=np.float32)
    rel = np.array([0.1, 0.2, 0.3], dtype=np.float32)

    def check_selection(sel: np.ndarray, sel_gains: np.ndarray, k: int) -> None:
        # DPP may stop early, so the size is bounded by k rather than exact.
        assert 1 <= sel.size <= k
        # Gains must be non-negative (up to float noise) and non-increasing.
        assert np.all(sel_gains >= -1e-7)
        assert np.all(sel_gains[:-1] + 1e-7 >= sel_gains[1:])

    # beta=0.0 (diversity-only kernel), beta=1.0, and balanced beta=0.5.
    for k, beta in ((3, 0.0), (2, 1.0), (2, 0.5)):
        sel, sel_gains = dpp(vecs, rel, k=k, beta=beta)
        check_selection(sel, sel_gains, k)

    # Zero rows in -> zero selections out.
    sel, sel_gains = dpp(np.empty((0, 3), dtype=np.float32), np.array([]), k=3)
    assert sel.size == 0 and sel_gains.size == 0
| 109 | + |
| 110 | + |
@pytest.mark.parametrize(
    "strategy, fn, kwargs",
    [
        (Strategy.MMR, mmr, {"lambda_param": 0.5, "metric": Metric.COSINE, "normalize": True}),
        (Strategy.MSD, msd, {"lambda_param": 0.5, "metric": Metric.COSINE, "normalize": True}),
        (Strategy.COVER, cover, {"theta": 0.5, "gamma": 0.5}),
        (Strategy.DPP, dpp, {"beta": 0.5}),
    ],
)
def test_diversify(strategy: Strategy, fn: Callable[..., Any], kwargs: dict[str, Any]) -> None:
    """The ``diversify`` dispatcher must match each strategy function exactly.

    Runs the strategy-specific function directly and via ``diversify`` with
    identical inputs, then checks that both the selected indices and the
    per-selection gains agree.
    """
    emb = np.eye(4, dtype=np.float32)
    scores = np.array([0.3, 0.7, 0.1, 0.5], dtype=np.float32)

    idx_direct, gains_direct = fn(emb, scores, k=2, **kwargs)
    idx_disp, gains_disp = diversify(strategy, embeddings=emb, scores=scores, k=2, **kwargs)

    # Indices are compared exactly; gains with float tolerance.
    assert np.array_equal(idx_direct, idx_disp)
    assert np.allclose(gains_direct, gains_disp)
0 commit comments