Skip to content

Commit 5046ca4

Browse files
authored
tests: Add tests (Pringled#4)
* Added tests * Added tests * Added tests * Added tests
1 parent 387ccd1 commit 5046ca4

File tree

11 files changed

+231
-19
lines changed

11 files changed

+231
-19
lines changed

src/pyversity/core.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@ def diversify(
2626
:raises ValueError: If the provided strategy is not recognized.
2727
"""
2828
if strategy == Strategy.MMR:
29-
return mmr(scores, embeddings, k, **kwargs)
29+
return mmr(embeddings, scores, k, **kwargs)
3030
if strategy == Strategy.MSD:
31-
return msd(scores, embeddings, k, **kwargs)
31+
return msd(embeddings, scores, k, **kwargs)
3232
if strategy == Strategy.COVER:
33-
return cover(scores, embeddings, k, **kwargs)
33+
return cover(embeddings, scores, k, **kwargs)
3434
if strategy == Strategy.DPP:
35-
return dpp(scores, embeddings, k, **kwargs)
35+
return dpp(embeddings, scores, k, **kwargs)
3636
raise ValueError(f"Unknown strategy: {strategy}")

src/pyversity/strategies/cover.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def cover(
3838
raise ValueError("gamma must be in (0, 1]")
3939

4040
# Prepare inputs
41-
relevance_scores, feature_matrix, top_k, early_exit = prepare_inputs(scores, embeddings, k)
41+
feature_matrix, relevance_scores, top_k, early_exit = prepare_inputs(embeddings, scores, k)
4242
if early_exit:
4343
# Nothing to select: return empty arrays
4444
return np.empty(0, np.int32), np.empty(0, np.float32)

src/pyversity/strategies/dpp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def dpp(
3232
:return: Tuple of selected indices and their marginal gains.
3333
"""
3434
# Prepare inputs
35-
relevance_scores, feature_matrix, top_k, early_exit = prepare_inputs(scores, embeddings, k)
35+
feature_matrix, relevance_scores, top_k, early_exit = prepare_inputs(embeddings, scores, k)
3636
if early_exit:
3737
# Nothing to select: return empty arrays
3838
return np.empty(0, np.int32), np.empty(0, np.float32)

src/pyversity/strategies/mmr.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ def mmr(
3030
"""
3131
return greedy_select(
3232
"mmr",
33-
scores,
34-
embeddings,
35-
k,
33+
embeddings=embeddings,
34+
scores=scores,
35+
k=k,
3636
metric=metric,
3737
normalize=normalize,
3838
lambda_param=lambda_param,

src/pyversity/strategies/msd.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ def msd(
3131
"""
3232
return greedy_select(
3333
"msd",
34-
scores,
35-
embeddings,
36-
k,
34+
embeddings=embeddings,
35+
scores=scores,
36+
k=k,
3737
metric=metric,
3838
normalize=normalize,
3939
lambda_param=lambda_param,

src/pyversity/strategies/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def greedy_select(
4141
raise ValueError("lambda_param must be in [0, 1]")
4242

4343
# Prepare inputs
44-
relevance_scores, feature_matrix, top_k, early_exit = prepare_inputs(scores, embeddings, k)
44+
feature_matrix, relevance_scores, top_k, early_exit = prepare_inputs(embeddings, scores, k)
4545
if early_exit:
4646
# Nothing to select: return empty arrays
4747
return np.empty(0, np.int32), np.empty(0, np.float32)

src/pyversity/utils.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,30 +19,30 @@ def normalize_rows(vectors: np.ndarray) -> np.ndarray:
1919
return vectors / safe_norms
2020

2121

22-
def prepare_inputs(relevances: np.ndarray, embeddings: np.ndarray, k: int) -> tuple[np.ndarray, np.ndarray, int, bool]:
22+
def prepare_inputs(embeddings: np.ndarray, scores: np.ndarray, k: int) -> tuple[np.ndarray, np.ndarray, int, bool]:
2323
"""
2424
Prepare relevance scores and embeddings.
2525
26-
:param relevances: Array of relevance scores.
2726
:param embeddings: 2-D array of embeddings, one row per sample.
27+
:param scores: Array of relevance scores.
2828
:param k: Number of top elements to consider.
2929
:return: Tuple of embeddings, relevance_scores, k_clamped, early_exit.
3030
:raises ValueError: If input shapes are inconsistent.
3131
"""
32-
relevances = np.asarray(relevances, dtype=np.float32).reshape(-1)
32+
relevance_scores = np.asarray(scores, dtype=np.float32).reshape(-1)
3333
embeddings = np.asarray(embeddings, dtype=np.float32, order="C")
3434

3535
if embeddings.ndim != 2:
3636
raise ValueError(f"embeddings must be 2-D, got shape {embeddings.shape}")
3737

3838
num_samples = embeddings.shape[0]
39-
if relevances.shape[0] != num_samples:
40-
raise ValueError(f"relevances length {relevances.shape[0]} != embeddings rows {num_samples}")
39+
if relevance_scores.shape[0] != num_samples:
40+
raise ValueError(f"relevance_scores length {relevance_scores.shape[0]} != embeddings rows {num_samples}")
4141

4242
k_clamped = int(max(0, min(int(k), num_samples)))
4343
early_exit = (num_samples == 0) or (k_clamped == 0)
4444

45-
return relevances, embeddings, k_clamped, early_exit
45+
return embeddings, relevance_scores, k_clamped, early_exit
4646

4747

4848
def vector_similarity(

tests/__init__.py

Whitespace-only changes.

tests/conftest.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import numpy as np
2+
import pytest
3+
4+
5+
@pytest.fixture
6+
def near_dups() -> tuple[np.ndarray, np.ndarray]:
7+
"""Embeddings with near-duplicates and their scores."""
8+
emb = np.array(
9+
[
10+
[1.0, 0.0],
11+
[0.999, 0.001], # ~same as 0
12+
[0.0, 1.0],
13+
],
14+
dtype=np.float32,
15+
)
16+
scores = np.array([1.0, 0.99, 0.98], dtype=np.float32)
17+
return emb, scores
18+
19+
20+
@pytest.fixture
21+
def sim_data() -> tuple[np.ndarray, np.ndarray]:
22+
"""Data for similarity tests: 3 samples and a query vector."""
23+
X = np.array([[1.0, 0.0], [2.0, 0.0], [1.0, 1.0]], dtype=np.float32)
24+
v = np.array([1.0, 0.5], dtype=np.float32)
25+
return X, v

tests/test_strategies.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
from typing import Any, Callable
2+
3+
import numpy as np
4+
import pytest
5+
from pyversity import cover, diversify, dpp, mmr, msd
6+
from pyversity.datatypes import Metric, Strategy
7+
8+
9+
def test_mmr() -> None:
10+
"""Test MMR strategy with various lambda settings."""
11+
# Pure relevance (lambda=1): picks top-k by scores
12+
emb = np.eye(5, dtype=np.float32)
13+
scores = np.array([0.1, 0.9, 0.3, 0.8, 0.2], dtype=np.float32)
14+
idx, gains = mmr(emb, scores, k=3, lambda_param=1.0, metric=Metric.COSINE, normalize=True)
15+
expected = np.array([1, 3, 2], dtype=np.int32)
16+
assert np.array_equal(idx, expected)
17+
assert np.allclose(gains, scores[expected])
18+
19+
# Strong diversity (lambda=0): avoid near-duplicate
20+
emb = np.array([[1.0, 0.0], [0.999, 0.001], [0.0, 1.0]], dtype=np.float32)
21+
scores = np.array([1.0, 0.99, 0.98], dtype=np.float32)
22+
idx, _ = mmr(emb, scores, k=2, lambda_param=0.0, metric=Metric.COSINE, normalize=True)
23+
assert idx[0] == 0 and idx[1] == 2
24+
25+
# Balanced (lambda=0.5): picks mix of relevance and diversity
26+
idx, _ = mmr(emb, scores, k=2, lambda_param=0.5, metric=Metric.COSINE, normalize=True)
27+
assert idx[0] == 0 and idx[1] == 2
28+
29+
# Bounds check
30+
with pytest.raises(ValueError):
31+
mmr(np.eye(2, dtype=np.float32), np.array([1.0, 0.5], dtype=np.float32), k=1, lambda_param=-0.1)
32+
33+
34+
def test_msd() -> None:
35+
"""Test MSD strategy with various lambda settings."""
36+
# Pure relevance (lambda=1): picks top-k by scores
37+
emb = np.eye(4, dtype=np.float32)
38+
scores = np.array([0.5, 0.2, 0.9, 0.1], dtype=np.float32)
39+
idx, _ = msd(emb, scores, k=2, lambda_param=1.0, metric=Metric.COSINE, normalize=True)
40+
assert np.array_equal(idx, np.array([2, 0], dtype=np.int32))
41+
42+
# Strong diversity (lambda=0): picks most dissimilar
43+
emb = np.array([[1.0, 0.0], [0.999, 0.001], [0.0, 1.0]], dtype=np.float32)
44+
scores = np.array([1.0, 0.99, 0.98], dtype=np.float32)
45+
idx, _ = msd(emb, scores, k=2, lambda_param=0.0, metric=Metric.COSINE, normalize=True)
46+
assert idx[0] == 0 and idx[1] == 2
47+
48+
# Balanced (lambda=0.5): picks mix of relevance and diversity
49+
idx, _ = msd(emb, scores, k=2, lambda_param=0.5, metric=Metric.COSINE, normalize=True)
50+
assert idx[0] == 0 and idx[1] == 2
51+
52+
# Bounds check
53+
with pytest.raises(ValueError):
54+
msd(np.eye(2, dtype=np.float32), np.array([1.0, 0.5], dtype=np.float32), k=1, lambda_param=1.1)
55+
56+
57+
def test_cover() -> None:
58+
"""Test COVER strategy with various theta and gamma settings."""
59+
emb = np.eye(3, dtype=np.float32)
60+
scores = np.array([0.1, 0.8, 0.3], dtype=np.float32)
61+
62+
# Pure relevance (theta=1): picks top-k by scores
63+
idx, gains = cover(emb, scores, k=2, theta=1.0)
64+
expected = np.array([1, 2], dtype=np.int32)
65+
assert np.array_equal(idx, expected)
66+
assert np.allclose(gains, scores[expected])
67+
68+
# Balanced coverage (theta=0.5, gamma=0.5): picks diverse set
69+
idx, _ = cover(emb, scores, k=2, theta=0.5, gamma=0.5)
70+
assert idx[0] == 1 and idx[1] in (0, 2)
71+
72+
# Parameter validation
73+
with pytest.raises(ValueError):
74+
cover(emb, scores, k=2, theta=-0.01)
75+
with pytest.raises(ValueError):
76+
cover(emb, scores, k=2, theta=1.01)
77+
with pytest.raises(ValueError):
78+
cover(emb, scores, k=2, gamma=0.0)
79+
with pytest.raises(ValueError):
80+
cover(emb, scores, k=2, gamma=-0.5)
81+
82+
83+
def test_dpp() -> None:
84+
"""Test DPP strategy with various beta settings."""
85+
emb = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]], dtype=np.float32)
86+
scores = np.array([0.1, 0.2, 0.3], dtype=np.float32)
87+
88+
# Beta=0: ignore relevance, diversity-only kernel
89+
idx, gains = dpp(emb, scores, k=3, beta=0.0)
90+
assert 1 <= idx.size <= 3
91+
assert np.all(gains >= -1e-7)
92+
assert np.all(gains[:-1] + 1e-7 >= gains[1:])
93+
94+
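# Relevance-weighted kernel (beta=1) -- presumably the opposite extreme of the beta=0 diversity-only case; TODO confirm against dpp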
95+
idx, gains = dpp(emb, scores, k=2, beta=1.0)
96+
assert 1 <= idx.size <= 2
97+
assert np.all(gains >= -1e-7)
98+
assert np.all(gains[:-1] + 1e-7 >= gains[1:])
99+
100+
# Balanced (beta=0.5)
101+
idx, gains = dpp(emb, scores, k=2, beta=0.5)
102+
assert 1 <= idx.size <= 2
103+
assert np.all(gains >= -1e-7)
104+
assert np.all(gains[:-1] + 1e-7 >= gains[1:])
105+
106+
# Early exit on empty input
107+
idx, gains = dpp(np.empty((0, 3), dtype=np.float32), np.array([]), k=3)
108+
assert idx.size == 0 and gains.size == 0
109+
110+
111+
@pytest.mark.parametrize(
112+
"strategy, fn, kwargs",
113+
[
114+
(Strategy.MMR, mmr, {"lambda_param": 0.5, "metric": Metric.COSINE, "normalize": True}),
115+
(Strategy.MSD, msd, {"lambda_param": 0.5, "metric": Metric.COSINE, "normalize": True}),
116+
(Strategy.COVER, cover, {"theta": 0.5, "gamma": 0.5}),
117+
(Strategy.DPP, dpp, {"beta": 0.5}),
118+
],
119+
)
120+
def test_diversify(strategy: Strategy, fn: Callable, kwargs: Any) -> None:
121+
"""Test the diversify function."""
122+
emb = np.eye(4, dtype=np.float32)
123+
scores = np.array([0.3, 0.7, 0.1, 0.5], dtype=np.float32)
124+
125+
idx_direct, gains_direct = fn(emb, scores, k=2, **kwargs)
126+
idx_disp, gains_disp = diversify(strategy, embeddings=emb, scores=scores, k=2, **kwargs)
127+
128+
assert np.array_equal(idx_direct, idx_disp)
129+
assert np.allclose(gains_direct, gains_disp)

0 commit comments

Comments
 (0)