From 05237f227061c332c6949573bd3fee30bf128b54 Mon Sep 17 00:00:00 2001 From: Artur Barseghyan Date: Mon, 2 Mar 2026 23:27:32 +0100 Subject: [PATCH 1/5] Fixes in docs. Adding pytest-codeblock for testing documentation examples. Adding a new conftest.py for testing documentation examples. Replace irrelevant reference/dependency to pytest-coverage with a valid pytest-cov. Add pytest-cov configuration. Update makefile test command accordingly. --- Makefile | 2 +- README.md | 12 ++++ benchmarks/README.md | 4 +- conftest.py | 148 +++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 33 +++++++++- uv.lock | 41 +++++------- 6 files changed, 212 insertions(+), 28 deletions(-) create mode 100644 conftest.py diff --git a/Makefile b/Makefile index dd19c34..fd353d0 100644 --- a/Makefile +++ b/Makefile @@ -9,4 +9,4 @@ fix: uv run pre-commit run --all-files test: - uv run pytest --cov=PACKAGE --cov-report=term-missing + uv run pytest diff --git a/README.md b/README.md index 6325782..aadbcff 100644 --- a/README.md +++ b/README.md @@ -107,6 +107,8 @@ This improves exploration, user satisfaction, and coverage across many domains: MMR and DPP work well as general-purpose diversifiers. In product search, use them to avoid showing near-duplicate results: + + ```python from pyversity import diversify, Strategy @@ -128,6 +130,8 @@ result = diversify( COVER ensures the selected items collectively represent the full topic space. For academic papers, this means covering different subfields and methodologies: + + ```python from pyversity import diversify, Strategy @@ -151,6 +155,9 @@ result = diversify( In conversational RAG, you want to avoid feeding the model redundant chunks. SSD diversifies relative to recent context, making it a natural fit: + + + ```python import numpy as np from pyversity import diversify, Strategy @@ -179,6 +186,9 @@ recent_chunk_embeddings = np.vstack([recent_chunk_embeddings, chunk_embeddings[r In content feeds, users consume items sequentially. SSD introduces novelty relative to recently shown items, keeping the experience fresh: + + + ```python import numpy as np from pyversity import diversify, Strategy @@ -207,6 +217,8 @@ recent_feed_embeddings = np.vstack([recent_feed_embeddings, feed_embeddings[resu When extracting from a long document, you want sections that cover different parts. MSD prefers items that are far apart from each other: + + ```python from pyversity import diversify, Strategy diff --git a/benchmarks/README.md b/benchmarks/README.md index d5ad7df..f91f0e6 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -284,15 +284,17 @@ python -m benchmarks report
Programmatic API + ```python from benchmarks import BenchmarkConfig, run_benchmark from pyversity import Strategy config = BenchmarkConfig( - dataset_path="benchmarks/data/ml-32m", + dataset="ml-32m", sample_users=2000, strategies=[Strategy.MMR, Strategy.DPP, Strategy.MSD, Strategy.SSD], diversity_values=[0.0, 0.3, 0.5, 0.7, 1.0], + output_dir=benchmark_data, ) results = run_benchmark(config) ``` diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..502bbbb --- /dev/null +++ b/conftest.py @@ -0,0 +1,148 @@ +""" +Fixtures for testing the README examples. + +DO NOT USE ANY OF THESE IN OTHER TESTS! +""" + +from pathlib import Path +from typing import Generator + +import numpy as np +import pandas as pd +import pytest + +_rng = np.random.default_rng(0) + +_N = 20 # number of candidate items +_D = 16 # embedding dimension +_R = 5 # number of recent items + + +def _emb(n: int = _N) -> np.ndarray: + return _rng.standard_normal((n, _D)).astype(np.float64) + + +def _scores(n: int = _N) -> np.ndarray: + raw = _rng.random(n).astype(np.float64) + return raw / raw.sum() + + +# --- benchmarks/README.md: Programmatic API --- + + +@pytest.fixture(scope="session") +def benchmark_data(tmp_path_factory: pytest.TempPathFactory) -> Generator[Path, None, None]: + """ + Tiny synthetic MovieLens-format dataset for testing the benchmark API. + + Patches DATASET_REGISTRY["ml-32m"] to point at a temp directory containing + a minimal ratings.csv, and returns a temp output directory to use as + BenchmarkConfig.output_dir so the test never writes to the real results tree. + """ + from benchmarks.core.data import DATASET_REGISTRY + + # Create fake ratings.csv: 100 users × 20 items each, rating=4.5 (above 4.0 threshold) + rng = np.random.default_rng(0) + n_users, n_items, per_user = 100, 50, 20 + user_ids = np.repeat(np.arange(1, n_users + 1), per_user) + item_ids = np.concatenate([rng.choice(n_items, size=per_user, replace=False) + 1 for _ in range(n_users)]) + df = pd.DataFrame({"userId": user_ids, "movieId": item_ids, "rating": 4.5}) + + data_dir: Path = tmp_path_factory.mktemp("ml-32m") + df.to_csv(data_dir / "ratings.csv", index=False) + + out_dir: Path = tmp_path_factory.mktemp("benchmark_results") + + original_path = DATASET_REGISTRY["ml-32m"].path + DATASET_REGISTRY["ml-32m"].path = str(data_dir) + + yield out_dir + + DATASET_REGISTRY["ml-32m"].path = original_path + + +# --- Product / Web Search (test_README_2) --- + + +@pytest.fixture +def item_embeddings() -> np.ndarray: + """Item embeddings for testing the product/web search example.""" + return _emb() + + +@pytest.fixture +def item_scores() -> np.ndarray: + """Item scores for testing the product/web search example.""" + return _scores() + + +# --- Literature Search (test_README_3) --- + + +@pytest.fixture +def paper_embeddings() -> np.ndarray: + """Paper embeddings for testing the literature search example.""" + return _emb() + + +@pytest.fixture +def paper_scores() -> np.ndarray: + """Paper scores for testing the literature search example.""" + return _scores() + + +# --- Conversational RAG (test_README_4) --- + + +@pytest.fixture +def chunk_embeddings() -> np.ndarray: + """Chunk embeddings for testing the conversational RAG example.""" + return _emb() + + +@pytest.fixture +def chunk_scores() -> np.ndarray: + """Chunk scores for testing the conversational RAG example.""" + return _scores() + + +@pytest.fixture +def recent_chunk_embeddings() -> np.ndarray: + """Recent chunk embeddings for testing the conversational RAG example.""" + return _emb(_R) + + +# --- Infinite Scroll / Recommendation Feed (test_README_5) --- + + +@pytest.fixture +def feed_embeddings() -> np.ndarray: + """Feed item embeddings for testing the infinite scroll / recommendation feed example.""" + return _emb() + + +@pytest.fixture +def feed_scores() -> np.ndarray: + """Feed item scores for testing the infinite scroll / recommendation feed example.""" + return _scores() + + +@pytest.fixture +def recent_feed_embeddings() -> np.ndarray: + """Recent feed item embeddings for testing the infinite scroll / recommendation feed example.""" + return _emb(_R) + + +# --- Single Long Document (test_README_6) --- + + +@pytest.fixture +def doc_chunk_embeddings() -> np.ndarray: + """Document chunk embeddings for testing the single long document example.""" + return _emb() + + +@pytest.fixture +def doc_chunk_scores() -> np.ndarray: + """Document chunk scores for testing the single long document example.""" + return _scores() diff --git a/pyproject.toml b/pyproject.toml index 9097017..93eb2a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,8 @@ dev = [ "mypy", "pre-commit", "pytest", - "pytest-coverage", + "pytest-codeblock", + "pytest-cov", "ruff", "types-requests", ] @@ -114,3 +115,33 @@ pyversity = ["py.typed"] [tool.setuptools.dynamic] version = {attr = "pyversity.version.__version__"} + +[tool.pytest.ini_options] +addopts = [ + "-ra", + "-vvv", + "-q", + "--cov=pyversity", + "--ignore=benchmarks", + "--cov-report=html", + "--cov-report=term-missing", + "--cov-append", + "--capture=no", +] +pythonpath = ["src"] +norecursedirs = [".git", "benchmarks"] + +[tool.coverage.run] +relative_files = true +omit = [] +source = ["pyversity"] + +[tool.coverage.report] +show_missing = true +exclude_lines = [ + "pragma: no cover", + "@overload", +] + +[tool.pytest-codeblock] +test_nameless_codeblocks = true diff --git a/uv.lock b/uv.lock index ea49b63..847999b 100644 --- a/uv.lock +++ b/uv.lock @@ -1999,41 +1999,30 @@ wheels = [ ] [[package]] -name = "pytest-cov" -version = "7.0.0" +name = "pytest-codeblock" +version = "0.5.6" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "coverage", extra = ["toml"] }, - { name = "pluggy" }, { name = "pytest" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" }, -] - -[[package]] -name = "pytest-cover" -version = "3.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pytest-cov" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/30/27/20964101a7cdb260f8d6c4e854659026968321d10c90552b1fe7f6c5f913/pytest-cover-3.0.0.tar.gz", hash = "sha256:5bdb6c1cc3dd75583bb7bc2c57f5e1034a1bfcb79d27c71aceb0b16af981dbf4", size = 3211, upload-time = "2015-08-01T19:20:22.562Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a8/75/fa7f30676fcbe87d8b26af75272ad0030aa9a92e04685c2bed9f34a76f64/pytest_codeblock-0.5.6.tar.gz", hash = "sha256:6361dbc438da2737d04a2ce61f5ca0a0534954313bcfde2c93b98d005396bf1b", size = 31730, upload-time = "2026-02-24T00:14:57.152Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/71/9b/7b4700c462628e169bd859c6368d596a6aedc87936bde733bead9f875fce/pytest_cover-3.0.0-py2.py3-none-any.whl", hash = "sha256:578249955eb3b5f3991209df6e532bb770b647743b7392d3d97698dc02f39ebb", size = 3769, upload-time = "2015-08-01T19:20:18.534Z" }, + { url = "https://files.pythonhosted.org/packages/ab/ce/f4860f167d25df8c693e72fa27c91310dd0767150cb4b6e8e98350471803/pytest_codeblock-0.5.6-py3-none-any.whl", hash = "sha256:18e3763f8f0aa24f764c7990f20a399bd035537b0ac8826bbfd9c459313f8556", size = 33856, upload-time = "2026-02-24T00:14:55.71Z" }, ] [[package]] -name = "pytest-coverage" -version = "0.0" +name = "pytest-cov" +version = "7.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pytest-cover" }, + { name = "coverage", extra = ["toml"] }, + { name = "pluggy" }, + { name = "pytest" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/01/81/1d954849aed17b254d1c397eb4447a05eedce612a56b627c071df2ce00c1/pytest-coverage-0.0.tar.gz", hash = "sha256:db6af2cbd7e458c7c9fd2b4207cee75258243c8a81cad31a7ee8cfad5be93c05", size = 873, upload-time = "2015-06-17T21:50:38.956Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5b/4b/d95b052f87db89a2383233c0754c45f6d3b427b7a4bcb771ac9316a6fae1/pytest_coverage-0.0-py2.py3-none-any.whl", hash = "sha256:dedd084c5e74d8e669355325916dc011539b190355021b037242514dee546368", size = 2013, upload-time = "2015-06-17T22:08:36.771Z" }, + { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" }, ] [[package]] @@ -2084,7 +2073,8 @@ dev = [ { name = "mypy" }, { name = "pre-commit" }, { name = "pytest" }, - { name = "pytest-coverage" }, + { name = "pytest-codeblock" }, + { name = "pytest-cov" }, { name = "ruff" }, { name = "types-requests" }, ] @@ -2100,7 +2090,8 @@ requires-dist = [ { name = "pandas", marker = "extra == 'benchmarks'", specifier = "~=2.3.0" }, { name = "pre-commit", marker = "extra == 'dev'" }, { name = "pytest", marker = "extra == 'dev'" }, - { name = "pytest-coverage", marker = "extra == 'dev'" }, + { name = "pytest-codeblock", marker = "extra == 'dev'" }, + { name = "pytest-cov", marker = "extra == 'dev'" }, { name = "requests", marker = "extra == 'benchmarks'", specifier = "~=2.32.0" }, { name = "ruff", marker = "extra == 'dev'" }, { name = "scikit-learn", marker = "extra == 'benchmarks'", specifier = ">=1.6.0" }, From ba511cc273548ca9374973fa5c3fc456abeafca9 Mon Sep 17 00:00:00 2001 From: Artur Barseghyan Date: Mon, 2 Mar 2026 23:39:53 +0100 Subject: [PATCH 2/5] Make sure tests in bencharks are executed, but coverage ignored --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 93eb2a7..1224e2e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -122,18 +122,18 @@ addopts = [ "-vvv", "-q", "--cov=pyversity", - "--ignore=benchmarks", + "--cov=benchmarks", "--cov-report=html", "--cov-report=term-missing", "--cov-append", "--capture=no", ] pythonpath = ["src"] -norecursedirs = [".git", "benchmarks"] +norecursedirs = [".git"] [tool.coverage.run] relative_files = true -omit = [] +omit = ["benchmarks/*"] source = ["pyversity"] [tool.coverage.report] From edbd2f7e88734350049e4df9d443d64c4600176d Mon Sep 17 00:00:00 2001 From: Artur Barseghyan Date: Tue, 3 Mar 2026 22:45:27 +0100 Subject: [PATCH 3/5] Move conftest.py from root to tests/docs_conftest.py --- pyproject.toml | 3 ++- conftest.py => tests/docs_conftest.py | 0 2 files changed, 2 insertions(+), 1 deletion(-) rename conftest.py => tests/docs_conftest.py (100%) diff --git a/pyproject.toml b/pyproject.toml index 1224e2e..a1ebee3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -121,6 +121,7 @@ addopts = [ "-ra", "-vvv", "-q", + "-p", "tests.docs_conftest", "--cov=pyversity", "--cov=benchmarks", "--cov-report=html", @@ -128,7 +129,7 @@ addopts = [ "--cov-append", "--capture=no", ] -pythonpath = ["src"] +pythonpath = ["src", "."] norecursedirs = [".git"] [tool.coverage.run] diff --git a/conftest.py b/tests/docs_conftest.py similarity index 100% rename from conftest.py rename to tests/docs_conftest.py From e66e40f7950d1e9187bb4dbbe40cd0618519ebb2 Mon Sep 17 00:00:00 2001 From: Artur Barseghyan Date: Wed, 4 Mar 2026 13:14:16 +0100 Subject: [PATCH 4/5] Update install-no-pre-commit to include benchmarks --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fd353d0..2625839 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ install: uv run pre-commit install install-no-pre-commit: - uv pip install ".[dev]" + uv pip install ".[dev,benchmarks]" fix: uv run pre-commit run --all-files From 1b66b49915ce5983690b10ab8ca69ebc4fc35f7b Mon Sep 17 00:00:00 2001 From: Artur Barseghyan Date: Wed, 4 Mar 2026 13:47:54 +0100 Subject: [PATCH 5/5] Replace coverage test command with make test --- .github/workflows/ci.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d6b3544..7625164 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -40,9 +40,7 @@ jobs: # Run tests with coverage - name: Run tests under coverage - run: | - coverage run --source=pyversity -m pytest - coverage report + run: make test # Upload results to Codecov - name: Upload results to Codecov