From 01a79fdcd2b31ca02b16e9bf624438e17f0d374b Mon Sep 17 00:00:00 2001 From: Ben Spoor Date: Mon, 2 Mar 2026 21:21:50 +0100 Subject: [PATCH 01/17] Tighten screws --- .pre-commit-config.yaml | 2 +- pyproject.toml | 26 ++++++++++++++++++++++---- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 574edff..97311f3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,7 +45,7 @@ repos: args: [dfetch_hub] - id: xenon name: cyclomatic complexity - entry: xenon --max-absolute C --max-modules B --max-average A + entry: xenon --max-absolute B --max-modules A --max-average A language: python files: ^dfetch_hub/ types: [file, python] diff --git a/pyproject.toml b/pyproject.toml index 2e1efbe..f854505 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,11 +76,18 @@ max-line-length = "120" [tool.pylint.design] max-attributes = 8 +max-branches = 8 +max-locals = 12 +max-returns = 4 +max-statements = 25 +max-bool-expr = 3 +max-module-lines = 500 [tool.mypy] files = "dfetch_hub" ignore_missing_imports = true strict = true +warn_unreachable = true [tool.ruff] line-length = 120 @@ -89,22 +96,33 @@ target-version = "py311" [tool.ruff.lint] select = [ "A", # flake8-builtins: shadowing built-ins + "ARG", # flake8-unused-arguments "B", # flake8-bugbear: likely bugs and design issues "C4", # flake8-comprehensions: unnecessary comprehension forms "E", # pycodestyle errors "F", # pyflakes: undefined names, unused imports + "G", # flake8-logging-format: correct logging API usage "N", # pep8-naming + "PERF", # perflint: performance anti-patterns "PIE", # flake8-pie: misc lint improvements + "PTH", # flake8-use-pathlib: prefer pathlib over os.path / open() + "RET", # flake8-return: return statement hygiene + "RSE", # flake8-raise: raise statement hygiene "RUF", # ruff-specific rules "SIM", # flake8-simplify + "T20", # flake8-print: no print() in production code "TCH", # flake8-type-checking: move imports 
to TYPE_CHECKING + "TRY", # tryceratops: exception-handling hygiene "UP", # pyupgrade: use modern Python syntax "W", # pycodestyle warnings ] ignore = [ - "E501", # line length: enforced by pylint at 120 chars - "UP037", # remove `from __future__ import annotations`: kept per project convention - "SIM108", # ternary operator: readability preference + "E501", # line length: enforced by pylint at 120 chars + "UP037", # remove `from __future__ import annotations`: kept per project convention + "SIM108", # ternary operator: readability preference + "TRY003", # long exception messages in raise: acceptable for clarity + "TRY300", # return-in-try: acceptable pattern + "RET505", # superfluous else-after-return: readability preference ] [tool.ruff.lint.isort] @@ -125,7 +143,7 @@ targets = ["dfetch_hub"] [tool.pyright] pythonVersion = "3.11" -typeCheckingMode = "standard" +typeCheckingMode = "strict" venvPath = "." venv = ".venv" From 16b1c2cec42868dc624c66618f7c8d732d01feec Mon Sep 17 00:00:00 2001 From: Ben Spoor Date: Mon, 2 Mar 2026 21:24:17 +0100 Subject: [PATCH 02/17] Fix issues --- dfetch_hub/catalog/sources/__init__.py | 10 +- dfetch_hub/catalog/sources/clib.py | 134 ++++++++---- dfetch_hub/catalog/sources/conan.py | 137 ++++++++---- dfetch_hub/catalog/sources/readme.py | 19 +- dfetch_hub/catalog/sources/vcpkg.py | 22 +- dfetch_hub/catalog/writer.py | 287 ++++++++++++++++--------- dfetch_hub/commands/__init__.py | 2 +- dfetch_hub/commands/publish.py | 84 +++++--- dfetch_hub/commands/serve.py | 7 +- dfetch_hub/commands/update.py | 52 +++-- dfetch_hub/config.py | 7 +- pyproject.toml | 5 + test/test_catalog_clib.py | 10 +- test/test_catalog_sources.py | 24 +-- 14 files changed, 517 insertions(+), 283 deletions(-) diff --git a/dfetch_hub/catalog/sources/__init__.py b/dfetch_hub/catalog/sources/__init__.py index 016a3f6..65a65bb 100644 --- a/dfetch_hub/catalog/sources/__init__.py +++ b/dfetch_hub/catalog/sources/__init__.py @@ -53,15 +53,15 @@ def parse_vcs_slug(url: str) 
-> tuple[str, str, str] | None: _HEADERS = {"User-Agent": "dfetch-hub/0.0.1"} _README_NAMES = ("README.md", "readme.md", "Readme.md", "README.rst", "README") -_RAW_BRANCHES = ("main", "master") +RAW_BRANCHES = ("main", "master") -def _raw_url(owner: str, repo: str, branch: str, filename: str) -> str: +def raw_url(owner: str, repo: str, branch: str, filename: str) -> str: """Build a raw.githubusercontent.com URL for a specific file.""" return f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{filename}" -def _fetch_raw(url: str) -> str | None: +def fetch_raw(url: str) -> str | None: """GET *url* and return the response body as a string, or ``None`` on failure.""" try: req = Request(url, headers=_HEADERS) @@ -85,9 +85,9 @@ def fetch_readme(owner: str, repo: str) -> str | None: The raw README text on success, or ``None`` if nothing is found. """ - for branch in _RAW_BRANCHES: + for branch in RAW_BRANCHES: for name in _README_NAMES: - content = _fetch_raw(_raw_url(owner, repo, branch, name)) + content = fetch_raw(raw_url(owner, repo, branch, name)) if content is not None: logger.debug("Fetched %s for %s/%s from %s", name, owner, repo, branch) return content diff --git a/dfetch_hub/catalog/sources/clib.py b/dfetch_hub/catalog/sources/clib.py index e4e85e8..899780d 100644 --- a/dfetch_hub/catalog/sources/clib.py +++ b/dfetch_hub/catalog/sources/clib.py @@ -10,12 +10,12 @@ from dfetch.log import get_logger from dfetch_hub.catalog.sources import ( - _RAW_BRANCHES, + RAW_BRANCHES, BaseManifest, - _fetch_raw, - _raw_url, + fetch_raw, fetch_readme, parse_vcs_slug, + raw_url, ) if TYPE_CHECKING: @@ -56,8 +56,8 @@ def _fetch_package_json(owner: str, repo: str) -> dict[str, object] | None: Tries ``main`` then ``master`` branch. Returns ``None`` on failure. 
""" - for branch in _RAW_BRANCHES: - raw = _fetch_raw(_raw_url(owner, repo, branch, "package.json")) + for branch in RAW_BRANCHES: + raw = fetch_raw(raw_url(owner, repo, branch, "package.json")) if raw is None: continue try: @@ -92,6 +92,43 @@ def _build_urls(vcs_url: str, canonical_url: str | None) -> dict[str, str]: return urls +def _str_or_none(value: object) -> str | None: + """Return ``str(value)`` when *value* is truthy, else ``None``.""" + return str(value) if value else None + + +def _pkg_json_keywords(raw: object) -> list[str]: + """Extract a flat keyword list from the ``keywords`` field of ``package.json``.""" + if isinstance(raw, list): + return [str(k) for k in raw] + if isinstance(raw, str): + return [raw] + return [] + + +def _enrich_from_pkg_json( + pkg_json: dict[str, object], vcs_url: str, tagline: str, repo: str +) -> tuple[str, str, str | None, str | None, list[str], str]: + """Extract package metadata from ``package.json``. + + Args: + pkg_json: Parsed ``package.json`` dict. + vcs_url: VCS repository URL used as the fallback homepage. + tagline: Description extracted from the wiki bullet (used when ``package.json`` has none). + repo: Repository name used as the fallback package name. 
+ + Returns: + ``(package_name, description, license, version, keywords, canonical_url)`` + """ + package_name = str(pkg_json.get("name") or repo) + description = tagline or str(pkg_json.get("description") or "") + license_val = _str_or_none(pkg_json.get("license")) + version_val = _str_or_none(pkg_json.get("version")) + json_kws = _pkg_json_keywords(pkg_json.get("keywords")) + canonical_url = _str_or_none(pkg_json.get("homepage")) or vcs_url + return package_name, description, license_val, version_val, json_kws, canonical_url + + def _build_package( # pylint: disable=too-many-locals host: str, owner: str, repo: str, tagline: str, category: str ) -> CLibPackage: @@ -118,26 +155,17 @@ def _build_package( # pylint: disable=too-many-locals pkg_json = _fetch_package_json(owner, repo) if is_github else None if pkg_json is not None: - package_name = str(pkg_json.get("name") or repo) - description = tagline or str(pkg_json.get("description") or "") - license_val = str(pkg_json.get("license") or "") or None - version_val = str(pkg_json.get("version") or "") or None - raw_keywords = pkg_json.get("keywords") - if isinstance(raw_keywords, list): - json_kws: list[str] = [str(k) for k in raw_keywords] - elif isinstance(raw_keywords, str): - json_kws = [raw_keywords] - else: - json_kws = [] - # Prefer an explicit homepage from package.json (e.g. project website); - # fall back to the VCS repo URL so the field is always populated. 
- canonical_url: str | None = str(pkg_json.get("homepage") or "") or vcs_url + package_name, description, license_val, version_val, json_kws, canonical_url = ( + _enrich_from_pkg_json(pkg_json, vcs_url, tagline, repo) + ) else: - package_name = repo - description = tagline - license_val = None - version_val = None - json_kws = [] + package_name, description, license_val, version_val, json_kws = ( + repo, + tagline, + None, + None, + [], + ) canonical_url = vcs_url keywords: list[str] = ([category] if category else []) + [ @@ -156,6 +184,39 @@ def _build_package( # pylint: disable=too-many-locals ) +def _process_wiki_line( + line: str, current_category: str +) -> tuple[str, CLibPackage | None]: + """Process one Packages.md line; return updated category and optional package. + + Args: + line: A single line from ``Packages.md``. + current_category: The most-recently seen section heading. + + Returns: + A ``(category, package)`` pair where *category* may be updated and + *package* is ``None`` for non-bullet or unrecognised lines. 
+ """ + heading_match = _HEADING_RE.match(line) + if heading_match: + return heading_match.group(1).strip(), None + + bullet_match = _BULLET_RE.match(line) + if not bullet_match: + return current_category, None + + _link_text, url, tagline = bullet_match.groups() + parsed = parse_vcs_slug(url) + if not parsed: + logger.debug("Skipping URL without recognized VCS host in Packages.md: %s", url) + return current_category, None + + host, owner, repo = parsed + return current_category, _build_package( + host, owner, repo, (tagline or "").strip(), current_category + ) + + def parse_packages_md( packages_md: "Path", limit: int | None = None ) -> list[CLibPackage]: @@ -179,29 +240,10 @@ def parse_packages_md( current_category: str = "" for line in packages_md.read_text(encoding="utf-8").splitlines(): - heading_match = _HEADING_RE.match(line) - if heading_match: - current_category = heading_match.group(1).strip() - continue - - bullet_match = _BULLET_RE.match(line) - if not bullet_match: - continue - - _link_text, url, tagline = bullet_match.groups() - parsed = parse_vcs_slug(url) - if not parsed: - logger.debug( - "Skipping URL without recognized VCS host in Packages.md: %s", url - ) - continue - if limit is not None and len(packages) >= limit: break - - host, owner, repo = parsed - packages.append( - _build_package(host, owner, repo, (tagline or "").strip(), current_category) - ) + current_category, pkg = _process_wiki_line(line, current_category) + if pkg is not None: + packages.append(pkg) return packages diff --git a/dfetch_hub/catalog/sources/conan.py b/dfetch_hub/catalog/sources/conan.py index 330092a..d933884 100644 --- a/dfetch_hub/catalog/sources/conan.py +++ b/dfetch_hub/catalog/sources/conan.py @@ -22,6 +22,25 @@ # --------------------------------------------------------------------------- +def _advance_in_string(text: str, i: int, str_char: str) -> tuple[int, bool]: + """Advance index *i* by one step while inside a string literal. 
+ + Args: + text: Full source text being scanned. + i: Current position (the character to inspect). + str_char: The opening quote character of the current string. + + Returns: + ``(next_i, still_in_string)`` — *next_i* skips an extra character when + a backslash escape is encountered; *still_in_string* is ``False`` when + *i* points at the closing quote. + """ + ch = text[i] + if ch == "\\" and i + 1 < len(text): + return i + 1, True + return i, ch != str_char + + def _scan_paren_value(text: str, start: int) -> str: """Return the substring from *start* (a ``(`` character) to its matching ``)``.""" depth = 1 @@ -31,17 +50,13 @@ def _scan_paren_value(text: str, start: int) -> str: while i < len(text) and depth > 0: ch = text[i] if in_str: - if ch == "\\" and i + 1 < len(text): - i += 1 - elif ch == str_char: - in_str = False - else: - if ch in ('"', "'"): - in_str, str_char = True, ch - elif ch == "(": - depth += 1 - elif ch == ")": - depth -= 1 + i, in_str = _advance_in_string(text, i, str_char) + elif ch in ('"', "'"): + in_str, str_char = True, ch + elif ch == "(": + depth += 1 + elif ch == ")": + depth -= 1 i += 1 return text[start:i] @@ -135,28 +150,37 @@ def _latest_version(config_path: Path) -> tuple[str | None, str]: return None, "all" -def _find_conanfile(recipe_dir: Path, preferred_folder: str) -> Path | None: - """Return the path to ``conanfile.py`` inside *recipe_dir*. +def _safe_conanfile(candidate: Path, base: Path) -> Path | None: + """Resolve *candidate* and return it if it is a file strictly within *base*. - Tries *preferred_folder* first, then falls back to any subdirectory. - Both candidate paths are resolved and verified to be strictly contained - within *recipe_dir* before being returned, preventing path traversal via - crafted folder names or symlinks. - """ - base = recipe_dir.resolve() + Args: + candidate: Path to a potential ``conanfile.py``. + base: Resolved absolute root that the candidate must be under. 
- preferred = recipe_dir / preferred_folder / "conanfile.py" + Returns: + Resolved path on success, or ``None`` if resolution fails or the path + escapes *base* or does not point to an existing file. + """ try: - preferred_resolved = preferred.resolve() + resolved = candidate.resolve() except OSError: - preferred_resolved = None - if ( - preferred_resolved is not None - and preferred_resolved.is_relative_to(base) - and preferred_resolved.is_file() - ): - return preferred_resolved + return None + return resolved if resolved.is_relative_to(base) and resolved.is_file() else None + + +def _scan_recipe_dir(base: Path, recipe_dir: Path) -> Path | None: + """Scan all subdirectories of *recipe_dir* for ``conanfile.py``. + + Every candidate path is verified to be strictly contained within *base* + before being returned, preventing path traversal. + + Args: + base: Resolved absolute path of the recipe directory root. + recipe_dir: Recipe directory to scan. + Returns: + Path to the first safe ``conanfile.py`` found, or ``None``. + """ if not recipe_dir.exists() or not recipe_dir.is_dir(): return None @@ -167,22 +191,56 @@ def _find_conanfile(recipe_dir: Path, preferred_folder: str) -> Path | None: for sub in entries: if sub.is_dir(): - candidate = sub / "conanfile.py" - try: - candidate_resolved = candidate.resolve() - except OSError: - continue - if candidate_resolved.is_relative_to(base) and candidate_resolved.is_file(): - return candidate_resolved + result = _safe_conanfile(sub / "conanfile.py", base) + if result is not None: + return result return None +def _find_conanfile(recipe_dir: Path, preferred_folder: str) -> Path | None: + """Return the path to ``conanfile.py`` inside *recipe_dir*. + + Tries *preferred_folder* first, then falls back to any subdirectory. + Both candidate paths are verified to be strictly contained within + *recipe_dir* before being returned, preventing path traversal via + crafted folder names or symlinks. 
+ + Args: + recipe_dir: Path to the recipe directory. + preferred_folder: Subfolder to try first (e.g. ``"all"``). + + Returns: + Path to ``conanfile.py`` on success, or ``None``. + """ + base = recipe_dir.resolve() + result = _safe_conanfile(recipe_dir / preferred_folder / "conanfile.py", base) + return result if result is not None else _scan_recipe_dir(base, recipe_dir) + + # --------------------------------------------------------------------------- # Public API # --------------------------------------------------------------------------- +def _build_conan_urls(homepage: str | None, conan_url: str | None) -> dict[str, str]: + """Build the named URL mapping for a conan recipe. + + Args: + homepage: Upstream project website, or ``None``. + conan_url: Conan recipe repository URL, or ``None``. + + Returns: + A dict with ``"Homepage"`` and/or ``"Source"`` keys as available. + """ + urls: dict[str, str] = {} + if homepage: + urls["Homepage"] = homepage + if conan_url: + urls["Source"] = conan_url + return urls + + def parse_conan_recipe(recipe_dir: Path) -> ConanManifest | None: """Parse a single conan-center-index recipe directory. 
@@ -217,16 +275,9 @@ def parse_conan_recipe(recipe_dir: Path) -> ConanManifest | None: name = _extract_str_attr(text, "name") or recipe_dir.name description = _extract_str_attr(text, "description") or "" homepage = _extract_str_attr(text, "homepage") - conan_url = _extract_str_attr(text, "url") license_val = _extract_str_attr(text, "license") or None topics = _extract_tuple_attr(text, "topics") - urls: dict[str, str] = {} - if homepage: - urls["Homepage"] = homepage - if conan_url: - urls["Source"] = conan_url - return ConanManifest( entry_name=recipe_dir.name, package_name=name, @@ -236,5 +287,5 @@ def parse_conan_recipe(recipe_dir: Path) -> ConanManifest | None: version=version, topics=topics, readme_content=fetch_readme_for_homepage(homepage), - urls=urls, + urls=_build_conan_urls(homepage, _extract_str_attr(text, "url")), ) diff --git a/dfetch_hub/catalog/sources/readme.py b/dfetch_hub/catalog/sources/readme.py index 34feb24..870f463 100644 --- a/dfetch_hub/catalog/sources/readme.py +++ b/dfetch_hub/catalog/sources/readme.py @@ -21,12 +21,16 @@ _README_NAMES = ("README.md", "readme.md", "Readme.md", "README.rst", "README") -_HEADING_RE = re.compile(r"^#+\s+") -_BADGE_RE = re.compile(r"^\[!\[") -_BLANK_RE = re.compile(r"^\s*$") +# Matches lines that should be skipped: blank, Markdown headings, or badge links. +_SKIP_RE = re.compile(r"^(#+\s+|\[!\[|\s*$)") _DESCRIPTION_MAX = 120 +def _is_content_line(line: str, in_code_block: bool) -> bool: + """Return ``True`` if *line* is a prose content line worth using as a description.""" + return not in_code_block and not _SKIP_RE.match(line) and bool(line.strip()) + + def _extract_description(text: str) -> str: """Extract the first meaningful line from *text* as a short description. 
@@ -47,13 +51,8 @@ def _extract_description(text: str) -> str: if line.startswith(("```", "~~~")): in_code_block = not in_code_block continue - if in_code_block: - continue - if _BLANK_RE.match(line) or _HEADING_RE.match(line) or _BADGE_RE.match(line): - continue - stripped = line.strip() - if stripped: - return stripped[:_DESCRIPTION_MAX] + if _is_content_line(line, in_code_block): + return line.strip()[:_DESCRIPTION_MAX] return "" diff --git a/dfetch_hub/catalog/sources/vcpkg.py b/dfetch_hub/catalog/sources/vcpkg.py index f2e82cc..b247171 100644 --- a/dfetch_hub/catalog/sources/vcpkg.py +++ b/dfetch_hub/catalog/sources/vcpkg.py @@ -4,15 +4,12 @@ import json from dataclasses import dataclass, field -from typing import TYPE_CHECKING +from pathlib import Path from dfetch.log import get_logger from dfetch_hub.catalog.sources import BaseManifest, fetch_readme_for_homepage -if TYPE_CHECKING: - from pathlib import Path - logger = get_logger(__name__) @@ -57,6 +54,12 @@ def _extract_description(data: dict[str, object]) -> str: return str(desc) if desc else "" +def _extract_str_field(data: dict[str, object], key: str) -> str | None: + """Return the string value for *key* in *data*, or ``None`` if absent or non-string.""" + val = data.get(key) + return str(val) if isinstance(val, str) else None + + def _extract_dependencies(data: dict[str, object]) -> list[str]: """Return a flat list of dependency names from *data*. 
@@ -93,7 +96,7 @@ def parse_vcpkg_json(entry_dir: Path) -> VcpkgManifest | None: return None try: - with open(manifest_path, encoding="utf-8") as fh: + with Path.open(manifest_path, encoding="utf-8") as fh: loaded = json.load(fh) except (json.JSONDecodeError, OSError) as exc: logger.warning("Could not parse %s: %s", manifest_path, exc) @@ -104,12 +107,9 @@ def parse_vcpkg_json(entry_dir: Path) -> VcpkgManifest | None: return None data: dict[str, object] = loaded - raw_homepage = data.get("homepage") - homepage: str | None = str(raw_homepage) if isinstance(raw_homepage, str) else None - raw_license = data.get("license") - license_val: str | None = str(raw_license) if isinstance(raw_license, str) else None - raw_name = data.get("name") - package_name = str(raw_name) if isinstance(raw_name, str) else entry_dir.name + homepage = _extract_str_field(data, "homepage") + license_val = _extract_str_field(data, "license") + package_name = _extract_str_field(data, "name") or entry_dir.name urls: dict[str, str] = {} if homepage: diff --git a/dfetch_hub/catalog/writer.py b/dfetch_hub/catalog/writer.py index 84f89a0..dc74f38 100644 --- a/dfetch_hub/catalog/writer.py +++ b/dfetch_hub/catalog/writer.py @@ -4,16 +4,14 @@ import json from datetime import UTC, datetime -from typing import TYPE_CHECKING, Any +from pathlib import Path +from typing import Any from dfetch.log import get_logger from dfetch.vcs.git import GitRemote from dfetch_hub.catalog.sources import BaseManifest, parse_vcs_slug -if TYPE_CHECKING: - from pathlib import Path - logger = get_logger(__name__) @@ -52,7 +50,9 @@ def _catalog_id(vcs_host: str, org: str, repo: str) -> str: def _fetch_upstream_tags(url: str) -> list[dict[str, Any]]: """Return git tags from *url* using dfetch's GitRemote.""" try: - info = GitRemote._ls_remote(url) # pylint: disable=protected-access + info = GitRemote._ls_remote( # pylint: disable=protected-access # pyright: ignore[reportPrivateUsage] + url + ) except Exception as exc: # pylint: 
disable=broad-exception-caught logger.warning("Could not list tags for %s: %s", url, exc) # pragma: no cover return [] # pragma: no cover @@ -76,14 +76,14 @@ def _fetch_upstream_tags(url: str) -> list[dict[str, Any]]: def _load_json(path: Path) -> Any: if path.exists(): - with open(path, encoding="utf-8") as fh: + with Path.open(path, encoding="utf-8") as fh: return json.load(fh) return None def _save_json(path: Path, data: Any) -> None: path.parent.mkdir(parents=True, exist_ok=True) - with open(path, "w", encoding="utf-8") as fh: + with Path.open(path, "w", encoding="utf-8") as fh: json.dump(data, fh, indent=2, ensure_ascii=False) fh.write("\n") @@ -97,6 +97,48 @@ def _now_iso() -> str: # --------------------------------------------------------------------------- +def _ensure_version_tag(tags: list[dict[str, Any]], version: str) -> None: + """Add *version* to *tags* if not already present. + + Normalises by stripping a leading ``"v"`` so ``"6.4.0"`` matches ``"v6.4.0"``. + + Args: + tags: Mutable tag list to update in-place. + version: Version string to ensure is present. + """ + tag_names_normalised = {str(t.get("name") or "").lstrip("v") for t in tags} + if version.lstrip("v") not in tag_names_normalised: + tags.insert( + 0, + { + "name": version, + "is_tag": True, + "commit_sha": None, + "date": None, + }, + ) + + +def _merge_topics( + entry: dict[str, Any], + existing: dict[str, Any] | None, + topics: list[str], +) -> None: + """Merge *topics* into ``entry["topics"]`` when updating an existing entry. + + No-op when *existing* is ``None`` (newly created entry already has the topics + embedded in the initial dict) or *topics* is empty. + + Args: + entry: Catalog entry dict to update in-place. + existing: Previous value of the entry, or ``None`` if newly created. + topics: Topics from the current manifest to add. 
+ """ + if existing and topics: + existing_topics: list[str] = entry.setdefault("topics", []) + existing_topics.extend(t for t in topics if t not in existing_topics) + + def _merge_catalog_entry( # pylint: disable=too-many-arguments,too-many-positional-arguments existing: dict[str, Any] | None, manifest: BaseManifest, @@ -122,10 +164,7 @@ def _merge_catalog_entry( # pylint: disable=too-many-arguments,too-many-positio "tags": [], } - # Merge in topics from the current source if the entry already existed - if existing and topics: - existing_topics: list[str] = entry.setdefault("topics", []) - existing_topics.extend(t for t in topics if t not in existing_topics) + _merge_topics(entry, existing, topics) # Update fields that the manifest knows about and the catalog may be stale on if manifest.description and not entry.get("description"): @@ -138,21 +177,8 @@ def _merge_catalog_entry( # pylint: disable=too-many-arguments,too-many-positio if label not in labels: labels.append(label) - # Add a version tag if we have one and it's not already listed. - # Normalise by stripping a leading "v" so "6.4.0" matches "v6.4.0". if manifest.version: - tags: list[dict[str, Any]] = entry.setdefault("tags", []) - tag_names_normalised = {str(t.get("name") or "").lstrip("v") for t in tags} - if manifest.version.lstrip("v") not in tag_names_normalised: - tags.insert( - 0, - { - "name": manifest.version, - "is_tag": True, - "commit_sha": None, - "date": None, - }, - ) + _ensure_version_tag(entry.setdefault("tags", []), manifest.version) return entry @@ -176,6 +202,46 @@ def _catalog_source_entry( } +def _merge_catalog_sources( + detail: dict[str, Any], + manifest: BaseManifest, + source_name: str, + label: str, + registry_path: str, +) -> None: + """Update the ``catalog_sources`` list in *detail* for this manifest. + + Purges stale entries that share the same ``index_path`` but carry an + outdated ``source_name`` (e.g. 
after a source rename in ``dfetch-hub.toml``), + then upserts the current source entry. + + Args: + detail: Per-project detail dict to update in-place. + manifest: Package manifest supplying entry metadata. + source_name: Internal name of the catalog source. + label: Human-readable label for the source. + registry_path: Sub-path used to build the ``index_path``. + """ + sources: list[dict[str, Any]] = detail.setdefault("catalog_sources", []) + new_source = _catalog_source_entry(manifest, source_name, label, registry_path) + new_index_path = new_source["index_path"] + detail["catalog_sources"] = sources = [ + s + for s in sources + if not ( + s.get("index_path") == new_index_path + and s.get("source_name") != source_name + ) + ] + existing_source = next( + (s for s in sources if s.get("source_name") == source_name), None + ) + if existing_source is None: + sources.append(new_source) + else: + existing_source.update(new_source) + + def _merge_detail( # pylint: disable=too-many-arguments,too-many-positional-arguments existing: dict[str, Any] | None, manifest: BaseManifest, @@ -212,47 +278,15 @@ def _merge_detail( # pylint: disable=too-many-arguments,too-many-positional-arg # Merge named URLs from this manifest into the detail's url map detail.setdefault("urls", {}).update(getattr(manifest, "urls", {})) - # Update or add the catalog source for this label. - # First, purge any stale entries that share the same index_path but carry an - # outdated source_name (e.g. after a source rename in dfetch-hub.toml). 
- sources: list[dict[str, Any]] = detail.setdefault("catalog_sources", []) - new_source = _catalog_source_entry(manifest, source_name, label, registry_path) - new_index_path = new_source["index_path"] - detail["catalog_sources"] = sources = [ - s - for s in sources - if not ( - s.get("index_path") == new_index_path - and s.get("source_name") != source_name - ) - ] - existing_source = next( - (s for s in sources if s.get("source_name") == source_name), None - ) - if existing_source is None: - sources.append(new_source) - else: - existing_source.update(new_source) + _merge_catalog_sources(detail, manifest, source_name, label, registry_path) # Populate tags from the upstream repo when the list is empty tags: list[dict[str, Any]] = detail.setdefault("tags", []) if not tags and manifest.homepage: tags.extend(_fetch_upstream_tags(manifest.homepage)) - # Ensure the current version appears in the tag list. - # Normalise by stripping a leading "v" so that "6.4.0" matches "v6.4.0". if manifest.version: - tag_names_normalised = {t.get("name", "").lstrip("v") for t in tags} - if manifest.version.lstrip("v") not in tag_names_normalised: - tags.insert( - 0, - { - "name": manifest.version, - "is_tag": True, - "commit_sha": None, - "date": None, - }, - ) + _ensure_version_tag(tags, manifest.version) return detail @@ -291,7 +325,91 @@ def _generate_readme(manifest: BaseManifest, repo: str, url: str) -> str: # --------------------------------------------------------------------------- -def write_catalog( # pylint: disable=too-many-locals +def _write_detail_json( # pylint: disable=too-many-arguments,too-many-positional-arguments + data_dir: Path, + vcs_host: str, + org: str, + repo: str, + manifest: BaseManifest, + source_name: str, + label: str, + registry_path: str, +) -> None: + """Write or update the per-project detail JSON for one package. + + Args: + data_dir: Root of the catalog data directory. + vcs_host: Short VCS host label (e.g. ``"github"``). 
+ org: Repository owner / organisation (lowercased). + repo: Repository name (lowercased). + manifest: Package manifest supplying metadata. + source_name: Internal name of the catalog source. + label: Human-readable label for the source. + registry_path: Sub-path used to build the ``index_path``. + """ + detail_path = data_dir / vcs_host / org / f"{repo}.json" + _save_json( + detail_path, + _merge_detail( + _load_json(detail_path), + manifest, + org, + repo, + source_name, + label, + registry_path, + ), + ) + + +def _process_manifest( # pylint: disable=too-many-arguments,too-many-positional-arguments + manifest: BaseManifest, + catalog: dict[str, Any], + data_dir: Path, + source_name: str, + label: str, + registry_path: str, +) -> tuple[bool, bool]: + """Update *catalog* and write the detail JSON for one manifest. + + Args: + manifest: Package manifest to process. + catalog: Mutable catalog dict to update in-place. + data_dir: Root of the catalog data directory. + source_name: Internal name of the catalog source. + label: Human-readable source label. + registry_path: Sub-path used to build the ``index_path``. + + Returns: + ``(added, updated)`` booleans — exactly one is ``True`` when the manifest + was successfully written; both are ``False`` when the manifest is skipped. 
+ """ + if not manifest.homepage: + logger.warning( + "cannot determine upstream repo without a URL of %s", manifest.entry_name + ) + return False, False + + parsed = parse_vcs_slug(manifest.homepage) + if not parsed: + logger.warning( + "skipping entry without recognized VCS URL: %s", manifest.homepage + ) + return False, False + + host, org, repo = parsed + vcs_host = _vcs_host_label(host) + existing_entry = catalog.get(_catalog_id(vcs_host, org, repo)) + catalog[_catalog_id(vcs_host, org, repo)] = _merge_catalog_entry( + existing_entry, manifest, vcs_host, org, repo, label + ) + _write_detail_json( + data_dir, vcs_host, org, repo, manifest, source_name, label, registry_path + ) + return existing_entry is None, existing_entry is not None + + +def write_catalog( manifests: list[BaseManifest], data_dir: Path, source_name: str, @@ -313,56 +431,15 @@ def write_catalog( # pylint: disable=too-many-locals """ catalog_path = data_dir / "catalog.json" catalog: dict[str, Any] = _load_json(catalog_path) or {} - added = 0 updated = 0 for manifest in manifests: - if not manifest.homepage: - logger.warning( - "cannot determine upstream repo without a URL of %s", - manifest.entry_name, - ) - continue - - parsed = parse_vcs_slug(manifest.homepage) - if not parsed: - logger.warning( - "skipping entry without recognized VCS URL: %s", manifest.homepage - ) - continue - - host, org, repo = parsed - vcs_host = _vcs_host_label(host) - cat_id = _catalog_id(vcs_host, org, repo) - - existing_entry = catalog.get(cat_id) - catalog[cat_id] = _merge_catalog_entry( - existing_entry, - manifest, - vcs_host, - org, - repo, - label, - ) - if existing_entry is None: - added += 1 - else: - updated += 1 - - # Per-project detail JSON - detail_path = data_dir / vcs_host / org / f"{repo}.json" - existing_detail = _load_json(detail_path) - merged_detail = _merge_detail( - existing_detail, - manifest, - org, - repo, - source_name, - label, - registry_path, + was_added, was_updated = _process_manifest( + 
manifest, catalog, data_dir, source_name, label, registry_path ) - _save_json(detail_path, merged_detail) + added += was_added + updated += was_updated _save_json(catalog_path, catalog) return added, updated diff --git a/dfetch_hub/commands/__init__.py b/dfetch_hub/commands/__init__.py index 4f6dd4d..b0c75d8 100644 --- a/dfetch_hub/commands/__init__.py +++ b/dfetch_hub/commands/__init__.py @@ -42,7 +42,7 @@ def load_config_with_data_dir( try: config = load_config(config_path) except FileNotFoundError: - _logger.error("Config file '%s' not found", config_path) + _logger.exception("Config file '%s' not found", config_path) sys.exit(1) if data_dir_override is not None: diff --git a/dfetch_hub/commands/publish.py b/dfetch_hub/commands/publish.py index 71c6a94..969f462 100644 --- a/dfetch_hub/commands/publish.py +++ b/dfetch_hub/commands/publish.py @@ -27,19 +27,21 @@ def _minify_json(src: Path, dst: Path) -> None: """Read *src*, minify JSON, write to *dst*.""" dst.parent.mkdir(parents=True, exist_ok=True) - with open(src, encoding="utf-8") as fh: + with Path.open(src, encoding="utf-8") as fh: data = json.load(fh) - with open(dst, "w", encoding="utf-8") as fh: + with Path.open(dst, "w", encoding="utf-8") as fh: json.dump(data, fh, separators=(",", ":"), ensure_ascii=False) -def _cmd_publish(parsed: argparse.Namespace) -> None: - """Build a deployable static site for GitHub Pages / GitLab Pages.""" - output = Path(parsed.output) - _config, data_dir = load_config_with_data_dir( - parsed.config, parsed.data_dir, _DEFAULT_DATA_DIR - ) +def _validate_output_dir(data_dir: Path, output: Path) -> None: + """Validate that *data_dir* is populated and does not overlap *output*. + Logs an error and calls ``sys.exit(1)`` if any check fails. + + Args: + data_dir: Catalog data directory that must contain JSON files. + output: Intended output directory that must not overlap *data_dir*. 
+ """ if not data_dir.is_dir(): logger.error( "Catalog data directory '%s' does not exist or is not a directory", data_dir @@ -48,8 +50,7 @@ def _cmd_publish(parsed: argparse.Namespace) -> None: if not any(data_dir.rglob("*.json")): logger.error("Catalog data directory '%s' contains no JSON files", data_dir) sys.exit(1) - out_res = output.resolve() - src_res = data_dir.resolve() + out_res, src_res = output.resolve(), data_dir.resolve() if ( out_res == src_res or out_res.is_relative_to(src_res) @@ -62,37 +63,68 @@ def _cmd_publish(parsed: argparse.Namespace) -> None: ) sys.exit(1) - if output.exists(): - logger.print_info_line("publish", f"Removing existing '{output}' ...") - shutil.rmtree(output) - output.mkdir(parents=True) - # Copy site assets; rewrite the two fetch() paths in index.html so that - # data/ is relative to the output root instead of ../data/ (the dev layout). - logger.print_info_line("publish", "Copying site assets ...") - for src in _SITE_DIR.rglob("*"): +def _copy_assets(site_dir: Path, output: Path) -> None: + """Copy site assets from *site_dir* to *output*, rewriting data paths in ``index.html``. + + The ``../data/`` prefix in ``index.html`` fetch calls is rewritten to ``data/`` + so that paths are relative to the output root rather than the dev layout. + + Args: + site_dir: Source directory containing site assets. + output: Destination directory for the published site. + """ + for src in site_dir.rglob("*"): if not src.is_file(): continue - relative = src.relative_to(_SITE_DIR) - dst = output / relative + dst = output / src.relative_to(site_dir) dst.parent.mkdir(parents=True, exist_ok=True) if src.name == "index.html": text = src.read_text(encoding="utf-8") - text = re.sub(r"(['\"`])\.\.\/data\/", r"\1data/", text) - dst.write_text(text, encoding="utf-8") + dst.write_text( + re.sub(r"(['\"`])\.\.\/data\/", r"\1data/", text), encoding="utf-8" + ) else: shutil.copy2(src, dst) - # Copy and minify all JSON from the data directory. 
+ +def _minify_catalog(data_dir: Path, output: Path) -> int: + """Copy and minify all JSON files from *data_dir* into ``output/data/``. + + Args: + data_dir: Source catalog data directory. + output: Destination output directory. + + Returns: + The number of JSON files processed. + """ json_files = sorted(data_dir.rglob("*.json")) logger.print_info_line("publish", f"Minifying {len(json_files)} JSON file(s) ...") for src in json_files: - dst = output / "data" / src.relative_to(data_dir) - _minify_json(src, dst) + _minify_json(src, output / "data" / src.relative_to(data_dir)) + return len(json_files) + + +def _cmd_publish(parsed: argparse.Namespace) -> None: + """Build a deployable static site for GitHub Pages / GitLab Pages.""" + output = Path(parsed.output) + _config, data_dir = load_config_with_data_dir( + parsed.config, parsed.data_dir, _DEFAULT_DATA_DIR + ) + _validate_output_dir(data_dir, output) + + if output.exists(): + logger.print_info_line("publish", f"Removing existing '{output}' ...") + shutil.rmtree(output) + output.mkdir(parents=True) + + logger.print_info_line("publish", "Copying site assets ...") + _copy_assets(_SITE_DIR, output) + count = _minify_catalog(data_dir, output) logger.print_info_line( "publish", - f"Done — static site written to '{output}' ({len(json_files)} JSON file(s) minified)", + f"Done — static site written to '{output}' ({count} JSON file(s) minified)", ) diff --git a/dfetch_hub/commands/serve.py b/dfetch_hub/commands/serve.py index 1d8d728..69586ed 100644 --- a/dfetch_hub/commands/serve.py +++ b/dfetch_hub/commands/serve.py @@ -8,7 +8,10 @@ import webbrowser from pathlib import Path +from dfetch.log import get_logger + _PACKAGE_DIR = Path(__file__).parent.parent +logger = get_logger(__name__) def _port_type(value: str) -> int: @@ -36,7 +39,7 @@ def log_message( pass # suppress per-request noise url = f"http://localhost:{port}/site/index.html" - print(f"Serving {serve_dir} on {url} (Ctrl-C to stop)") + logger.info("Serving %s on %s 
(Ctrl-C to stop)", serve_dir, url) server = http.server.ThreadingHTTPServer(("127.0.0.1", port), _Handler) @@ -45,7 +48,7 @@ def log_message( try: server.serve_forever() except KeyboardInterrupt: - print("\nStopped.") + logger.info("Stopped.") finally: server.server_close() diff --git a/dfetch_hub/commands/update.py b/dfetch_hub/commands/update.py index 62efba0..dcface4 100644 --- a/dfetch_hub/commands/update.py +++ b/dfetch_hub/commands/update.py @@ -19,6 +19,8 @@ from dfetch_hub.commands import load_config_with_data_dir if TYPE_CHECKING: + from collections.abc import Callable + from dfetch_hub.catalog.sources import BaseManifest from dfetch_hub.config import SourceConfig @@ -81,6 +83,38 @@ def _subfolder_homepage(source: SourceConfig) -> str | None: return source.url or None +def _parse_entry_dirs( + entry_dirs: list[Path], + parse_fn: Callable[[Path], BaseManifest | None], + fallback_homepage: str | None, +) -> tuple[list[BaseManifest], int]: + """Parse *entry_dirs* and return ``(manifests, skipped_count)``. + + For each directory the appropriate parser is called. Packages whose + homepage is ``None`` get the *fallback_homepage* (i.e. the repository root + URL) if one is available. + + Args: + entry_dirs: Sorted list of package directories to process. + parse_fn: Parser function for the manifest type. + fallback_homepage: Repository root URL used when a manifest has no homepage. + + Returns: + A ``(manifests, skipped)`` tuple. + """ + manifests: list[BaseManifest] = [] + skipped = 0 + for entry_dir in entry_dirs: + m = parse_fn(entry_dir) + if m is None: + skipped += 1 + else: + if m.homepage is None and fallback_homepage is not None: + m.homepage = fallback_homepage + manifests.append(m) + return manifests, skipped + + def _process_subfolders_source( source: SourceConfig, data_dir: Path, @@ -107,8 +141,7 @@ def _process_subfolders_source( source.name, f"Fetching {source.url} (src: {source.path}) ..." 
) with tempfile.TemporaryDirectory(prefix="dfetch-hub-") as tmp: - tmp_path = Path(tmp) - fetched_dir = clone_source(source, tmp_path) + fetched_dir = clone_source(source, Path(tmp)) entry_dirs = sorted(d for d in fetched_dir.iterdir() if d.is_dir()) entry_dirs = _filter_sentinel(source, entry_dirs) @@ -116,18 +149,9 @@ def _process_subfolders_source( entry_dirs = entry_dirs[:limit] logger.print_info_line(source.name, f"Parsing {len(entry_dirs)} package(s) ...") - manifests: list[BaseManifest] = [] - skipped = 0 - for entry_dir in entry_dirs: - m = parse_fn(entry_dir) - if m is None: - skipped += 1 - else: - if m.homepage is None: - hp = _subfolder_homepage(source) - if hp is not None: - m.homepage = hp - manifests.append(m) + manifests, skipped = _parse_entry_dirs( + entry_dirs, parse_fn, _subfolder_homepage(source) + ) if skipped: logger.print_warning_line( diff --git a/dfetch_hub/config.py b/dfetch_hub/config.py index 45c8b8a..f3881cc 100644 --- a/dfetch_hub/config.py +++ b/dfetch_hub/config.py @@ -4,6 +4,7 @@ import tomllib from dataclasses import dataclass, field, fields +from pathlib import Path @dataclass @@ -84,18 +85,18 @@ def load_config(path: str = "dfetch-hub.toml") -> HubConfig: tomllib.TOMLDecodeError: If the file is not valid TOML. 
""" - with open(path, "rb") as fh: + with Path(path).open("rb") as fh: data = tomllib.load(fh) raw_settings_obj = data.get("settings", {}) if not isinstance(raw_settings_obj, dict): - raise ValueError("`[settings]` must be a TOML table") + raise TypeError("`[settings]` must be a TOML table") raw_sources_obj = data.get("source", []) if not isinstance(raw_sources_obj, list) or any( not isinstance(raw, dict) for raw in raw_sources_obj ): - raise ValueError("`[[source]]` must be an array of TOML tables") + raise TypeError("`[[source]]` must be an array of TOML tables") raw_settings = {k: v for k, v in raw_settings_obj.items() if k in _SETTINGS_FIELDS} settings = Settings(**raw_settings) diff --git a/pyproject.toml b/pyproject.toml index f854505..c89d050 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -146,6 +146,11 @@ pythonVersion = "3.11" typeCheckingMode = "strict" venvPath = "." venv = ".venv" +include = ["dfetch_hub"] +reportMissingTypeStubs = "none" +reportUnknownVariableType = "none" +reportUnknownArgumentType = "none" +reportUnknownMemberType = "none" [tool.pytest.ini_options] testpaths = ["test"] diff --git a/test/test_catalog_clib.py b/test/test_catalog_clib.py index dda16b9..46b4add 100644 --- a/test/test_catalog_clib.py +++ b/test/test_catalog_clib.py @@ -423,7 +423,7 @@ def test_fetch_package_json_returns_dict_on_valid_response() -> None: """Returns the parsed dict when the raw response is valid JSON.""" data = {"name": "buffer", "version": "0.4.0"} with patch( - "dfetch_hub.catalog.sources.clib._fetch_raw", return_value=json.dumps(data) + "dfetch_hub.catalog.sources.clib.fetch_raw", return_value=json.dumps(data) ): result = _fetch_package_json("clibs", "buffer") @@ -432,7 +432,7 @@ def test_fetch_package_json_returns_dict_on_valid_response() -> None: def test_fetch_package_json_returns_none_when_fetch_fails() -> None: """Returns None when every HTTP fetch attempt fails.""" - with patch("dfetch_hub.catalog.sources.clib._fetch_raw", return_value=None): 
+ with patch("dfetch_hub.catalog.sources.clib.fetch_raw", return_value=None): result = _fetch_package_json("owner", "repo") assert result is None @@ -440,7 +440,7 @@ def test_fetch_package_json_returns_none_when_fetch_fails() -> None: def test_fetch_package_json_returns_none_on_bad_json() -> None: """Returns None when the response body is not valid JSON.""" - with patch("dfetch_hub.catalog.sources.clib._fetch_raw", return_value="not json"): + with patch("dfetch_hub.catalog.sources.clib.fetch_raw", return_value="not json"): result = _fetch_package_json("owner", "repo") assert result is None @@ -449,7 +449,7 @@ def test_fetch_package_json_returns_none_on_bad_json() -> None: def test_fetch_package_json_returns_none_for_non_object_json() -> None: """Returns None when the JSON root is not an object (e.g. a list).""" with patch( - "dfetch_hub.catalog.sources.clib._fetch_raw", + "dfetch_hub.catalog.sources.clib.fetch_raw", return_value=json.dumps([1, 2, 3]), ): result = _fetch_package_json("owner", "repo") @@ -464,7 +464,7 @@ def test_fetch_package_json_falls_back_to_master_branch() -> None: def _side_effect(url: str) -> str | None: return json.dumps(data) if "master" in url else None - with patch("dfetch_hub.catalog.sources.clib._fetch_raw", side_effect=_side_effect): + with patch("dfetch_hub.catalog.sources.clib.fetch_raw", side_effect=_side_effect): result = _fetch_package_json("owner", "repo") assert result == data diff --git a/test/test_catalog_sources.py b/test/test_catalog_sources.py index 7b23273..5a8cb24 100644 --- a/test/test_catalog_sources.py +++ b/test/test_catalog_sources.py @@ -7,10 +7,10 @@ from dfetch_hub.catalog.sources import ( BaseManifest, - _fetch_raw, - _raw_url, + fetch_raw, fetch_readme, fetch_readme_for_homepage, + raw_url, ) # --------------------------------------------------------------------------- @@ -47,26 +47,26 @@ def test_base_manifest_urls_accepts_populated_dict() -> None: # 
--------------------------------------------------------------------------- -# _raw_url +# raw_url # --------------------------------------------------------------------------- def test_raw_url_produces_correct_format() -> None: """URL is assembled from owner, repo, branch, and filename.""" - url = _raw_url("owner", "repo", "main", "README.md") + url = raw_url("owner", "repo", "main", "README.md") assert url == "https://raw.githubusercontent.com/owner/repo/main/README.md" def test_raw_url_reflects_branch_and_filename() -> None: """Different branch and filename produce the expected URL.""" - url = _raw_url("org", "project", "master", "README.rst") + url = raw_url("org", "project", "master", "README.rst") assert "master" in url assert "README.rst" in url assert "org/project" in url # --------------------------------------------------------------------------- -# _fetch_raw +# fetch_raw # --------------------------------------------------------------------------- @@ -85,7 +85,7 @@ def test_fetch_raw_returns_decoded_content_on_success() -> None: "dfetch_hub.catalog.sources.urlopen", return_value=_make_response(b"hello world"), ): - result = _fetch_raw("https://example.com/file") + result = fetch_raw("https://example.com/file") assert result == "hello world" @@ -93,7 +93,7 @@ def test_fetch_raw_returns_decoded_content_on_success() -> None: def test_fetch_raw_returns_none_on_url_error() -> None: """Returns None when urllib raises URLError (network failure, 404, etc.).""" with patch("dfetch_hub.catalog.sources.urlopen", side_effect=URLError("timeout")): - result = _fetch_raw("https://example.com/file") + result = fetch_raw("https://example.com/file") assert result is None @@ -103,7 +103,7 @@ def test_fetch_raw_returns_none_on_os_error() -> None: with patch( "dfetch_hub.catalog.sources.urlopen", side_effect=OSError("connection reset") ): - result = _fetch_raw("https://example.com/file") + result = fetch_raw("https://example.com/file") assert result is None @@ -119,7 +119,7 
@@ def test_fetch_readme_returns_first_found_content() -> None: def _side_effect(url: str) -> str | None: return "# README" if ("main" in url and "README.md" in url) else None - with patch("dfetch_hub.catalog.sources._fetch_raw", side_effect=_side_effect): + with patch("dfetch_hub.catalog.sources.fetch_raw", side_effect=_side_effect): result = fetch_readme("owner", "repo") assert result == "# README" @@ -131,7 +131,7 @@ def test_fetch_readme_falls_back_to_master_branch() -> None: def _side_effect(url: str) -> str | None: return "# Master README" if "master" in url else None - with patch("dfetch_hub.catalog.sources._fetch_raw", side_effect=_side_effect): + with patch("dfetch_hub.catalog.sources.fetch_raw", side_effect=_side_effect): result = fetch_readme("owner", "repo") assert result == "# Master README" @@ -139,7 +139,7 @@ def _side_effect(url: str) -> str | None: def test_fetch_readme_returns_none_when_all_attempts_fail() -> None: """Returns None when every branch/filename combination returns nothing.""" - with patch("dfetch_hub.catalog.sources._fetch_raw", return_value=None): + with patch("dfetch_hub.catalog.sources.fetch_raw", return_value=None): result = fetch_readme("owner", "repo") assert result is None From a707c001d3307d1aeae766af244381659ccd0923 Mon Sep 17 00:00:00 2001 From: Ben Spoor Date: Mon, 2 Mar 2026 22:41:56 +0100 Subject: [PATCH 03/17] Remove hard paths --- dfetch_hub/commands/publish.py | 7 ++++--- dfetch_hub/commands/serve.py | 4 ++-- dfetch_hub/commands/update.py | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/dfetch_hub/commands/publish.py b/dfetch_hub/commands/publish.py index 969f462..1ea2384 100644 --- a/dfetch_hub/commands/publish.py +++ b/dfetch_hub/commands/publish.py @@ -2,6 +2,7 @@ from __future__ import annotations +import importlib.resources import json import re import shutil @@ -18,9 +19,9 @@ logger = get_logger(__name__) -_PACKAGE_DIR = Path(__file__).parent.parent -_DEFAULT_DATA_DIR = _PACKAGE_DIR / "data" 
-_SITE_DIR = _PACKAGE_DIR / "site" +_PKG = importlib.resources.files("dfetch_hub") +_DEFAULT_DATA_DIR: Path = Path(str(_PKG / "data")) +_SITE_DIR: Path = Path(str(_PKG / "site")) _DEFAULT_OUTPUT = Path("public") diff --git a/dfetch_hub/commands/serve.py b/dfetch_hub/commands/serve.py index 69586ed..a6d8c63 100644 --- a/dfetch_hub/commands/serve.py +++ b/dfetch_hub/commands/serve.py @@ -4,13 +4,13 @@ import argparse import http.server +import importlib.resources import threading import webbrowser from pathlib import Path from dfetch.log import get_logger -_PACKAGE_DIR = Path(__file__).parent.parent logger = get_logger(__name__) @@ -25,7 +25,7 @@ def _port_type(value: str) -> int: def _cmd_serve(parsed: argparse.Namespace) -> None: """Serve the site from the package directory and open the browser.""" port: int = parsed.port - serve_dir = _PACKAGE_DIR + serve_dir = Path(str(importlib.resources.files("dfetch_hub"))) class _Handler(http.server.SimpleHTTPRequestHandler): def __init__(self, *args: object, **kwargs: object) -> None: diff --git a/dfetch_hub/commands/update.py b/dfetch_hub/commands/update.py index dcface4..54d8d7e 100644 --- a/dfetch_hub/commands/update.py +++ b/dfetch_hub/commands/update.py @@ -3,6 +3,7 @@ from __future__ import annotations import argparse +import importlib.resources import sys import tempfile from pathlib import Path @@ -26,8 +27,7 @@ logger = get_logger(__name__) -_PACKAGE_DIR = Path(__file__).parent.parent -_DEFAULT_DATA_DIR = _PACKAGE_DIR / "data" +_DEFAULT_DATA_DIR: Path = Path(str(importlib.resources.files("dfetch_hub") / "data")) _MANIFEST_PARSERS = { "vcpkg.json": parse_vcpkg_json, From 832a88dd8c3e95c8b39bd640fe75bb5e6bc40b41 Mon Sep 17 00:00:00 2001 From: Ben Spoor Date: Mon, 2 Mar 2026 22:42:22 +0100 Subject: [PATCH 04/17] Update CLAUDE instructions --- CLAUDE.md | 60 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 11c6660..34ee569 
100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -19,10 +19,10 @@ ## Data model - Use `@dataclass` for all structured data. -- Mutable defaults must use `field(default_factory=...)`. -- Shared structural interface: `ComponentManifest` Protocol in `updater.py` — new manifest types must satisfy it via - duck-typing (no explicit inheritance needed). Optional extra fields (`topics`, `readme_content`, …) are accessed - with `getattr(manifest, "field", default)` in the updater. +- Mutable defaults must use `field(default_factory=list)` / `field(default_factory=dict)` — **never** `lambda: []` or + `lambda: {}` (ruff PIE807). +- New manifest types inherit from `BaseManifest` (in `catalog/sources/__init__.py`). Optional extra fields (`topics`, + `readme_content`, …) are accessed with `getattr(manifest, "field", default)` in the writer. ## Module structure @@ -51,9 +51,19 @@ - Use `logger = get_logger(__name__)` from `dfetch.log`. - Raise `RuntimeError` only for infrastructure failures (e.g. missing fetch output). +## Package resources + +- **Never** use `Path(__file__)` to locate bundled files. Use `importlib.resources` instead: + ```python + import importlib.resources + _DEFAULT_DATA_DIR: Path = Path(str(importlib.resources.files("dfetch_hub") / "data")) + ``` +- The `str()` + `Path()` conversion materialises the `Traversable` into a real filesystem path. + This is safe because setuptools installs packages as directories, not zips. + ## Catalog pipeline conventions -- `fetch_source()` (fetcher.py) uses the dfetch Python API — no `subprocess`. +- `clone_source()` (catalog/cloner.py) uses the dfetch Python API — no `subprocess`. - External HTTP calls use stdlib `urllib.request` only — no `requests` dependency. - GitHub org/repo values are **always lowercased** at extraction time so catalog IDs, file paths, and JSON fields stay consistent. @@ -77,8 +87,42 @@ ## Tooling +All tools live in `.venv/`. 
Activate the virtual environment before running any tool: + ``` -pre-commit run --all-files # isort + black + pylint -.venv/bin/mypy dfetch_hub # strict type check -.venv/bin/pytest # full test suite +source .venv/bin/activate +pre-commit run --all-files # full hook suite (must all pass before committing) +pytest # full test suite (206 tests, all mocked — no network) ``` + +### pre-commit hooks + +| Hook | What it enforces | +|---|---| +| isort | Import order (stdlib → third-party → local, `profile = "black"`) | +| black | Code formatting, 120-char lines | +| ruff | Lint: `A B C4 E F G N PERF PIE PTH RET RSE RUF SIM T20 TCH TRY UP W` | +| pylint | Structural lint; design limits (see `[tool.pylint.design]` in `pyproject.toml`) | +| mypy | Strict type checking | +| pyright | Strict type checking (supplementary; some checks suppressed in `pyproject.toml`) | +| bandit | Security linting | +| xenon | Cyclomatic complexity: `--max-absolute B --max-modules A --max-average A` | +| pyroma | Package metadata quality | +| djlint | HTML linting | +| pydocstyle | Docstring style (Google convention) | +| doc8 | RST/Markdown style, 120-char lines | + +### Code quality gotchas + +- **pylint `too-many-locals`** counts function *parameters* as locals. Limit is 12 total + (params + body variables). Inline a temporary variable or extract a helper to reduce the count. +- **ruff TC003**: imports from `collections.abc` or `pathlib` used *only in annotations* must live + inside the `if TYPE_CHECKING:` block (safe with `from __future__ import annotations`). +- **ruff PIE807**: use `field(default_factory=list)` / `field(default_factory=dict)`, not lambdas. +- **xenon average A (≤ 5.0)**: extracting a helper lowers the module average even when the total CC + sum stays the same — it adds a new block to the denominator. +- **suppress comments must be on the same line as the violation**, not on a trailing `)`. 
Black may + move a trailing comment from `_fn(\n arg\n) # noqa` to the `)` line, detaching it from the + actual offending expression. Put the comment on the opening `_fn( # noqa` line instead. +- **`str(None) == "None"`** (truthy): when converting an `object` value to `str | None`, check + `value` first — `return str(value) if value else None` — not `s = str(value); return s if s`. From 6177f5083ab934bab8afce81b3f809062d2d4dc4 Mon Sep 17 00:00:00 2001 From: Ben Spoor Date: Mon, 2 Mar 2026 22:52:04 +0100 Subject: [PATCH 05/17] Also check in action with stricter rules --- .github/workflows/test.yml | 26 ++--- dfetch_hub/catalog/cloner.py | 8 +- dfetch_hub/catalog/sources/clib.py | 20 ++-- dfetch_hub/catalog/sources/conan.py | 6 +- dfetch_hub/catalog/writer.py | 34 ++---- dfetch_hub/cli.py | 4 +- dfetch_hub/commands/publish.py | 18 +--- dfetch_hub/commands/update.py | 16 +-- dfetch_hub/config.py | 7 +- pyproject.toml | 3 + test/test_catalog_clib.py | 40 ++----- test/test_catalog_cloner.py | 8 +- test/test_catalog_conan.py | 25 ++--- test/test_catalog_readme.py | 4 +- test/test_catalog_sources.py | 8 +- test/test_catalog_vcpkg.py | 21 +--- test/test_catalog_writer.py | 156 +++++++--------------------- 17 files changed, 109 insertions(+), 295 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6050f6e..cb154db 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -22,16 +22,16 @@ jobs: run: | pip install .[development] - - run: isort --diff dfetch_hub # Checks import order - - run: black --check dfetch_hub # Checks code style - - run: ruff check dfetch_hub # Fast linter (bugbear, comprehensions, naming, …) - - run: pylint dfetch_hub # Deep static analysis - - run: mypy --strict dfetch_hub # mypy type check (strict) - - run: pyright dfetch_hub # pyright type check (standard) - - run: bandit -c pyproject.toml -r dfetch_hub # Security linting - - run: xenon --max-absolute C --max-modules B --max-average A dfetch_hub 
# Cyclomatic complexity - - run: pyroma --directory --min 9 . # Package metadata quality - - run: djlint --lint dfetch_hub # HTML linting - - run: pydocstyle --convention=google dfetch_hub # Docstring style (Google convention) - - run: doc8 --extension .md --max-line-length 120 README.md # Documentation prose style - - run: pytest --cov=dfetch_hub test # Run tests with coverage + - run: isort --check dfetch_hub # Import order (config: [tool.isort]) + - run: black --check dfetch_hub # Code style (config: [tool.black]) + - run: ruff check dfetch_hub # Lint rules (config: [tool.ruff]) + - run: pylint dfetch_hub # Static analysis (config: [tool.pylint]) + - run: mypy # Type check (config: [tool.mypy] files+strict) + - run: pyright # Type check (config: [tool.pyright]) + - run: bandit -c pyproject.toml -r dfetch_hub # Security (config: [tool.bandit]) + - run: xenon --max-absolute B --max-modules A --max-average A dfetch_hub # Complexity + - run: pyroma --directory --min 9 . # Package metadata quality + - run: djlint --lint dfetch_hub # HTML lint (config: [tool.djlint]) + - run: pydocstyle dfetch_hub # Docstrings (config: [tool.pydocstyle]) + - run: doc8 # Doc style (config: [tool.doc8]) + - run: pytest # Tests (config: [tool.pytest.ini_options]) diff --git a/dfetch_hub/catalog/cloner.py b/dfetch_hub/catalog/cloner.py index 5d0a057..3af0d71 100644 --- a/dfetch_hub/catalog/cloner.py +++ b/dfetch_hub/catalog/cloner.py @@ -108,12 +108,8 @@ def clone_source(source: SourceConfig, dest_dir: Path) -> Path: cloned = dest_dir / source.name if not cloned.resolve().is_relative_to(dest_dir.resolve()): - raise RuntimeError( - f"Source name {source.name!r} resolves outside dest_dir {dest_dir}" - ) + raise RuntimeError(f"Source name {source.name!r} resolves outside dest_dir {dest_dir}") if not cloned.is_dir(): - raise RuntimeError( - f"Expected dfetch output directory {cloned} not found after update" - ) + raise RuntimeError(f"Expected dfetch output directory {cloned} not found after 
update") logger.debug("Clone complete: %s", cloned) return cloned diff --git a/dfetch_hub/catalog/sources/clib.py b/dfetch_hub/catalog/sources/clib.py index 899780d..ce13489 100644 --- a/dfetch_hub/catalog/sources/clib.py +++ b/dfetch_hub/catalog/sources/clib.py @@ -155,8 +155,8 @@ def _build_package( # pylint: disable=too-many-locals pkg_json = _fetch_package_json(owner, repo) if is_github else None if pkg_json is not None: - package_name, description, license_val, version_val, json_kws, canonical_url = ( - _enrich_from_pkg_json(pkg_json, vcs_url, tagline, repo) + package_name, description, license_val, version_val, json_kws, canonical_url = _enrich_from_pkg_json( + pkg_json, vcs_url, tagline, repo ) else: package_name, description, license_val, version_val, json_kws = ( @@ -168,9 +168,7 @@ def _build_package( # pylint: disable=too-many-locals ) canonical_url = vcs_url - keywords: list[str] = ([category] if category else []) + [ - k for k in json_kws if k != category - ] + keywords: list[str] = ([category] if category else []) + [k for k in json_kws if k != category] return CLibPackage( entry_name=f"{host}/{owner}/{repo}", package_name=package_name, @@ -184,9 +182,7 @@ def _build_package( # pylint: disable=too-many-locals ) -def _process_wiki_line( - line: str, current_category: str -) -> tuple[str, CLibPackage | None]: +def _process_wiki_line(line: str, current_category: str) -> tuple[str, CLibPackage | None]: """Process one Packages.md line; return updated category and optional package. 
Args: @@ -212,14 +208,10 @@ def _process_wiki_line( return current_category, None host, owner, repo = parsed - return current_category, _build_package( - host, owner, repo, (tagline or "").strip(), current_category - ) + return current_category, _build_package(host, owner, repo, (tagline or "").strip(), current_category) -def parse_packages_md( - packages_md: "Path", limit: int | None = None -) -> list[CLibPackage]: +def parse_packages_md(packages_md: "Path", limit: int | None = None) -> list[CLibPackage]: """Parse ``Packages.md`` from the clib wiki into a list of :class:`CLibPackage`. For each bullet-point entry the function: diff --git a/dfetch_hub/catalog/sources/conan.py b/dfetch_hub/catalog/sources/conan.py index d933884..94b6a58 100644 --- a/dfetch_hub/catalog/sources/conan.py +++ b/dfetch_hub/catalog/sources/conan.py @@ -139,11 +139,7 @@ def _latest_version(config_path: Path) -> tuple[str | None, str]: if versions: latest = list(versions)[-1] latest_meta = versions.get(latest) - folder = ( - str(latest_meta.get("folder", "all")) - if isinstance(latest_meta, dict) - else "all" - ) + folder = str(latest_meta.get("folder", "all")) if isinstance(latest_meta, dict) else "all" return latest, folder except (OSError, yaml.YAMLError) as exc: logger.debug("Could not parse %s: %s", config_path, exc) diff --git a/dfetch_hub/catalog/writer.py b/dfetch_hub/catalog/writer.py index dc74f38..5f7e54a 100644 --- a/dfetch_hub/catalog/writer.py +++ b/dfetch_hub/catalog/writer.py @@ -50,9 +50,7 @@ def _catalog_id(vcs_host: str, org: str, repo: str) -> str: def _fetch_upstream_tags(url: str) -> list[dict[str, Any]]: """Return git tags from *url* using dfetch's GitRemote.""" try: - info = GitRemote._ls_remote( # pylint: disable=protected-access # pyright: ignore[reportPrivateUsage] - url - ) + info = GitRemote._ls_remote(url) # pylint: disable=protected-access # pyright: ignore[reportPrivateUsage] except Exception as exc: # pylint: disable=broad-exception-caught 
logger.warning("Could not list tags for %s: %s", url, exc) # pragma: no cover return [] # pragma: no cover @@ -226,16 +224,9 @@ def _merge_catalog_sources( new_source = _catalog_source_entry(manifest, source_name, label, registry_path) new_index_path = new_source["index_path"] detail["catalog_sources"] = sources = [ - s - for s in sources - if not ( - s.get("index_path") == new_index_path - and s.get("source_name") != source_name - ) + s for s in sources if not (s.get("index_path") == new_index_path and s.get("source_name") != source_name) ] - existing_source = next( - (s for s in sources if s.get("source_name") == source_name), None - ) + existing_source = next((s for s in sources if s.get("source_name") == source_name), None) if existing_source is None: sources.append(new_source) else: @@ -260,8 +251,7 @@ def _merge_detail( # pylint: disable=too-many-arguments,too-many-positional-arg "subfolder_path": None, "catalog_sources": [], "manifests": [], - "readme": fetched_readme - or _generate_readme(manifest, repo, manifest.homepage or ""), + "readme": fetched_readme or _generate_readme(manifest, repo, manifest.homepage or ""), "tags": [], "branches": [ {"name": "main", "is_tag": False, "commit_sha": None, "date": None}, @@ -385,16 +375,12 @@ def _process_manifest( # pylint: disable=too-many-arguments,too-many-positional was successfully written; both are ``False`` when the manifest is skipped. 
""" if not manifest.homepage: - logger.warning( - "cannot determine upstream repo without a URL of %s", manifest.entry_name - ) + logger.warning("cannot determine upstream repo without a URL of %s", manifest.entry_name) return False, False parsed = parse_vcs_slug(manifest.homepage) if not parsed: - logger.warning( - "skipping entry without recognized VCS URL: %s", manifest.homepage - ) + logger.warning("skipping entry without recognized VCS URL: %s", manifest.homepage) return False, False host, org, repo = parsed @@ -403,9 +389,7 @@ def _process_manifest( # pylint: disable=too-many-arguments,too-many-positional catalog[_catalog_id(vcs_host, org, repo)] = _merge_catalog_entry( existing_entry, manifest, vcs_host, org, repo, label ) - _write_detail_json( - data_dir, vcs_host, org, repo, manifest, source_name, label, registry_path - ) + _write_detail_json(data_dir, vcs_host, org, repo, manifest, source_name, label, registry_path) return existing_entry is None, existing_entry is not None @@ -435,9 +419,7 @@ def write_catalog( updated = 0 for manifest in manifests: - was_added, was_updated = _process_manifest( - manifest, catalog, data_dir, source_name, label, registry_path - ) + was_added, was_updated = _process_manifest(manifest, catalog, data_dir, source_name, label, registry_path) added += was_added updated += was_updated diff --git a/dfetch_hub/cli.py b/dfetch_hub/cli.py index 4266e86..b644431 100644 --- a/dfetch_hub/cli.py +++ b/dfetch_hub/cli.py @@ -32,9 +32,7 @@ def main(args: list[str] | None = None) -> None: logger.debug("Could not determine package version: %s", exc) pkg_version = "unknown" - root_logger.info( - "[bold blue]Dfetch:[white]hub[/white] (%s)[/bold blue]", pkg_version - ) + root_logger.info("[bold blue]Dfetch:[white]hub[/white] (%s)[/bold blue]", pkg_version) parser = argparse.ArgumentParser( prog="dfetch-hub", diff --git a/dfetch_hub/commands/publish.py b/dfetch_hub/commands/publish.py index 1ea2384..e56bba1 100644 --- 
a/dfetch_hub/commands/publish.py +++ b/dfetch_hub/commands/publish.py @@ -44,19 +44,13 @@ def _validate_output_dir(data_dir: Path, output: Path) -> None: output: Intended output directory that must not overlap *data_dir*. """ if not data_dir.is_dir(): - logger.error( - "Catalog data directory '%s' does not exist or is not a directory", data_dir - ) + logger.error("Catalog data directory '%s' does not exist or is not a directory", data_dir) sys.exit(1) if not any(data_dir.rglob("*.json")): logger.error("Catalog data directory '%s' contains no JSON files", data_dir) sys.exit(1) out_res, src_res = output.resolve(), data_dir.resolve() - if ( - out_res == src_res - or out_res.is_relative_to(src_res) - or src_res.is_relative_to(out_res) - ): + if out_res == src_res or out_res.is_relative_to(src_res) or src_res.is_relative_to(out_res): logger.error( "Output '%s' and data directory '%s' overlap — aborting to prevent data loss", output, @@ -82,9 +76,7 @@ def _copy_assets(site_dir: Path, output: Path) -> None: dst.parent.mkdir(parents=True, exist_ok=True) if src.name == "index.html": text = src.read_text(encoding="utf-8") - dst.write_text( - re.sub(r"(['\"`])\.\.\/data\/", r"\1data/", text), encoding="utf-8" - ) + dst.write_text(re.sub(r"(['\"`])\.\.\/data\/", r"\1data/", text), encoding="utf-8") else: shutil.copy2(src, dst) @@ -109,9 +101,7 @@ def _minify_catalog(data_dir: Path, output: Path) -> int: def _cmd_publish(parsed: argparse.Namespace) -> None: """Build a deployable static site for GitHub Pages / GitLab Pages.""" output = Path(parsed.output) - _config, data_dir = load_config_with_data_dir( - parsed.config, parsed.data_dir, _DEFAULT_DATA_DIR - ) + _config, data_dir = load_config_with_data_dir(parsed.config, parsed.data_dir, _DEFAULT_DATA_DIR) _validate_output_dir(data_dir, output) if output.exists(): diff --git a/dfetch_hub/commands/update.py b/dfetch_hub/commands/update.py index 54d8d7e..fab6f94 100644 --- a/dfetch_hub/commands/update.py +++ 
b/dfetch_hub/commands/update.py @@ -137,9 +137,7 @@ def _process_subfolders_source( ) return - logger.print_info_line( - source.name, f"Fetching {source.url} (src: {source.path}) ..." - ) + logger.print_info_line(source.name, f"Fetching {source.url} (src: {source.path}) ...") with tempfile.TemporaryDirectory(prefix="dfetch-hub-") as tmp: fetched_dir = clone_source(source, Path(tmp)) @@ -149,9 +147,7 @@ def _process_subfolders_source( entry_dirs = entry_dirs[:limit] logger.print_info_line(source.name, f"Parsing {len(entry_dirs)} package(s) ...") - manifests, skipped = _parse_entry_dirs( - entry_dirs, parse_fn, _subfolder_homepage(source) - ) + manifests, skipped = _parse_entry_dirs(entry_dirs, parse_fn, _subfolder_homepage(source)) if skipped: logger.print_warning_line( @@ -204,9 +200,7 @@ def _process_git_wiki_source( logger.print_info_line(source.name, f"Parsing {source.manifest} ...") packages: list[CLibPackage] = parse_packages_md(index_file, limit=limit) - logger.print_info_line( - source.name, f"Fetched metadata for {len(packages)} package(s)" - ) + logger.print_info_line(source.name, f"Fetched metadata for {len(packages)} package(s)") _added, _updated = write_catalog( packages, # type: ignore[arg-type] data_dir, @@ -239,9 +233,7 @@ def _process_source( def _cmd_update(parsed: argparse.Namespace) -> None: """Run the catalog update pipeline.""" - config, data_dir = load_config_with_data_dir( - parsed.config, parsed.data_dir, _DEFAULT_DATA_DIR - ) + config, data_dir = load_config_with_data_dir(parsed.config, parsed.data_dir, _DEFAULT_DATA_DIR) sources = config.sources if parsed.source: diff --git a/dfetch_hub/config.py b/dfetch_hub/config.py index f3881cc..2a442c0 100644 --- a/dfetch_hub/config.py +++ b/dfetch_hub/config.py @@ -93,17 +93,14 @@ def load_config(path: str = "dfetch-hub.toml") -> HubConfig: raise TypeError("`[settings]` must be a TOML table") raw_sources_obj = data.get("source", []) - if not isinstance(raw_sources_obj, list) or any( - not 
isinstance(raw, dict) for raw in raw_sources_obj - ): + if not isinstance(raw_sources_obj, list) or any(not isinstance(raw, dict) for raw in raw_sources_obj): raise TypeError("`[[source]]` must be an array of TOML tables") raw_settings = {k: v for k, v in raw_settings_obj.items() if k in _SETTINGS_FIELDS} settings = Settings(**raw_settings) sources: list[SourceConfig] = [ - SourceConfig(**{k: v for k, v in raw.items() if k in _SOURCE_FIELDS}) - for raw in raw_sources_obj + SourceConfig(**{k: v for k, v in raw.items() if k in _SOURCE_FIELDS}) for raw in raw_sources_obj ] return HubConfig(settings=settings, sources=sources) diff --git a/pyproject.toml b/pyproject.toml index c89d050..c9078c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,6 +67,9 @@ dfetch_hub = [ "data/**/*.json", ] +[tool.black] +line-length = 120 + [tool.isort] profile = "black" line_length = 120 diff --git a/test/test_catalog_clib.py b/test/test_catalog_clib.py index 46b4add..dc1605c 100644 --- a/test/test_catalog_clib.py +++ b/test/test_catalog_clib.py @@ -106,9 +106,7 @@ def test_build_package_uses_json_homepage_as_canonical_url() -> None: """Prefers the explicit homepage from package.json over the VCS URL.""" pkg_json = {**_PACKAGE_JSON_BUFFER, "homepage": "https://example.com/buffer"} with ( - patch( - "dfetch_hub.catalog.sources.clib._fetch_package_json", return_value=pkg_json - ), + patch("dfetch_hub.catalog.sources.clib._fetch_package_json", return_value=pkg_json), patch("dfetch_hub.catalog.sources.clib.fetch_readme", return_value=None), ): pkg = _build_package("github.com", "clibs", "buffer", "desc", "Strings") @@ -130,12 +128,8 @@ def test_build_package_falls_back_to_vcs_url_when_no_package_json() -> None: def test_build_package_non_github_uses_vcs_url() -> None: """Non-GitHub entries use their own VCS URL as homepage.""" with ( - patch( - "dfetch_hub.catalog.sources.clib._fetch_package_json", return_value=None - ) as mock_pkg_json, - patch( - 
"dfetch_hub.catalog.sources.clib.fetch_readme", return_value=None - ) as mock_readme, + patch("dfetch_hub.catalog.sources.clib._fetch_package_json", return_value=None) as mock_pkg_json, + patch("dfetch_hub.catalog.sources.clib.fetch_readme", return_value=None) as mock_readme, ): pkg = _build_package("gitlab.com", "myorg", "myrepo", "a gitlab lib", "Tools") mock_pkg_json.assert_not_called() @@ -156,9 +150,7 @@ def test_build_package_stores_fetched_readme() -> None: "dfetch_hub.catalog.sources.clib._fetch_package_json", return_value=_PACKAGE_JSON_BUFFER, ), - patch( - "dfetch_hub.catalog.sources.clib.fetch_readme", return_value=_SAMPLE_README - ), + patch("dfetch_hub.catalog.sources.clib.fetch_readme", return_value=_SAMPLE_README), ): pkg = _build_package("github.com", "clibs", "buffer", "desc", "Strings") @@ -179,12 +171,8 @@ def test_build_package_readme_none_when_not_found() -> None: def test_build_package_non_github_readme_is_none() -> None: """Non-GitHub packages always have readme_content=None (raw URL not available).""" with ( - patch( - "dfetch_hub.catalog.sources.clib._fetch_package_json", return_value=None - ) as mock_pkg_json, - patch( - "dfetch_hub.catalog.sources.clib.fetch_readme", return_value=None - ) as mock_readme, + patch("dfetch_hub.catalog.sources.clib._fetch_package_json", return_value=None) as mock_pkg_json, + patch("dfetch_hub.catalog.sources.clib.fetch_readme", return_value=None) as mock_readme, ): pkg = _build_package("gitlab.com", "org", "repo", "desc", "Cat") mock_pkg_json.assert_not_called() @@ -269,14 +257,10 @@ def test_build_package_category_not_duplicated_in_keywords() -> None: """Category keyword that already appears in package.json keywords is not duplicated.""" pkg_json = {**_PACKAGE_JSON_BUFFER, "keywords": ["String manipulation", "buffer"]} with ( - patch( - "dfetch_hub.catalog.sources.clib._fetch_package_json", return_value=pkg_json - ), + patch("dfetch_hub.catalog.sources.clib._fetch_package_json", return_value=pkg_json), 
patch("dfetch_hub.catalog.sources.clib.fetch_readme", return_value=None), ): - pkg = _build_package( - "github.com", "clibs", "buffer", "desc", "String manipulation" - ) + pkg = _build_package("github.com", "clibs", "buffer", "desc", "String manipulation") assert pkg.keywords.count("String manipulation") == 1 @@ -298,9 +282,7 @@ def test_parse_packages_md_includes_non_github_vcs_urls(packages_md_file: Path) pkgs = parse_packages_md(packages_md_file) urls = [p.homepage for p in pkgs] - assert any( - "gitlab.com" in (u or "") for u in urls - ), "Expected GitLab entry to be included" + assert any("gitlab.com" in (u or "") for u in urls), "Expected GitLab entry to be included" def test_parse_packages_md_non_github_entry_has_no_readme( @@ -422,9 +404,7 @@ def test_parse_packages_md_limit_zero_returns_empty(packages_md_file: Path) -> N def test_fetch_package_json_returns_dict_on_valid_response() -> None: """Returns the parsed dict when the raw response is valid JSON.""" data = {"name": "buffer", "version": "0.4.0"} - with patch( - "dfetch_hub.catalog.sources.clib.fetch_raw", return_value=json.dumps(data) - ): + with patch("dfetch_hub.catalog.sources.clib.fetch_raw", return_value=json.dumps(data)): result = _fetch_package_json("clibs", "buffer") assert result == data diff --git a/test/test_catalog_cloner.py b/test/test_catalog_cloner.py index eac5060..0017766 100644 --- a/test/test_catalog_cloner.py +++ b/test/test_catalog_cloner.py @@ -86,9 +86,7 @@ def test_create_manifest_idempotent(tmp_path: Path) -> None: # --------------------------------------------------------------------------- -def _patch_dfetch( - tmp_path: Path, source: SourceConfig -) -> tuple[Path, MagicMock, MagicMock]: +def _patch_dfetch(tmp_path: Path, source: SourceConfig) -> tuple[Path, MagicMock, MagicMock]: """Create the expected output dir and return (fetched_path, mock_project, mock_sub).""" fetched = tmp_path / source.name fetched.mkdir() @@ -175,9 +173,7 @@ def 
test_clone_source_passes_manifest_path_to_parse(tmp_path: Path) -> None: "bad_name", ["..", ".", "a/b", "/absolute"], ) -def test_create_manifest_raises_for_unsafe_source_name( - tmp_path: Path, bad_name: str -) -> None: +def test_create_manifest_raises_for_unsafe_source_name(tmp_path: Path, bad_name: str) -> None: """create_manifest raises ValueError for names that could escape dest_dir.""" bad_source = SourceConfig( name=bad_name, diff --git a/test/test_catalog_conan.py b/test/test_catalog_conan.py index 4fdf4a5..98e5c2a 100644 --- a/test/test_catalog_conan.py +++ b/test/test_catalog_conan.py @@ -71,9 +71,7 @@ class ZlibConan(ConanFile): @pytest.fixture(autouse=True) def _mock_fetch_readme() -> object: """Prevent real network calls to fetch_readme in all conan tests.""" - with patch( - "dfetch_hub.catalog.sources.conan.fetch_readme_for_homepage", return_value=None - ): + with patch("dfetch_hub.catalog.sources.conan.fetch_readme_for_homepage", return_value=None): yield @@ -97,10 +95,7 @@ def test_extract_str_attr_simple() -> None: def test_extract_str_attr_homepage() -> None: - assert ( - _extract_str_attr(_CONANFILE_SIMPLE, "homepage") - == "https://github.com/abseil/abseil-cpp" - ) + assert _extract_str_attr(_CONANFILE_SIMPLE, "homepage") == "https://github.com/abseil/abseil-cpp" def test_extract_str_attr_license() -> None: @@ -189,18 +184,14 @@ def test_parse_conan_recipe_entry_name_is_dir_name(recipe_dir: Path) -> None: def test_parse_conan_recipe_no_conanfile_returns_none(tmp_path: Path) -> None: - (tmp_path / "config.yml").write_text( - 'versions:\n "1.0":\n folder: all\n', encoding="utf-8" - ) + (tmp_path / "config.yml").write_text('versions:\n "1.0":\n folder: all\n', encoding="utf-8") # No conanfile.py created assert parse_conan_recipe(tmp_path) is None def test_parse_conan_recipe_falls_back_to_any_subfolder(tmp_path: Path) -> None: """When config.yml points to a missing folder, fall back to scanning subdirs.""" - (tmp_path / "config.yml").write_text( - 
'versions:\n "1.0":\n folder: 1.x\n', encoding="utf-8" - ) + (tmp_path / "config.yml").write_text('versions:\n "1.0":\n folder: 1.x\n', encoding="utf-8") other_dir = tmp_path / "other" other_dir.mkdir() (other_dir / "conanfile.py").write_text(_CONANFILE_SIMPLE, encoding="utf-8") @@ -214,9 +205,7 @@ def test_parse_conan_recipe_multiline_description(tmp_path: Path) -> None: all_dir = tmp_path / "all" all_dir.mkdir() (all_dir / "conanfile.py").write_text(_CONANFILE_MULTILINE_DESC, encoding="utf-8") - (tmp_path / "config.yml").write_text( - 'versions:\n "1.3.1":\n folder: all\n', encoding="utf-8" - ) + (tmp_path / "config.yml").write_text('versions:\n "1.3.1":\n folder: all\n', encoding="utf-8") m = parse_conan_recipe(tmp_path) assert m is not None @@ -363,9 +352,7 @@ def test_parse_conan_recipe_urls_empty_without_homepage(tmp_path: Path) -> None: all_dir = tmp_path / "all" all_dir.mkdir() (all_dir / "conanfile.py").write_text(minimal, encoding="utf-8") - (tmp_path / "config.yml").write_text( - 'versions:\n "1.0":\n folder: all\n', encoding="utf-8" - ) + (tmp_path / "config.yml").write_text('versions:\n "1.0":\n folder: all\n', encoding="utf-8") m = parse_conan_recipe(tmp_path) assert m is not None diff --git a/test/test_catalog_readme.py b/test/test_catalog_readme.py index 35375b3..d52bf1a 100644 --- a/test/test_catalog_readme.py +++ b/test/test_catalog_readme.py @@ -129,9 +129,7 @@ def test_parses_readme_md(self, tmp_path: Path) -> None: """Parses a README.md file and returns a BaseManifest.""" pkg = tmp_path / "my-pkg" pkg.mkdir() - (pkg / "README.md").write_text( - "# My Package\n\nA great package.", encoding="utf-8" - ) + (pkg / "README.md").write_text("# My Package\n\nA great package.", encoding="utf-8") result = parse_readme_dir(pkg) diff --git a/test/test_catalog_sources.py b/test/test_catalog_sources.py index 5a8cb24..1c7f2f8 100644 --- a/test/test_catalog_sources.py +++ b/test/test_catalog_sources.py @@ -100,9 +100,7 @@ def 
test_fetch_raw_returns_none_on_url_error() -> None: def test_fetch_raw_returns_none_on_os_error() -> None: """Returns None when urllib raises OSError (socket-level failure).""" - with patch( - "dfetch_hub.catalog.sources.urlopen", side_effect=OSError("connection reset") - ): + with patch("dfetch_hub.catalog.sources.urlopen", side_effect=OSError("connection reset")): result = fetch_raw("https://example.com/file") assert result is None @@ -171,9 +169,7 @@ def test_fetch_readme_for_homepage_returns_none_for_non_github_host() -> None: def test_fetch_readme_for_homepage_delegates_to_fetch_readme_for_github() -> None: """Calls fetch_readme(owner, repo) for GitHub URLs and returns its result.""" - with patch( - "dfetch_hub.catalog.sources.fetch_readme", return_value="# content" - ) as mock_fn: + with patch("dfetch_hub.catalog.sources.fetch_readme", return_value="# content") as mock_fn: result = fetch_readme_for_homepage("https://github.com/myorg/myrepo") mock_fn.assert_called_once_with("myorg", "myrepo") diff --git a/test/test_catalog_vcpkg.py b/test/test_catalog_vcpkg.py index 3e4f5ab..d7f9aa3 100644 --- a/test/test_catalog_vcpkg.py +++ b/test/test_catalog_vcpkg.py @@ -36,9 +36,7 @@ @pytest.fixture(autouse=True) def _mock_readme() -> object: """Prevent real network calls in all vcpkg tests.""" - with patch( - "dfetch_hub.catalog.sources.vcpkg.fetch_readme_for_homepage", return_value=None - ): + with patch("dfetch_hub.catalog.sources.vcpkg.fetch_readme_for_homepage", return_value=None): yield @@ -78,10 +76,7 @@ def test_extract_description_plain_string() -> None: def test_extract_description_list_joined() -> None: """List description elements are joined with a space.""" - assert ( - _extract_description({"description": ["Summary.", "Detail."]}) - == "Summary. Detail." - ) + assert _extract_description({"description": ["Summary.", "Detail."]}) == "Summary. Detail." 
def test_extract_description_missing_returns_empty() -> None: @@ -106,9 +101,7 @@ def test_extract_dependencies_dict_items() -> None: def test_extract_dependencies_mixed() -> None: """Mixed string/dict entries are flattened; dicts without 'name' are skipped.""" - result = _extract_dependencies( - {"dependencies": ["a", {"name": "b"}, {"other": "x"}]} - ) + result = _extract_dependencies({"dependencies": ["a", {"name": "b"}, {"other": "x"}]}) assert result == ["a", "b"] @@ -184,9 +177,7 @@ def test_parse_vcpkg_json_uses_dir_name_when_no_name_field(tmp_path: Path) -> No """Falls back to the directory name when vcpkg.json has no 'name' field.""" pkg = tmp_path / "mylib" pkg.mkdir() - (pkg / "vcpkg.json").write_text( - json.dumps({"description": "A library"}), encoding="utf-8" - ) + (pkg / "vcpkg.json").write_text(json.dumps({"description": "A library"}), encoding="utf-8") result = parse_vcpkg_json(pkg) @@ -214,9 +205,7 @@ def test_parse_vcpkg_json_version_date_key(tmp_path: Path) -> None: """version-date is used when version-semver is absent.""" pkg = tmp_path / "pkg" pkg.mkdir() - (pkg / "vcpkg.json").write_text( - json.dumps({"name": "pkg", "version-date": "2024-01-16"}), encoding="utf-8" - ) + (pkg / "vcpkg.json").write_text(json.dumps({"name": "pkg", "version-date": "2024-01-16"}), encoding="utf-8") result = parse_vcpkg_json(pkg) diff --git a/test/test_catalog_writer.py b/test/test_catalog_writer.py index 74c12f7..0fc0b5a 100644 --- a/test/test_catalog_writer.py +++ b/test/test_catalog_writer.py @@ -91,9 +91,7 @@ def _existing_detail() -> dict[str, Any]: "manifests": [], "readme": "placeholder readme", "tags": [], - "branches": [ - {"name": "main", "is_tag": False, "commit_sha": None, "date": None} - ], + "branches": [{"name": "main", "is_tag": False, "commit_sha": None, "date": None}], "license_text": None, "fetched_at": "2024-01-01T00:00:00+00:00", } @@ -190,41 +188,31 @@ def test_catalog_id_gitlab() -> None: def test_merge_catalog_entry_new_has_correct_id() -> 
None: """New entry has the correct vcs_host/org/repo ID.""" - entry = _merge_catalog_entry( - None, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg" - ) + entry = _merge_catalog_entry(None, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg") assert entry["id"] == "github/abseil/abseil-cpp" def test_merge_catalog_entry_new_populates_name() -> None: """New entry takes package_name from the manifest.""" - entry = _merge_catalog_entry( - None, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg" - ) + entry = _merge_catalog_entry(None, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg") assert entry["name"] == "abseil-cpp" def test_merge_catalog_entry_new_populates_description() -> None: """New entry takes description from the manifest.""" - entry = _merge_catalog_entry( - None, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg" - ) + entry = _merge_catalog_entry(None, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg") assert entry["description"] == "Abseil C++ libraries from Google" def test_merge_catalog_entry_new_populates_license() -> None: """New entry takes license from the manifest.""" - entry = _merge_catalog_entry( - None, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg" - ) + entry = _merge_catalog_entry(None, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg") assert entry["license"] == "Apache-2.0" def test_merge_catalog_entry_source_type_matches_vcs_host() -> None: """source_type equals the vcs_host passed in.""" - github_entry = _merge_catalog_entry( - None, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg" - ) + github_entry = _merge_catalog_entry(None, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg") assert github_entry["source_type"] == "github" gitlab_entry = _merge_catalog_entry( @@ -240,17 +228,13 @@ def test_merge_catalog_entry_source_type_matches_vcs_host() -> None: def test_merge_catalog_entry_adds_source_label() -> None: """Label is present in source_labels of the new entry.""" - entry = 
_merge_catalog_entry( - None, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg" - ) + entry = _merge_catalog_entry(None, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg") assert "vcpkg" in entry["source_labels"] def test_merge_catalog_entry_adds_version_tag() -> None: """Version is recorded in the tags list.""" - entry = _merge_catalog_entry( - None, _manifest(version="20240116.2"), "github", "abseil", "abseil-cpp", "vcpkg" - ) + entry = _merge_catalog_entry(None, _manifest(version="20240116.2"), "github", "abseil", "abseil-cpp", "vcpkg") tag_names = {t["name"] for t in entry["tags"]} assert "20240116.2" in tag_names @@ -258,9 +242,7 @@ def test_merge_catalog_entry_adds_version_tag() -> None: def test_merge_catalog_entry_no_duplicate_version_tag() -> None: """The same version is not added a second time.""" existing = _existing_catalog_entry() - existing["tags"] = [ - {"name": "20240116.2", "is_tag": True, "commit_sha": None, "date": None} - ] + existing["tags"] = [{"name": "20240116.2", "is_tag": True, "commit_sha": None, "date": None}] entry = _merge_catalog_entry( existing, _manifest(version="20240116.2"), @@ -275,9 +257,7 @@ def test_merge_catalog_entry_no_duplicate_version_tag() -> None: def test_merge_catalog_entry_merges_source_labels() -> None: """Updating an existing entry preserves its other labels.""" existing = _existing_catalog_entry(label="conan") - entry = _merge_catalog_entry( - existing, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg" - ) + entry = _merge_catalog_entry(existing, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg") assert "conan" in entry["source_labels"] assert "vcpkg" in entry["source_labels"] @@ -285,25 +265,19 @@ def test_merge_catalog_entry_merges_source_labels() -> None: def test_merge_catalog_entry_no_duplicate_label() -> None: """Applying the same label twice does not duplicate it.""" existing = _existing_catalog_entry(label="vcpkg") - entry = _merge_catalog_entry( - existing, _manifest(), "github", 
"abseil", "abseil-cpp", "vcpkg" - ) + entry = _merge_catalog_entry(existing, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg") assert entry["source_labels"].count("vcpkg") == 1 def test_merge_catalog_entry_url_uses_homepage() -> None: """New entry uses manifest.homepage as the package URL.""" - entry = _merge_catalog_entry( - None, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg" - ) + entry = _merge_catalog_entry(None, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg") assert entry["url"] == "https://github.com/abseil/abseil-cpp" def test_merge_catalog_entry_no_version_no_tag_added() -> None: """No tag entry is created when version is None.""" - entry = _merge_catalog_entry( - None, _manifest(version=None), "github", "abseil", "abseil-cpp", "vcpkg" - ) + entry = _merge_catalog_entry(None, _manifest(version=None), "github", "abseil", "abseil-cpp", "vcpkg") assert not entry["tags"] @@ -311,27 +285,21 @@ def test_merge_catalog_entry_backfills_missing_description() -> None: """Existing entry with no description is backfilled from the manifest.""" existing = _existing_catalog_entry() existing["description"] = None - entry = _merge_catalog_entry( - existing, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg" - ) + entry = _merge_catalog_entry(existing, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg") assert entry["description"] == "Abseil C++ libraries from Google" def test_merge_catalog_entry_does_not_overwrite_existing_description() -> None: """An already-populated description must not be replaced by the manifest.""" existing = _existing_catalog_entry() # description = "old description" - entry = _merge_catalog_entry( - existing, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg" - ) + entry = _merge_catalog_entry(existing, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg") assert entry["description"] == "old description" def test_merge_catalog_entry_backfills_missing_license() -> None: """Existing entry with no license is 
backfilled from the manifest.""" existing = _existing_catalog_entry() # license = None by default - entry = _merge_catalog_entry( - existing, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg" - ) + entry = _merge_catalog_entry(existing, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg") assert entry["license"] == "Apache-2.0" @@ -339,21 +307,15 @@ def test_merge_catalog_entry_does_not_overwrite_existing_license() -> None: """An already-populated license must not be replaced by the manifest.""" existing = _existing_catalog_entry() existing["license"] = "MIT" - entry = _merge_catalog_entry( - existing, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg" - ) + entry = _merge_catalog_entry(existing, _manifest(), "github", "abseil", "abseil-cpp", "vcpkg") assert entry["license"] == "MIT" def test_merge_catalog_entry_v_prefix_tag_not_duplicated() -> None: """Version '1.2.3' is not added if 'v1.2.3' already exists in the tag list.""" existing = _existing_catalog_entry() - existing["tags"] = [ - {"name": "v1.2.3", "is_tag": True, "commit_sha": None, "date": None} - ] - entry = _merge_catalog_entry( - existing, _manifest(version="1.2.3"), "github", "abseil", "abseil-cpp", "vcpkg" - ) + existing["tags"] = [{"name": "v1.2.3", "is_tag": True, "commit_sha": None, "date": None}] + entry = _merge_catalog_entry(existing, _manifest(version="1.2.3"), "github", "abseil", "abseil-cpp", "vcpkg") assert sum(1 for t in entry["tags"] if t["name"].lstrip("v") == "1.2.3") == 1 @@ -364,16 +326,12 @@ def test_merge_catalog_entry_v_prefix_tag_not_duplicated() -> None: def test_generate_readme_contains_package_name() -> None: """Package name appears in the generated README heading.""" - assert "abseil-cpp" in _generate_readme( - _manifest(), "abseil-cpp", "https://github.com/abseil/abseil-cpp" - ) + assert "abseil-cpp" in _generate_readme(_manifest(), "abseil-cpp", "https://github.com/abseil/abseil-cpp") def test_generate_readme_contains_description() -> None: """Package 
description appears in the generated README.""" - assert "Abseil C++ libraries" in _generate_readme( - _manifest(), "abseil-cpp", "https://github.com/abseil/abseil-cpp" - ) + assert "Abseil C++ libraries" in _generate_readme(_manifest(), "abseil-cpp", "https://github.com/abseil/abseil-cpp") def test_generate_readme_contains_version_tag() -> None: @@ -387,17 +345,13 @@ def test_generate_readme_contains_version_tag() -> None: def test_generate_readme_omits_tag_when_no_version() -> None: """No 'tag:' line is emitted when version is None.""" - readme = _generate_readme( - _manifest(version=None), "abseil-cpp", "https://github.com/abseil/abseil-cpp" - ) + readme = _generate_readme(_manifest(version=None), "abseil-cpp", "https://github.com/abseil/abseil-cpp") assert "tag:" not in readme def test_generate_readme_contains_dfetch_yaml_snippet() -> None: """The generated README contains a dfetch.yaml code block.""" - readme = _generate_readme( - _manifest(), "abseil-cpp", "https://github.com/abseil/abseil-cpp" - ) + readme = _generate_readme(_manifest(), "abseil-cpp", "https://github.com/abseil/abseil-cpp") assert "dfetch.yaml" in readme @@ -421,9 +375,7 @@ def test_fetch_upstream_tags_returns_tag_entries() -> None: f"refs/tags/v2.0.0": "b" * 40, "refs/heads/main": "c" * 40, # branches must be excluded } - with patch( - "dfetch_hub.catalog.writer.GitRemote._ls_remote", return_value=ls_remote - ): + with patch("dfetch_hub.catalog.writer.GitRemote._ls_remote", return_value=ls_remote): tags = _fetch_upstream_tags("https://github.com/owner/repo") tag_names = {t["name"] for t in tags} @@ -436,9 +388,7 @@ def test_fetch_upstream_tags_excludes_branch_refs() -> None: "refs/heads/main": _FULL_SHA, "refs/heads/dev": "b" * 40, } - with patch( - "dfetch_hub.catalog.writer.GitRemote._ls_remote", return_value=ls_remote - ): + with patch("dfetch_hub.catalog.writer.GitRemote._ls_remote", return_value=ls_remote): tags = _fetch_upstream_tags("https://github.com/owner/repo") assert tags == [] 
@@ -447,9 +397,7 @@ def test_fetch_upstream_tags_excludes_branch_refs() -> None: def test_fetch_upstream_tags_commit_sha_is_full_length() -> None: """commit_sha must be the full 40-character SHA, not a shortened form.""" ls_remote = {"refs/tags/v1.0.0": _FULL_SHA} - with patch( - "dfetch_hub.catalog.writer.GitRemote._ls_remote", return_value=ls_remote - ): + with patch("dfetch_hub.catalog.writer.GitRemote._ls_remote", return_value=ls_remote): tags = _fetch_upstream_tags("https://github.com/owner/repo") assert len(tags) == 1 @@ -459,9 +407,7 @@ def test_fetch_upstream_tags_commit_sha_is_full_length() -> None: def test_fetch_upstream_tags_is_tag_true() -> None: """Every entry returned has is_tag set to True.""" ls_remote = {"refs/tags/v1.0.0": _FULL_SHA} - with patch( - "dfetch_hub.catalog.writer.GitRemote._ls_remote", return_value=ls_remote - ): + with patch("dfetch_hub.catalog.writer.GitRemote._ls_remote", return_value=ls_remote): tags = _fetch_upstream_tags("https://github.com/owner/repo") assert tags[0]["is_tag"] is True @@ -470,9 +416,7 @@ def test_fetch_upstream_tags_is_tag_true() -> None: def test_fetch_upstream_tags_name_strips_refs_prefix() -> None: """The 'refs/tags/' prefix is stripped from the tag name.""" ls_remote = {"refs/tags/release-2024": _FULL_SHA} - with patch( - "dfetch_hub.catalog.writer.GitRemote._ls_remote", return_value=ls_remote - ): + with patch("dfetch_hub.catalog.writer.GitRemote._ls_remote", return_value=ls_remote): tags = _fetch_upstream_tags("https://github.com/owner/repo") assert tags[0]["name"] == "release-2024" @@ -497,9 +441,7 @@ def test_fetch_upstream_tags_returns_empty_on_error() -> None: def test_merge_detail_new_sets_org_and_repo() -> None: """New detail record stores org and repo.""" with patch("dfetch_hub.catalog.writer._fetch_upstream_tags", return_value=[]): - detail = _merge_detail( - None, _manifest(), "abseil", "abseil-cpp", "vcpkg", "vcpkg", "ports" - ) + detail = _merge_detail(None, _manifest(), "abseil", 
"abseil-cpp", "vcpkg", "vcpkg", "ports") assert detail["org"] == "abseil" assert detail["repo"] == "abseil-cpp" @@ -507,9 +449,7 @@ def test_merge_detail_new_sets_org_and_repo() -> None: def test_merge_detail_new_adds_catalog_source() -> None: """New detail record contains exactly one catalog source entry.""" with patch("dfetch_hub.catalog.writer._fetch_upstream_tags", return_value=[]): - detail = _merge_detail( - None, _manifest(), "abseil", "abseil-cpp", "vcpkg", "vcpkg", "ports" - ) + detail = _merge_detail(None, _manifest(), "abseil", "abseil-cpp", "vcpkg", "vcpkg", "ports") sources = detail["catalog_sources"] assert len(sources) == 1 assert sources[0]["source_name"] == "vcpkg" @@ -556,9 +496,7 @@ def test_merge_detail_updates_existing_catalog_source() -> None: existing = _existing_detail() m = _manifest(version="2.0") with patch("dfetch_hub.catalog.writer._fetch_upstream_tags", return_value=[]): - detail = _merge_detail( - existing, m, "abseil", "abseil-cpp", "vcpkg", "vcpkg", "ports" - ) + detail = _merge_detail(existing, m, "abseil", "abseil-cpp", "vcpkg", "vcpkg", "ports") assert detail["catalog_sources"][0]["registry_version"] == "2.0" assert len(detail["catalog_sources"]) == 1 @@ -568,9 +506,7 @@ def test_merge_detail_appends_new_catalog_source() -> None: existing = _existing_detail() m = _manifest() with patch("dfetch_hub.catalog.writer._fetch_upstream_tags", return_value=[]): - detail = _merge_detail( - existing, m, "abseil", "abseil-cpp", "conan", "conan", "recipes" - ) + detail = _merge_detail(existing, m, "abseil", "abseil-cpp", "conan", "conan", "recipes") source_names = [s["source_name"] for s in detail["catalog_sources"]] assert "vcpkg" in source_names assert "conan" in source_names @@ -595,14 +531,10 @@ def test_merge_detail_version_tag_added_when_absent() -> None: def test_merge_detail_version_tag_not_duplicated() -> None: """The manifest version is not added again if already present (modulo leading v).""" existing = _existing_detail() - 
existing["tags"] = [ - {"name": "v1.2.3", "is_tag": True, "commit_sha": None, "date": None} - ] + existing["tags"] = [{"name": "v1.2.3", "is_tag": True, "commit_sha": None, "date": None}] m = _manifest(version="1.2.3") with patch("dfetch_hub.catalog.writer._fetch_upstream_tags", return_value=[]): - detail = _merge_detail( - existing, m, "abseil", "abseil-cpp", "vcpkg", "vcpkg", "ports" - ) + detail = _merge_detail(existing, m, "abseil", "abseil-cpp", "vcpkg", "vcpkg", "ports") assert sum(1 for t in detail["tags"] if t["name"].lstrip("v") == "1.2.3") == 1 @@ -619,9 +551,7 @@ def test_merge_detail_stale_source_name_replaced_not_duplicated() -> None: m = _manifest(version="1.0") with patch("dfetch_hub.catalog.writer._fetch_upstream_tags", return_value=[]): - detail = _merge_detail( - existing, m, "abseil", "abseil-cpp", "vcpkg", "vcpkg", "ports" - ) + detail = _merge_detail(existing, m, "abseil", "abseil-cpp", "vcpkg", "vcpkg", "ports") source_names = [s["source_name"] for s in detail["catalog_sources"]] assert source_names == ["vcpkg"], f"expected only 'vcpkg', got {source_names}" @@ -642,9 +572,7 @@ def test_merge_detail_urls_propagated_from_manifest() -> None: }, ) with patch("dfetch_hub.catalog.writer._fetch_upstream_tags", return_value=[]): - detail = _merge_detail( - None, m, "abseil", "abseil-cpp", "vcpkg", "vcpkg", "ports" - ) + detail = _merge_detail(None, m, "abseil", "abseil-cpp", "vcpkg", "vcpkg", "ports") assert detail["urls"]["Homepage"] == "https://github.com/abseil/abseil-cpp" assert detail["urls"]["Source"] == "https://github.com/x/y" @@ -652,9 +580,7 @@ def test_merge_detail_urls_propagated_from_manifest() -> None: def test_merge_detail_urls_empty_when_manifest_has_no_urls() -> None: """urls field in the detail JSON is an empty dict when the manifest carries none.""" with patch("dfetch_hub.catalog.writer._fetch_upstream_tags", return_value=[]): - detail = _merge_detail( - None, _manifest(), "abseil", "abseil-cpp", "vcpkg", "vcpkg", "ports" - ) + 
detail = _merge_detail(None, _manifest(), "abseil", "abseil-cpp", "vcpkg", "vcpkg", "ports") assert detail["urls"] == {} @@ -672,9 +598,7 @@ def test_merge_detail_urls_merged_across_sources() -> None: urls={"Source": "https://github.com/conan-io/conan-center-index"}, ) with patch("dfetch_hub.catalog.writer._fetch_upstream_tags", return_value=[]): - detail = _merge_detail( - existing, m, "abseil", "abseil-cpp", "conan", "conan", "recipes" - ) + detail = _merge_detail(existing, m, "abseil", "abseil-cpp", "conan", "conan", "recipes") assert "Homepage" in detail["urls"] assert "Source" in detail["urls"] @@ -865,9 +789,7 @@ def test_write_catalog_detail_json_has_both_sources(tmp_path: Path) -> None: label="conan", registry_path="recipes", ) - detail = json.loads( - (tmp_path / "github" / "abseil" / "abseil-cpp.json").read_text(encoding="utf-8") - ) + detail = json.loads((tmp_path / "github" / "abseil" / "abseil-cpp.json").read_text(encoding="utf-8")) source_names = [s["source_name"] for s in detail["catalog_sources"]] assert "vcpkg" in source_names assert "conan" in source_names From 1c5e05ad6e2ff94e5a323a63ca0e70c97f7d8fd7 Mon Sep 17 00:00:00 2001 From: Ben Spoor Date: Mon, 2 Mar 2026 22:56:26 +0100 Subject: [PATCH 06/17] Review comments --- CLAUDE.md | 2 +- dfetch_hub/catalog/sources/readme.py | 2 +- dfetch_hub/catalog/sources/vcpkg.py | 7 +++++-- dfetch_hub/catalog/writer.py | 10 ++++++---- dfetch_hub/config.py | 2 ++ 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 34ee569..d2f03da 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -89,7 +89,7 @@ All tools live in `.venv/`. 
Activate the virtual environment before running any tool: -``` +```bash source .venv/bin/activate pre-commit run --all-files # full hook suite (must all pass before committing) pytest # full test suite (206 tests, all mocked — no network) diff --git a/dfetch_hub/catalog/sources/readme.py b/dfetch_hub/catalog/sources/readme.py index 870f463..012137c 100644 --- a/dfetch_hub/catalog/sources/readme.py +++ b/dfetch_hub/catalog/sources/readme.py @@ -28,7 +28,7 @@ def _is_content_line(line: str, in_code_block: bool) -> bool: """Return ``True`` if *line* is a prose content line worth using as a description.""" - return not in_code_block and not _SKIP_RE.match(line) and bool(line.strip()) + return not in_code_block and not _SKIP_RE.match(line.lstrip()) and bool(line.strip()) def _extract_description(text: str) -> str: diff --git a/dfetch_hub/catalog/sources/vcpkg.py b/dfetch_hub/catalog/sources/vcpkg.py index b247171..c4f5cff 100644 --- a/dfetch_hub/catalog/sources/vcpkg.py +++ b/dfetch_hub/catalog/sources/vcpkg.py @@ -4,12 +4,15 @@ import json from dataclasses import dataclass, field -from pathlib import Path +from typing import TYPE_CHECKING from dfetch.log import get_logger from dfetch_hub.catalog.sources import BaseManifest, fetch_readme_for_homepage +if TYPE_CHECKING: + from pathlib import Path + logger = get_logger(__name__) @@ -96,7 +99,7 @@ def parse_vcpkg_json(entry_dir: Path) -> VcpkgManifest | None: return None try: - with Path.open(manifest_path, encoding="utf-8") as fh: + with manifest_path.open(encoding="utf-8") as fh: loaded = json.load(fh) except (json.JSONDecodeError, OSError) as exc: logger.warning("Could not parse %s: %s", manifest_path, exc) diff --git a/dfetch_hub/catalog/writer.py b/dfetch_hub/catalog/writer.py index 5f7e54a..cae196e 100644 --- a/dfetch_hub/catalog/writer.py +++ b/dfetch_hub/catalog/writer.py @@ -4,14 +4,16 @@ import json from datetime import UTC, datetime -from pathlib import Path -from typing import Any +from typing import 
TYPE_CHECKING, Any from dfetch.log import get_logger from dfetch.vcs.git import GitRemote from dfetch_hub.catalog.sources import BaseManifest, parse_vcs_slug +if TYPE_CHECKING: + from pathlib import Path + logger = get_logger(__name__) @@ -74,14 +76,14 @@ def _fetch_upstream_tags(url: str) -> list[dict[str, Any]]: def _load_json(path: Path) -> Any: if path.exists(): - with Path.open(path, encoding="utf-8") as fh: + with path.open(encoding="utf-8") as fh: return json.load(fh) return None def _save_json(path: Path, data: Any) -> None: path.parent.mkdir(parents=True, exist_ok=True) - with Path.open(path, "w", encoding="utf-8") as fh: + with path.open("w", encoding="utf-8") as fh: json.dump(data, fh, indent=2, ensure_ascii=False) fh.write("\n") diff --git a/dfetch_hub/config.py b/dfetch_hub/config.py index 2a442c0..b5b165a 100644 --- a/dfetch_hub/config.py +++ b/dfetch_hub/config.py @@ -83,6 +83,8 @@ def load_config(path: str = "dfetch-hub.toml") -> HubConfig: Raises: FileNotFoundError: If *path* does not exist. tomllib.TOMLDecodeError: If the file is not valid TOML. + TypeError: If ``[settings]`` is not a TOML table, or if ``[[source]]`` + is not an array of TOML tables. 
""" with Path(path).open("rb") as fh: From 3f71925d0805e0da05e221b5cb3eb9abb9bc0cfe Mon Sep 17 00:00:00 2001 From: Ben Spoor Date: Mon, 2 Mar 2026 23:36:48 +0100 Subject: [PATCH 07/17] Fix doc8 --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index c9078c0..27b3a51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -137,6 +137,9 @@ ignore = "H021,H031" # H021: inline styles intentional in single-file app; H031 [tool.doc8] max-line-length = 120 extension = [".md", ".rst"] +ignore-path = ["*.egg-info/*", "build/*", "dist/*", ".venv/*"] +show-source = true +strict = true [tool.pydocstyle] convention = "google" From 41b2c2a770604d6ea4396bf61f492ca938f2bc8d Mon Sep 17 00:00:00 2001 From: Ben Date: Tue, 3 Mar 2026 07:41:17 +0100 Subject: [PATCH 08/17] Update pyproject.toml Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- pyproject.toml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 27b3a51..98e15ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -154,10 +154,9 @@ venvPath = "." 
venv = ".venv" include = ["dfetch_hub"] reportMissingTypeStubs = "none" -reportUnknownVariableType = "none" -reportUnknownArgumentType = "none" -reportUnknownMemberType = "none" - +reportUnknownVariableType = "warning" +reportUnknownArgumentType = "warning" +reportUnknownMemberType = "warning" [tool.pytest.ini_options] testpaths = ["test"] addopts = "--cov=dfetch_hub --cov-report=term-missing" From 4afe6c9bf609f57c61e4addbe1c9251a59d23c5b Mon Sep 17 00:00:00 2001 From: Ben Spoor Date: Wed, 4 Mar 2026 17:31:50 +0100 Subject: [PATCH 09/17] Ignore .venv --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index b95d82d..0d626e9 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,5 @@ doc/_build doc/landing-page/_build example/Tests/ venv* +.venv public From e3122951f4cc167b3c5210342cfd87d9b2937e00 Mon Sep 17 00:00:00 2001 From: Ben Spoor Date: Wed, 4 Mar 2026 17:38:01 +0100 Subject: [PATCH 10/17] Add codespell --- .github/workflows/test.yml | 1 + .pre-commit-config.yaml | 7 +++++++ pyproject.toml | 4 ++++ test/test_catalog_conan.py | 2 +- 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cb154db..f42fb1e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -35,3 +35,4 @@ jobs: - run: pydocstyle dfetch_hub # Docstrings (config: [tool.pydocstyle]) - run: doc8 # Doc style (config: [tool.doc8]) - run: pytest # Tests (config: [tool.pytest.ini_options]) + - run: codespell # spelling (config: [tool.codespell]) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 97311f3..e24040a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -73,3 +73,10 @@ repos: entry: doc8 --max-line-length 120 --extension .md language: python types_or: [rst, markdown] + - id: codespell + name: codespell + description: Checks for common misspellings in text files. 
+ entry: codespell --toml pyproject.toml + language: python + types: [text] + exclude: ^dfetch_hub/data/ diff --git a/pyproject.toml b/pyproject.toml index 98e15ab..a42659d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ dependencies = [ development = [ "bandit[toml]==1.9.4", "black==25.1.0", + "codespell==2.4.1", "djlint==1.36.4", "doc8==2.0.0", "isort==6.0.0", @@ -173,3 +174,6 @@ exclude_lines = [ "raise NotImplementedError", ] + +[tool.codespell] +skip = "./dfetch_hub/data/*" diff --git a/test/test_catalog_conan.py b/test/test_catalog_conan.py index 98e5c2a..004dfb6 100644 --- a/test/test_catalog_conan.py +++ b/test/test_catalog_conan.py @@ -288,7 +288,7 @@ def test_attr_literal_returns_none_for_non_string_non_paren_value() -> None: def test_attr_literal_returns_none_on_literal_eval_failure() -> None: """Returns None when ast.literal_eval raises (e.g. unclosed string).""" - # No closing quote → end_q == -1 → value_text is unparseable + # No closing quote → end_q == -1 → value_text is unparsable assert _attr_literal(' name = "unclosed', "name") is None From 2662752408297272fbff168d58f0944ae8fcdde3 Mon Sep 17 00:00:00 2001 From: Ben Spoor Date: Wed, 4 Mar 2026 22:15:35 +0100 Subject: [PATCH 11/17] Check mypy stricter --- dfetch_hub/catalog/cloner.py | 1 - pyproject.toml | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dfetch_hub/catalog/cloner.py b/dfetch_hub/catalog/cloner.py index 3af0d71..61a2b8d 100644 --- a/dfetch_hub/catalog/cloner.py +++ b/dfetch_hub/catalog/cloner.py @@ -63,7 +63,6 @@ def create_manifest(source: SourceConfig, dest_dir: Path) -> Path: src=source.path, branch=source.branch or "", revision="", - repo_path="", vcs="git", ) manifest_dict = ManifestDict( diff --git a/pyproject.toml b/pyproject.toml index a42659d..816dc03 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,7 +89,8 @@ max-module-lines = 500 [tool.mypy] files = "dfetch_hub" -ignore_missing_imports = true +ignore_missing_imports = false 
+follow_untyped_imports = true strict = true warn_unreachable = true From a088f877cf55088e47325ce5283df5c5c468b377 Mon Sep 17 00:00:00 2001 From: Ben Spoor Date: Wed, 4 Mar 2026 22:16:18 +0100 Subject: [PATCH 12/17] Make bandit stricter --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 816dc03..df74a3d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -148,6 +148,9 @@ convention = "google" [tool.bandit] targets = ["dfetch_hub"] +skips = [] +severity = "LOW" +confidence = "LOW" [tool.pyright] pythonVersion = "3.11" From 637cebc077e4b6913a74e3f70436914f421f5d1b Mon Sep 17 00:00:00 2001 From: Ben Spoor Date: Wed, 4 Mar 2026 22:46:15 +0100 Subject: [PATCH 13/17] Review comments --- dfetch_hub/catalog/sources/__init__.py | 12 +++++++++++- dfetch_hub/catalog/sources/clib.py | 2 +- dfetch_hub/commands/publish.py | 14 +++++++++----- dfetch_hub/commands/serve.py | 3 ++- test/test_catalog_cloner.py | 5 ++++- test/test_catalog_conan.py | 1 - test/test_catalog_writer.py | 4 ++-- 7 files changed, 29 insertions(+), 12 deletions(-) diff --git a/dfetch_hub/catalog/sources/__init__.py b/dfetch_hub/catalog/sources/__init__.py index 65a65bb..788fae0 100644 --- a/dfetch_hub/catalog/sources/__init__.py +++ b/dfetch_hub/catalog/sources/__init__.py @@ -57,7 +57,17 @@ def parse_vcs_slug(url: str) -> tuple[str, str, str] | None: def raw_url(owner: str, repo: str, branch: str, filename: str) -> str: - """Build a raw.githubusercontent.com URL for a specific file.""" + """Build a raw.githubusercontent.com URL for a specific file. + + Args: + owner: Repository owner or organization. + repo: Repository name. + branch: Branch name to fetch from. + filename: Filename within the repository root. 
+
+    Returns:
+        Raw GitHub content
+    """
     return f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{filename}"
 
 
diff --git a/dfetch_hub/catalog/sources/clib.py b/dfetch_hub/catalog/sources/clib.py
index ce13489..2e467c7 100644
--- a/dfetch_hub/catalog/sources/clib.py
+++ b/dfetch_hub/catalog/sources/clib.py
@@ -121,7 +121,7 @@ def _enrich_from_pkg_json(
     ``(package_name, description, license, version, keywords, canonical_url)``
     """
     package_name = str(pkg_json.get("name") or repo)
-    description = tagline or str(pkg_json.get("description") or "")
+    description = _str_or_none(pkg_json.get("description")) or tagline or ""
     license_val = _str_or_none(pkg_json.get("license"))
     version_val = _str_or_none(pkg_json.get("version"))
     json_kws = _pkg_json_keywords(pkg_json.get("keywords"))
diff --git a/dfetch_hub/commands/publish.py b/dfetch_hub/commands/publish.py
index e56bba1..a13f814 100644
--- a/dfetch_hub/commands/publish.py
+++ b/dfetch_hub/commands/publish.py
@@ -28,9 +28,9 @@ def _minify_json(src: Path, dst: Path) -> None:
     """Read *src*, minify JSON, write to *dst*."""
     dst.parent.mkdir(parents=True, exist_ok=True)
-    with Path.open(src, encoding="utf-8") as fh:
+    with src.open(encoding="utf-8") as fh:
         data = json.load(fh)
-    with Path.open(dst, "w", encoding="utf-8") as fh:
+    with dst.open("w", encoding="utf-8") as fh:
         json.dump(data, fh, separators=(",", ":"), ensure_ascii=False)
 
 
@@ -105,9 +105,13 @@ def _cmd_publish(parsed: argparse.Namespace) -> None:
     _validate_output_dir(data_dir, output)
 
     if output.exists():
-        logger.print_info_line("publish", f"Removing existing '{output}' ...")
-        shutil.rmtree(output)
-    output.mkdir(parents=True)
+        if output.is_dir():
+            logger.print_info_line("publish", f"Removing existing '{output}' ...")
+            shutil.rmtree(output)
+        else:
+            logger.error("Output path '%s' exists and is not a directory", output)
+            sys.exit(1)
+    output.mkdir(parents=True, exist_ok=True)
 
     logger.print_info_line("publish", "Copying site assets ...")
_copy_assets(_SITE_DIR, output) diff --git a/dfetch_hub/commands/serve.py b/dfetch_hub/commands/serve.py index a6d8c63..1a2d66e 100644 --- a/dfetch_hub/commands/serve.py +++ b/dfetch_hub/commands/serve.py @@ -17,7 +17,8 @@ def _port_type(value: str) -> int: """Parse *value* as a TCP port number (1-65535).""" port = int(value) - if not 1 <= port <= 65535: + max_port = 65535 + if not 1 <= port <= max_port: raise argparse.ArgumentTypeError("--port must be between 1 and 65535") return port diff --git a/test/test_catalog_cloner.py b/test/test_catalog_cloner.py index 0017766..8b1a950 100644 --- a/test/test_catalog_cloner.py +++ b/test/test_catalog_cloner.py @@ -7,7 +7,7 @@ from __future__ import annotations -from pathlib import Path +from typing import TYPE_CHECKING from unittest.mock import MagicMock, patch import pytest @@ -15,6 +15,9 @@ from dfetch_hub.catalog.cloner import clone_source, create_manifest from dfetch_hub.config import SourceConfig +if TYPE_CHECKING: + from pathlib import Path + # --------------------------------------------------------------------------- # Test data # --------------------------------------------------------------------------- diff --git a/test/test_catalog_conan.py b/test/test_catalog_conan.py index 004dfb6..9f6c776 100644 --- a/test/test_catalog_conan.py +++ b/test/test_catalog_conan.py @@ -9,7 +9,6 @@ import pytest from dfetch_hub.catalog.sources.conan import ( - ConanManifest, _attr_literal, _extract_str_attr, _extract_tuple_attr, diff --git a/test/test_catalog_writer.py b/test/test_catalog_writer.py index 0fc0b5a..7618160 100644 --- a/test/test_catalog_writer.py +++ b/test/test_catalog_writer.py @@ -371,8 +371,8 @@ def test_generate_readme_uses_provided_url() -> None: def test_fetch_upstream_tags_returns_tag_entries() -> None: """Tags are extracted from refs/tags/* entries returned by ls-remote.""" ls_remote = { - f"refs/tags/v1.0.0": _FULL_SHA, - f"refs/tags/v2.0.0": "b" * 40, + "refs/tags/v1.0.0": _FULL_SHA, + "refs/tags/v2.0.0": 
"b" * 40, "refs/heads/main": "c" * 40, # branches must be excluded } with patch("dfetch_hub.catalog.writer.GitRemote._ls_remote", return_value=ls_remote): From 6443679d91c570351a9cebe471e730c23c126aa4 Mon Sep 17 00:00:00 2001 From: Ben Spoor Date: Wed, 4 Mar 2026 22:48:29 +0100 Subject: [PATCH 14/17] Tighten ruff --- pyproject.toml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index df74a3d..572b333 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,19 +101,25 @@ target-version = "py311" [tool.ruff.lint] select = [ "A", # flake8-builtins: shadowing built-ins + # "ANN", # require type annotations everywhere "ARG", # flake8-unused-arguments "B", # flake8-bugbear: likely bugs and design issues "C4", # flake8-comprehensions: unnecessary comprehension forms + "DTZ", # forbid naive datetimes "E", # pycodestyle errors + "ERA", # remove commented-out code "F", # pyflakes: undefined names, unused imports "G", # flake8-logging-format: correct logging API usage + "ICN", # enforce import conventions "N", # pep8-naming "PERF", # perflint: performance anti-patterns "PIE", # flake8-pie: misc lint improvements + # "PL", # pylint rules via ruff "PTH", # flake8-use-pathlib: prefer pathlib over os.path / open() "RET", # flake8-return: return statement hygiene "RSE", # flake8-raise: raise statement hygiene "RUF", # ruff-specific rules + # "S", # flake8-bandit security checks (if not duplicating bandit) "SIM", # flake8-simplify "T20", # flake8-print: no print() in production code "TCH", # flake8-type-checking: move imports to TYPE_CHECKING @@ -129,6 +135,8 @@ ignore = [ "TRY300", # return-in-try: acceptable pattern "RET505", # superfluous else-after-return: readability preference ] +[tool.ruff.lint.per-file-ignores] +"test/*.py" = ["S101", "PL"] # Allow usage of assert statements in test files [tool.ruff.lint.isort] known-first-party = ["dfetch_hub"] From 4f974e6797df6a4b8678e1dcd4511b2ae325f5b8 Mon Sep 17 00:00:00 2001 From: Ben Spoor 
Date: Fri, 6 Mar 2026 22:06:26 +0100 Subject: [PATCH 15/17] Update test after prod fix --- test/test_catalog_clib.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_catalog_clib.py b/test/test_catalog_clib.py index dc1605c..ee5379c 100644 --- a/test/test_catalog_clib.py +++ b/test/test_catalog_clib.py @@ -211,8 +211,8 @@ def test_build_package_basic_fields() -> None: assert "buffer" in pkg.keywords -def test_build_package_tagline_preferred_over_json_description() -> None: - """The wiki tagline takes priority over the package.json description.""" +def test_json_description_preferred_over_build_package_tagline() -> None: + """The package.json description takes priority over the wiki tagline.""" with ( patch( "dfetch_hub.catalog.sources.clib._fetch_package_json", @@ -222,7 +222,7 @@ def test_build_package_tagline_preferred_over_json_description() -> None: ): pkg = _build_package("github.com", "clibs", "buffer", "my custom tagline", "") - assert pkg.description == "my custom tagline" + assert pkg.description == "Higher level C-string utilities" def test_build_package_falls_back_to_json_description_when_no_tagline() -> None: From 267b10e498a242b1b9fe73fa2fc4c5679c9cd7a0 Mon Sep 17 00:00:00 2001 From: Ben Spoor Date: Fri, 6 Mar 2026 22:13:38 +0100 Subject: [PATCH 16/17] Add codespell project path --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f42fb1e..027ecea 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -35,4 +35,4 @@ jobs: - run: pydocstyle dfetch_hub # Docstrings (config: [tool.pydocstyle]) - run: doc8 # Doc style (config: [tool.doc8]) - run: pytest # Tests (config: [tool.pytest.ini_options]) - - run: codespell # spelling (config: [tool.codespell]) + - run: codespell --toml pyproject.toml # spelling (config: [tool.codespell]) From 90876d31a77355dc602ef6e8080cb04cae33d606 Mon Sep 17 00:00:00 2001 
From: Ben Spoor Date: Fri, 6 Mar 2026 22:17:29 +0100 Subject: [PATCH 17/17] Use pre-commit in github action --- .github/workflows/test.yml | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 027ecea..a6c72e9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,21 +18,19 @@ jobs: with: python-version: '3.12' - - name: Install dependencies + - name: Create & Activate virtualenv and install dependencies run: | + python -m venv .venv + source .venv/bin/activate + pip install --upgrade pip pip install .[development] - - run: isort --check dfetch_hub # Import order (config: [tool.isort]) - - run: black --check dfetch_hub # Code style (config: [tool.black]) - - run: ruff check dfetch_hub # Lint rules (config: [tool.ruff]) - - run: pylint dfetch_hub # Static analysis (config: [tool.pylint]) - - run: mypy # Type check (config: [tool.mypy] files+strict) - - run: pyright # Type check (config: [tool.pyright]) - - run: bandit -c pyproject.toml -r dfetch_hub # Security (config: [tool.bandit]) - - run: xenon --max-absolute B --max-modules A --max-average A dfetch_hub # Complexity - - run: pyroma --directory --min 9 . # Package metadata quality - - run: djlint --lint dfetch_hub # HTML lint (config: [tool.djlint]) - - run: pydocstyle dfetch_hub # Docstrings (config: [tool.pydocstyle]) - - run: doc8 # Doc style (config: [tool.doc8]) - - run: pytest # Tests (config: [tool.pytest.ini_options]) - - run: codespell --toml pyproject.toml # spelling (config: [tool.codespell]) + - name: Run pre-commit + run: | + source .venv/bin/activate + pre-commit run --all-files + + - name: Run tests + run: | + source .venv/bin/activate + pytest