From 3e9d2d67bbc971424a3361461a78ef154b5164e7 Mon Sep 17 00:00:00 2001 From: Adam Patch Date: Mon, 2 Feb 2026 15:27:38 -0500 Subject: [PATCH 1/2] chore(dev): update submodule pointer for session archive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Session docs from blueprint refactor have been archived to: dev/sessions/2026-01-26-blueprint-refactor/ Includes: - BLUEPRINT_REFACTOR_PROGRESS.md - Session progress tracking - COMMIT_SUMMARY.md - Commit checklist - GIT_COMMIT_CHECKLIST.md - Git workflow notes - MERGE_WORKFLOW.md - PR merge instructions - app.py.before-refactor - Pre-refactor backup (268KB) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- dev | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev b/dev index 292e6a5..38ffc14 160000 --- a/dev +++ b/dev @@ -1 +1 @@ -Subproject commit 292e6a51cd20912b28b099ebcf4bb704e3e6116d +Subproject commit 38ffc1495e5736a5c77e6e8c35aced687bbe7932 From 397214ca27fda873596130e98890767f2e43c4b1 Mon Sep 17 00:00:00 2001 From: Adam Patch Date: Mon, 2 Feb 2026 16:30:07 -0500 Subject: [PATCH 2/2] refactor(interpreters): centralize registration with auto-discovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace ~35 lines of manual interpreter registration with a centralized registry pattern. Each interpreter now declares its file extensions as a class attribute, and registration happens automatically via a single register_all() call. 
Changes: - Add scidk/interpreters/__init__.py with INTERPRETERS list and register_all() - Add extensions attribute to all interpreter classes (PythonCodeInterpreter, CsvInterpreter, JsonInterpreter, YamlInterpreter, IpynbInterpreter, TxtInterpreter, XlsxInterpreter) - Update scidk/app.py to use register_all() instead of manual registration - Auto-generate rules from interpreter extensions (pattern matching) Benefits: - Adding new interpreters now requires only: create class, add to INTERPRETERS list - No more manual registry.register_extension() and registry.register_rule() calls - Centralized source of truth for interpreter metadata - Reduces boilerplate and maintenance burden 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- scidk/app.py | 38 ++------------ scidk/interpreters/__init__.py | 69 +++++++++++++++++++++++++ scidk/interpreters/csv_interpreter.py | 1 + scidk/interpreters/ipynb_interpreter.py | 1 + scidk/interpreters/json_interpreter.py | 1 + scidk/interpreters/python_code.py | 1 + scidk/interpreters/txt_interpreter.py | 1 + scidk/interpreters/xlsx_interpreter.py | 1 + scidk/interpreters/yaml_interpreter.py | 1 + 9 files changed, 79 insertions(+), 35 deletions(-) create mode 100644 scidk/interpreters/__init__.py diff --git a/scidk/app.py b/scidk/app.py index a7bed95..c82aafa 100644 --- a/scidk/app.py +++ b/scidk/app.py @@ -8,14 +8,7 @@ from .core.graph import InMemoryGraph from .core.filesystem import FilesystemManager from .core.registry import InterpreterRegistry -from .interpreters.python_code import PythonCodeInterpreter -from .interpreters.csv_interpreter import CsvInterpreter -from .interpreters.json_interpreter import JsonInterpreter -from .interpreters.yaml_interpreter import YamlInterpreter -from .interpreters.ipynb_interpreter import IpynbInterpreter -from .interpreters.txt_interpreter import TxtInterpreter -from .interpreters.xlsx_interpreter import XlsxInterpreter -from .core.pattern_matcher import Rule 
+from .interpreters import register_all as register_interpreters from .core.providers import ProviderRegistry as FsProviderRegistry, LocalFSProvider, MountedFSProvider, RcloneProvider from .web.helpers import commit_to_neo4j_batched @@ -118,33 +111,8 @@ def create_app(): except Exception: settings = None - # Register interpreters - py_interp = PythonCodeInterpreter() - csv_interp = CsvInterpreter() - json_interp = JsonInterpreter() - yaml_interp = YamlInterpreter() - ipynb_interp = IpynbInterpreter() - txt_interp = TxtInterpreter() - xlsx_interp = XlsxInterpreter() - registry.register_extension(".py", py_interp) - registry.register_extension(".csv", csv_interp) - registry.register_extension(".json", json_interp) - registry.register_extension(".yml", yaml_interp) - registry.register_extension(".yaml", yaml_interp) - registry.register_extension(".ipynb", ipynb_interp) - registry.register_extension(".txt", txt_interp) - registry.register_extension(".xlsx", xlsx_interp) - registry.register_extension(".xlsm", xlsx_interp) - # Register simple rules to prefer interpreters for extensions - registry.register_rule(Rule(id="rule.py.default", interpreter_id=py_interp.id, pattern="*.py", priority=10, conditions={"ext": ".py"})) - registry.register_rule(Rule(id="rule.csv.default", interpreter_id=csv_interp.id, pattern="*.csv", priority=10, conditions={"ext": ".csv"})) - registry.register_rule(Rule(id="rule.json.default", interpreter_id=json_interp.id, pattern="*.json", priority=10, conditions={"ext": ".json"})) - registry.register_rule(Rule(id="rule.yml.default", interpreter_id=yaml_interp.id, pattern="*.yml", priority=10, conditions={"ext": ".yml"})) - registry.register_rule(Rule(id="rule.yaml.default", interpreter_id=yaml_interp.id, pattern="*.yaml", priority=10, conditions={"ext": ".yaml"})) - registry.register_rule(Rule(id="rule.ipynb.default", interpreter_id=ipynb_interp.id, pattern="*.ipynb", priority=10, conditions={"ext": ".ipynb"})) - 
registry.register_rule(Rule(id="rule.txt.default", interpreter_id=txt_interp.id, pattern="*.txt", priority=10, conditions={"ext": ".txt"})) - registry.register_rule(Rule(id="rule.xlsx.default", interpreter_id=xlsx_interp.id, pattern="*.xlsx", priority=10, conditions={"ext": ".xlsx"})) - registry.register_rule(Rule(id="rule.xlsm.default", interpreter_id=xlsx_interp.id, pattern="*.xlsm", priority=10, conditions={"ext": ".xlsm"})) + # Register interpreters with extensions and rules + register_interpreters(registry) # Compute effective interpreter enablement (CLI envs > global settings > defaults) testing_env = bool(os.environ.get('PYTEST_CURRENT_TEST')) or bool(os.environ.get('SCIDK_DISABLE_SETTINGS')) diff --git a/scidk/interpreters/__init__.py b/scidk/interpreters/__init__.py new file mode 100644 index 0000000..85a7545 --- /dev/null +++ b/scidk/interpreters/__init__.py @@ -0,0 +1,69 @@ +""" +Auto-discovery for SciDK interpreters. + +Each interpreter declares its extensions and metadata as class attributes. +Adding a new interpreter requires only: +1. Create the interpreter class file +2. Add import and class to INTERPRETERS list +3. Define extensions = [...] on the class +""" + +from .python_code import PythonCodeInterpreter +from .csv_interpreter import CsvInterpreter +from .json_interpreter import JsonInterpreter +from .yaml_interpreter import YamlInterpreter +from .ipynb_interpreter import IpynbInterpreter +from .txt_interpreter import TxtInterpreter +from .xlsx_interpreter import XlsxInterpreter + +# Centralized interpreter registry +INTERPRETERS = [ + PythonCodeInterpreter, + CsvInterpreter, + JsonInterpreter, + YamlInterpreter, + IpynbInterpreter, + TxtInterpreter, + XlsxInterpreter, +] + + +def register_all(registry): + """ + Register all interpreters with their extensions and rules. + + This replaces ~60 lines of manual registration code in app.py. + Each interpreter is: + 1. Instantiated + 2. Registered for each of its extensions + 3. 
Auto-assigned rules for pattern matching + + Args: + registry: InterpreterRegistry instance to register with + """ + from ..core.pattern_matcher import Rule + + for interp_class in INTERPRETERS: + instance = interp_class() + + # Get extensions from class metadata + extensions = getattr(interp_class, 'extensions', []) + + # Register by each extension + for ext in extensions: + registry.register_extension(ext, instance) + + # Auto-create default rules for each extension + for ext in extensions: + pattern = f"*{ext}" + # Convert '.py' → 'py' for rule id + ext_name = ext.lstrip('.') + rule_id = f"rule.{ext_name}.default" + + registry.register_rule(Rule( + id=rule_id, + interpreter_id=instance.id, + pattern=pattern, + priority=10, + conditions={"ext": ext} + )) diff --git a/scidk/interpreters/csv_interpreter.py b/scidk/interpreters/csv_interpreter.py index 6119339..bc30929 100644 --- a/scidk/interpreters/csv_interpreter.py +++ b/scidk/interpreters/csv_interpreter.py @@ -6,6 +6,7 @@ class CsvInterpreter: id = "csv" name = "CSV Interpreter" version = "0.1.0" + extensions = [".csv"] def __init__(self, max_bytes: int = 10 * 1024 * 1024): self.max_bytes = max_bytes diff --git a/scidk/interpreters/ipynb_interpreter.py b/scidk/interpreters/ipynb_interpreter.py index 111b3ee..2c9452e 100644 --- a/scidk/interpreters/ipynb_interpreter.py +++ b/scidk/interpreters/ipynb_interpreter.py @@ -13,6 +13,7 @@ class IpynbInterpreter: id = "ipynb" name = "Jupyter Notebook Interpreter" version = "0.3.0" + extensions = [".ipynb"] def __init__(self, max_bytes: int = 5 * 1024 * 1024): self.max_bytes = max_bytes diff --git a/scidk/interpreters/json_interpreter.py b/scidk/interpreters/json_interpreter.py index 419ff73..12dc06d 100644 --- a/scidk/interpreters/json_interpreter.py +++ b/scidk/interpreters/json_interpreter.py @@ -6,6 +6,7 @@ class JsonInterpreter: id = "json" name = "JSON Interpreter" version = "0.1.0" + extensions = [".json"] def __init__(self, max_bytes: int = 5 * 1024 * 1024): 
self.max_bytes = max_bytes diff --git a/scidk/interpreters/python_code.py b/scidk/interpreters/python_code.py index 176f06f..b4f053e 100644 --- a/scidk/interpreters/python_code.py +++ b/scidk/interpreters/python_code.py @@ -6,6 +6,7 @@ class PythonCodeInterpreter: id = "python_code" name = "Python Code Analyzer" version = "0.1.0" + extensions = [".py"] def interpret(self, file_path: Path): with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: diff --git a/scidk/interpreters/txt_interpreter.py b/scidk/interpreters/txt_interpreter.py index a99085e..48ce98d 100644 --- a/scidk/interpreters/txt_interpreter.py +++ b/scidk/interpreters/txt_interpreter.py @@ -6,6 +6,7 @@ class TxtInterpreter: id = "txt" name = "Text File Interpreter" version = "0.1.0" + extensions = [".txt"] def __init__(self, max_bytes: int = 10 * 1024 * 1024, max_preview_bytes: int = 4096, max_preview_lines: int = 100): self.max_bytes = max_bytes diff --git a/scidk/interpreters/xlsx_interpreter.py b/scidk/interpreters/xlsx_interpreter.py index 25d042d..63aff73 100644 --- a/scidk/interpreters/xlsx_interpreter.py +++ b/scidk/interpreters/xlsx_interpreter.py @@ -11,6 +11,7 @@ class XlsxInterpreter: id = "xlsx" name = "Excel Workbook Interpreter" version = "0.1.0" + extensions = [".xlsx", ".xlsm"] def __init__(self, max_bytes: int = 20 * 1024 * 1024): self.max_bytes = max_bytes diff --git a/scidk/interpreters/yaml_interpreter.py b/scidk/interpreters/yaml_interpreter.py index 91be3d5..f81dc06 100644 --- a/scidk/interpreters/yaml_interpreter.py +++ b/scidk/interpreters/yaml_interpreter.py @@ -10,6 +10,7 @@ class YamlInterpreter: id = "yaml" name = "YAML Interpreter" version = "0.1.0" + extensions = [".yml", ".yaml"] def __init__(self, max_bytes: int = 5 * 1024 * 1024): self.max_bytes = max_bytes