Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dev
Submodule dev updated from 292e6a to 38ffc1
38 changes: 3 additions & 35 deletions scidk/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,7 @@
from .core.graph import InMemoryGraph
from .core.filesystem import FilesystemManager
from .core.registry import InterpreterRegistry
from .interpreters.python_code import PythonCodeInterpreter
from .interpreters.csv_interpreter import CsvInterpreter
from .interpreters.json_interpreter import JsonInterpreter
from .interpreters.yaml_interpreter import YamlInterpreter
from .interpreters.ipynb_interpreter import IpynbInterpreter
from .interpreters.txt_interpreter import TxtInterpreter
from .interpreters.xlsx_interpreter import XlsxInterpreter
from .core.pattern_matcher import Rule
from .interpreters import register_all as register_interpreters
from .core.providers import ProviderRegistry as FsProviderRegistry, LocalFSProvider, MountedFSProvider, RcloneProvider
from .web.helpers import commit_to_neo4j_batched

Expand Down Expand Up @@ -118,33 +111,8 @@ def create_app():
except Exception:
settings = None

# Register interpreters
py_interp = PythonCodeInterpreter()
csv_interp = CsvInterpreter()
json_interp = JsonInterpreter()
yaml_interp = YamlInterpreter()
ipynb_interp = IpynbInterpreter()
txt_interp = TxtInterpreter()
xlsx_interp = XlsxInterpreter()
registry.register_extension(".py", py_interp)
registry.register_extension(".csv", csv_interp)
registry.register_extension(".json", json_interp)
registry.register_extension(".yml", yaml_interp)
registry.register_extension(".yaml", yaml_interp)
registry.register_extension(".ipynb", ipynb_interp)
registry.register_extension(".txt", txt_interp)
registry.register_extension(".xlsx", xlsx_interp)
registry.register_extension(".xlsm", xlsx_interp)
# Register simple rules to prefer interpreters for extensions
registry.register_rule(Rule(id="rule.py.default", interpreter_id=py_interp.id, pattern="*.py", priority=10, conditions={"ext": ".py"}))
registry.register_rule(Rule(id="rule.csv.default", interpreter_id=csv_interp.id, pattern="*.csv", priority=10, conditions={"ext": ".csv"}))
registry.register_rule(Rule(id="rule.json.default", interpreter_id=json_interp.id, pattern="*.json", priority=10, conditions={"ext": ".json"}))
registry.register_rule(Rule(id="rule.yml.default", interpreter_id=yaml_interp.id, pattern="*.yml", priority=10, conditions={"ext": ".yml"}))
registry.register_rule(Rule(id="rule.yaml.default", interpreter_id=yaml_interp.id, pattern="*.yaml", priority=10, conditions={"ext": ".yaml"}))
registry.register_rule(Rule(id="rule.ipynb.default", interpreter_id=ipynb_interp.id, pattern="*.ipynb", priority=10, conditions={"ext": ".ipynb"}))
registry.register_rule(Rule(id="rule.txt.default", interpreter_id=txt_interp.id, pattern="*.txt", priority=10, conditions={"ext": ".txt"}))
registry.register_rule(Rule(id="rule.xlsx.default", interpreter_id=xlsx_interp.id, pattern="*.xlsx", priority=10, conditions={"ext": ".xlsx"}))
registry.register_rule(Rule(id="rule.xlsm.default", interpreter_id=xlsx_interp.id, pattern="*.xlsm", priority=10, conditions={"ext": ".xlsm"}))
# Register interpreters with extensions and rules
register_interpreters(registry)

# Compute effective interpreter enablement (CLI envs > global settings > defaults)
testing_env = bool(os.environ.get('PYTEST_CURRENT_TEST')) or bool(os.environ.get('SCIDK_DISABLE_SETTINGS'))
Expand Down
69 changes: 69 additions & 0 deletions scidk/interpreters/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""
Centralized, list-based registration for SciDK interpreters.

Each interpreter declares its extensions and metadata as class attributes.
Adding a new interpreter requires only:
1. Create the interpreter class file
2. Add import and class to INTERPRETERS list
3. Define extensions = [...] on the class
"""

from .python_code import PythonCodeInterpreter
from .csv_interpreter import CsvInterpreter
from .json_interpreter import JsonInterpreter
from .yaml_interpreter import YamlInterpreter
from .ipynb_interpreter import IpynbInterpreter
from .txt_interpreter import TxtInterpreter
from .xlsx_interpreter import XlsxInterpreter

# Centralized interpreter registry.
# register_all() iterates this list in order, creates one instance of each
# class, and registers that instance for every extension named in the class's
# `extensions` attribute. A new interpreter class must be appended here to be
# picked up.
INTERPRETERS = [
PythonCodeInterpreter,
CsvInterpreter,
JsonInterpreter,
YamlInterpreter,
IpynbInterpreter,
TxtInterpreter,
XlsxInterpreter,
]


def register_all(registry):
    """
    Instantiate every class in INTERPRETERS and wire it into ``registry``.

    One instance is created per interpreter class and shared across all of
    the extensions listed in the class-level ``extensions`` attribute (a
    class without that attribute contributes no registrations). For each
    class the instance is first registered for every extension, and then a
    default rule is registered per extension with:

    * id ``rule.<extension without leading dots>.default``
    * pattern ``*<extension>``
    * priority 10
    * conditions ``{"ext": <extension>}``

    Args:
        registry: InterpreterRegistry instance to register with; it must
            provide ``register_extension(ext, interpreter)`` and
            ``register_rule(rule)``.
    """
    # Imported at call time instead of at module import time.
    # NOTE(review): presumably this avoids a circular import - confirm.
    from ..core.pattern_matcher import Rule

    for interpreter_cls in INTERPRETERS:
        interpreter = interpreter_cls()

        # Extensions are declared on the class; default to none when absent.
        supported = getattr(interpreter_cls, 'extensions', [])

        # Pass 1: map each extension to the shared interpreter instance.
        for suffix in supported:
            registry.register_extension(suffix, interpreter)

        # Pass 2: add one default matching rule per extension.
        for suffix in supported:
            default_rule = Rule(
                # '.py' becomes 'py' in the rule id.
                id=f"rule.{suffix.lstrip('.')}.default",
                interpreter_id=interpreter.id,
                pattern=f"*{suffix}",
                priority=10,
                conditions={"ext": suffix},
            )
            registry.register_rule(default_rule)
1 change: 1 addition & 0 deletions scidk/interpreters/csv_interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class CsvInterpreter:
id = "csv"
name = "CSV Interpreter"
version = "0.1.0"
extensions = [".csv"]

def __init__(self, max_bytes: int = 10 * 1024 * 1024):
self.max_bytes = max_bytes
Expand Down
1 change: 1 addition & 0 deletions scidk/interpreters/ipynb_interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class IpynbInterpreter:
id = "ipynb"
name = "Jupyter Notebook Interpreter"
version = "0.3.0"
extensions = [".ipynb"]

def __init__(self, max_bytes: int = 5 * 1024 * 1024):
self.max_bytes = max_bytes
Expand Down
1 change: 1 addition & 0 deletions scidk/interpreters/json_interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class JsonInterpreter:
id = "json"
name = "JSON Interpreter"
version = "0.1.0"
extensions = [".json"]

def __init__(self, max_bytes: int = 5 * 1024 * 1024):
self.max_bytes = max_bytes
Expand Down
1 change: 1 addition & 0 deletions scidk/interpreters/python_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class PythonCodeInterpreter:
id = "python_code"
name = "Python Code Analyzer"
version = "0.1.0"
extensions = [".py"]

def interpret(self, file_path: Path):
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
Expand Down
1 change: 1 addition & 0 deletions scidk/interpreters/txt_interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class TxtInterpreter:
id = "txt"
name = "Text File Interpreter"
version = "0.1.0"
extensions = [".txt"]

def __init__(self, max_bytes: int = 10 * 1024 * 1024, max_preview_bytes: int = 4096, max_preview_lines: int = 100):
self.max_bytes = max_bytes
Expand Down
1 change: 1 addition & 0 deletions scidk/interpreters/xlsx_interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class XlsxInterpreter:
id = "xlsx"
name = "Excel Workbook Interpreter"
version = "0.1.0"
extensions = [".xlsx", ".xlsm"]

def __init__(self, max_bytes: int = 20 * 1024 * 1024):
self.max_bytes = max_bytes
Expand Down
1 change: 1 addition & 0 deletions scidk/interpreters/yaml_interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ class YamlInterpreter:
id = "yaml"
name = "YAML Interpreter"
version = "0.1.0"
extensions = [".yml", ".yaml"]

def __init__(self, max_bytes: int = 5 * 1024 * 1024):
self.max_bytes = max_bytes
Expand Down