diff --git a/Cargo.lock b/Cargo.lock index ab4a72a0..20460fcd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3045,6 +3045,7 @@ dependencies = [ "regex", "serde", "serde_json", + "tempfile", "thiserror 2.0.18", "tracing", ] @@ -3060,6 +3061,7 @@ dependencies = [ "pythonize", "serde_json", "tokio", + "tracing", ] [[package]] @@ -3074,6 +3076,7 @@ dependencies = [ "serde_json", "tempfile", "tokio", + "tracing", "uuid", ] diff --git a/crates/nvisy-cli/src/config/mod.rs b/crates/nvisy-cli/src/config/mod.rs index 32ae3a3b..f09f45b8 100644 --- a/crates/nvisy-cli/src/config/mod.rs +++ b/crates/nvisy-cli/src/config/mod.rs @@ -31,11 +31,12 @@ mod server; use std::path::PathBuf; use clap::Parser; -pub use file::MiddlewareSection; use nvisy_engine::RuntimeConfig; -pub use server::{ResolvedServer, ServerConfig}; use tracing_subscriber::EnvFilter; +pub use self::file::MiddlewareSection; +pub use self::server::{ResolvedServer, ServerConfig}; + /// Top-level CLI entry point. /// /// Parses command-line arguments and loads the TOML configuration file. 
diff --git a/crates/nvisy-cli/src/server/mod.rs b/crates/nvisy-cli/src/server/mod.rs index 8f1510f8..7ae6dbc2 100644 --- a/crates/nvisy-cli/src/server/mod.rs +++ b/crates/nvisy-cli/src/server/mod.rs @@ -3,4 +3,4 @@ mod listen; mod shutdown; -pub use listen::run; +pub use self::listen::run; diff --git a/crates/nvisy-codec/src/document/mod.rs b/crates/nvisy-codec/src/document/mod.rs index c8303dea..fdfb5a74 100644 --- a/crates/nvisy-codec/src/document/mod.rs +++ b/crates/nvisy-codec/src/document/mod.rs @@ -9,9 +9,9 @@ use nvisy_core::content::{ContentData, ContentSource}; use nvisy_core::media::{ AudioFormat, DocumentType, ImageFormat, SpreadsheetFormat, TextFormat, WordFormat, }; -pub use span::Span; -pub use stream::SpanStream; +pub use self::span::Span; +pub use self::stream::SpanStream; use crate::handler::{ BoxedAudioHandler, BoxedImageHandler, BoxedRichHandler, BoxedTextHandler, CsvLoader, CsvParams, Handler, HtmlLoader, HtmlParams, JpegLoader, JpegParams, JsonLoader, JsonParams, Loader, diff --git a/crates/nvisy-codec/src/handler/audio/mod.rs b/crates/nvisy-codec/src/handler/audio/mod.rs index 8a3825e1..0a58386e 100644 --- a/crates/nvisy-codec/src/handler/audio/mod.rs +++ b/crates/nvisy-codec/src/handler/audio/mod.rs @@ -14,14 +14,14 @@ mod mp3_loader; mod wav_handler; mod wav_loader; -pub use audio_data::AudioData; -pub use audio_handler::BoxedAudioHandler; -use audio_handler_macro::impl_audio_handler; -pub use audio_span_id::AudioSpanId; -pub use mp3_handler::Mp3Handler; -pub use mp3_loader::{Mp3Loader, Mp3Params}; -pub use wav_handler::WavHandler; -pub use wav_loader::{WavLoader, WavParams}; +pub use self::audio_data::AudioData; +pub use self::audio_handler::BoxedAudioHandler; +use self::audio_handler_macro::impl_audio_handler; +pub use self::audio_span_id::AudioSpanId; +pub use self::mp3_handler::Mp3Handler; +pub use self::mp3_loader::{Mp3Loader, Mp3Params}; +pub use self::wav_handler::WavHandler; +pub use self::wav_loader::{WavLoader, WavParams}; /// 
Capability trait for handlers that expose audio content. /// diff --git a/crates/nvisy-codec/src/handler/image/mod.rs b/crates/nvisy-codec/src/handler/image/mod.rs index 8f1063f2..140ade5d 100644 --- a/crates/nvisy-codec/src/handler/image/mod.rs +++ b/crates/nvisy-codec/src/handler/image/mod.rs @@ -16,14 +16,14 @@ mod jpeg_loader; mod png_handler; mod png_loader; -pub use image_data::ImageData; -pub use image_handler::BoxedImageHandler; -pub(crate) use image_handler_macro::impl_image_handler; -pub use image_span_id::ImageSpanId; -pub use jpeg_handler::JpegHandler; -pub use jpeg_loader::{JpegLoader, JpegParams}; -pub use png_handler::PngHandler; -pub use png_loader::{PngLoader, PngParams}; +pub use self::image_data::ImageData; +pub use self::image_handler::BoxedImageHandler; +pub(crate) use self::image_handler_macro::impl_image_handler; +pub use self::image_span_id::ImageSpanId; +pub use self::jpeg_handler::JpegHandler; +pub use self::jpeg_loader::{JpegLoader, JpegParams}; +pub use self::png_handler::PngHandler; +pub use self::png_loader::{PngLoader, PngParams}; /// Capability trait for handlers that expose image content. /// diff --git a/crates/nvisy-codec/src/handler/mod.rs b/crates/nvisy-codec/src/handler/mod.rs index e9cba021..95de961c 100644 --- a/crates/nvisy-codec/src/handler/mod.rs +++ b/crates/nvisy-codec/src/handler/mod.rs @@ -17,11 +17,12 @@ mod image; mod rich; mod text; -pub use audio::*; -pub use image::*; use nvisy_core::content::ContentSource; -pub use rich::*; -pub use text::*; + +pub use self::audio::*; +pub use self::image::*; +pub use self::rich::*; +pub use self::text::*; /// Base trait implemented by all format handlers. 
/// diff --git a/crates/nvisy-codec/src/handler/rich/mod.rs b/crates/nvisy-codec/src/handler/rich/mod.rs index aa1a7e0d..a8aaede1 100644 --- a/crates/nvisy-codec/src/handler/rich/mod.rs +++ b/crates/nvisy-codec/src/handler/rich/mod.rs @@ -13,9 +13,9 @@ mod docx_loader; mod rich_handler; #[cfg(feature = "docx")] -pub use docx_loader::{DocxLoader, DocxParams}; +pub use self::docx_loader::{DocxLoader, DocxParams}; #[cfg(feature = "pdf")] -pub use pdf_handler::{RichTextHandler, RichTextSpan}; +pub use self::pdf_handler::{RichTextHandler, RichTextSpan}; #[cfg(feature = "pdf")] -pub use pdf_loader::{PdfLoader, PdfParams}; -pub use rich_handler::BoxedRichHandler; +pub use self::pdf_loader::{PdfLoader, PdfParams}; +pub use self::rich_handler::BoxedRichHandler; diff --git a/crates/nvisy-codec/src/handler/text/mod.rs b/crates/nvisy-codec/src/handler/text/mod.rs index c9c0b3cf..80e1693b 100644 --- a/crates/nvisy-codec/src/handler/text/mod.rs +++ b/crates/nvisy-codec/src/handler/text/mod.rs @@ -25,22 +25,22 @@ mod xlsx_handler; #[cfg(feature = "xlsx")] mod xlsx_loader; -pub use csv_handler::{CsvData, CsvHandler, CsvSpan}; -pub use csv_loader::{CsvLoader, CsvParams}; +pub use self::csv_handler::{CsvData, CsvHandler, CsvSpan}; +pub use self::csv_loader::{CsvLoader, CsvParams}; #[cfg(feature = "html")] -pub use html_handler::{HtmlData, HtmlHandler, HtmlSpan}; +pub use self::html_handler::{HtmlData, HtmlHandler, HtmlSpan}; #[cfg(feature = "html")] -pub use html_loader::{HtmlLoader, HtmlParams}; -pub use json_handler::{JsonData, JsonHandler, JsonIndent, JsonPath}; -pub use json_loader::{JsonLoader, JsonParams}; -pub use text_data::TextData; -pub use text_handler::BoxedTextHandler; -pub use txt_handler::{TxtHandler, TxtSpan}; -pub use txt_loader::{TxtLoader, TxtParams}; +pub use self::html_loader::{HtmlLoader, HtmlParams}; +pub use self::json_handler::{JsonData, JsonHandler, JsonIndent, JsonPath}; +pub use self::json_loader::{JsonLoader, JsonParams}; +pub use 
self::text_data::TextData; +pub use self::text_handler::BoxedTextHandler; +pub use self::txt_handler::{TxtHandler, TxtSpan}; +pub use self::txt_loader::{TxtLoader, TxtParams}; #[cfg(feature = "xlsx")] -pub use xlsx_handler::XlsxHandler; +pub use self::xlsx_handler::XlsxHandler; #[cfg(feature = "xlsx")] -pub use xlsx_loader::{XlsxLoader, XlsxParams}; +pub use self::xlsx_loader::{XlsxLoader, XlsxParams}; /// Capability trait for handlers that expose text content. /// diff --git a/crates/nvisy-codec/src/lib.rs b/crates/nvisy-codec/src/lib.rs index 4bf99170..90f92dc8 100644 --- a/crates/nvisy-codec/src/lib.rs +++ b/crates/nvisy-codec/src/lib.rs @@ -6,7 +6,7 @@ mod document; pub mod handler; pub mod transform; -pub use document::{Document, Span, SpanStream}; +pub use self::document::{Document, Span, SpanStream}; #[doc(hidden)] pub mod prelude; diff --git a/crates/nvisy-codec/src/transform/audio/mod.rs b/crates/nvisy-codec/src/transform/audio/mod.rs index d45e2a81..3c7d3a8a 100644 --- a/crates/nvisy-codec/src/transform/audio/mod.rs +++ b/crates/nvisy-codec/src/transform/audio/mod.rs @@ -3,5 +3,5 @@ mod instruction; mod transform; -pub use instruction::{AudioOutput, AudioRedaction}; -pub use transform::AudioTransform; +pub use self::instruction::{AudioOutput, AudioRedaction}; +pub use self::transform::AudioTransform; diff --git a/crates/nvisy-codec/src/transform/image/mod.rs b/crates/nvisy-codec/src/transform/image/mod.rs index e40f4129..8ff18a98 100644 --- a/crates/nvisy-codec/src/transform/image/mod.rs +++ b/crates/nvisy-codec/src/transform/image/mod.rs @@ -4,5 +4,5 @@ mod instruction; mod ops; mod transform; -pub use instruction::{ImageOutput, ImageRedaction}; -pub use transform::ImageTransform; +pub use self::instruction::{ImageOutput, ImageRedaction}; +pub use self::transform::ImageTransform; diff --git a/crates/nvisy-codec/src/transform/mod.rs b/crates/nvisy-codec/src/transform/mod.rs index 98f67a6c..823d80ae 100644 --- a/crates/nvisy-codec/src/transform/mod.rs +++ 
b/crates/nvisy-codec/src/transform/mod.rs @@ -4,6 +4,6 @@ mod audio; mod image; mod text; -pub use audio::{AudioOutput, AudioRedaction, AudioTransform}; -pub use image::{ImageOutput, ImageRedaction, ImageTransform}; -pub use text::{TextOutput, TextRedaction, TextTransform}; +pub use self::audio::{AudioOutput, AudioRedaction, AudioTransform}; +pub use self::image::{ImageOutput, ImageRedaction, ImageTransform}; +pub use self::text::{TextOutput, TextRedaction, TextTransform}; diff --git a/crates/nvisy-codec/src/transform/text/mod.rs b/crates/nvisy-codec/src/transform/text/mod.rs index 4235678f..0993a6d9 100644 --- a/crates/nvisy-codec/src/transform/text/mod.rs +++ b/crates/nvisy-codec/src/transform/text/mod.rs @@ -3,5 +3,5 @@ mod instruction; mod transform; -pub use instruction::{TextOutput, TextRedaction}; -pub use transform::TextTransform; +pub use self::instruction::{TextOutput, TextRedaction}; +pub use self::transform::TextTransform; diff --git a/crates/nvisy-core/src/content/mod.rs b/crates/nvisy-core/src/content/mod.rs index 7783432e..26e4bb29 100644 --- a/crates/nvisy-core/src/content/mod.rs +++ b/crates/nvisy-core/src/content/mod.rs @@ -13,10 +13,10 @@ mod data_reference; mod encoding; mod source; -pub use bundle::Content; -pub use content_bytes::ContentBytes; -pub use content_data::ContentData; -pub use content_metadata::ContentMetadata; -pub use data_reference::DataReference; -pub use encoding::TextEncoding; -pub use source::ContentSource; +pub use self::bundle::Content; +pub use self::content_bytes::ContentBytes; +pub use self::content_data::ContentData; +pub use self::content_metadata::ContentMetadata; +pub use self::data_reference::DataReference; +pub use self::encoding::TextEncoding; +pub use self::source::ContentSource; diff --git a/crates/nvisy-core/src/error.rs b/crates/nvisy-core/src/error.rs index 063cd422..1a50448a 100644 --- a/crates/nvisy-core/src/error.rs +++ b/crates/nvisy-core/src/error.rs @@ -44,7 +44,7 @@ pub struct Error { /// 
Human-readable description of what went wrong. pub message: String, /// Name of the component that produced this error (e.g. `"s3-read"`, `"detect-regex"`). - pub source_component: Option, + pub component: Option, /// Whether the operation that failed can be safely retried. pub retryable: bool, /// The underlying cause, if any. @@ -58,7 +58,7 @@ impl Error { Self { kind, message: message.into(), - source_component: None, + component: None, retryable: false, source: None, } @@ -72,7 +72,7 @@ impl Error { /// Tag this error with the name of the component that produced it. pub fn with_component(mut self, component: impl Into) -> Self { - self.source_component = Some(component.into()); + self.component = Some(component.into()); self } diff --git a/crates/nvisy-core/src/lib.rs b/crates/nvisy-core/src/lib.rs index 8e0e6344..ef2c9136 100644 --- a/crates/nvisy-core/src/lib.rs +++ b/crates/nvisy-core/src/lib.rs @@ -7,7 +7,7 @@ pub mod math; pub mod media; mod error; -pub use error::{Error, ErrorKind, Result}; +pub use self::error::{Error, ErrorKind, Result}; #[doc(hidden)] pub mod prelude; diff --git a/crates/nvisy-core/src/math/mod.rs b/crates/nvisy-core/src/math/mod.rs index 0c677122..69fa2091 100644 --- a/crates/nvisy-core/src/math/mod.rs +++ b/crates/nvisy-core/src/math/mod.rs @@ -8,7 +8,7 @@ mod dpi; mod polygon; mod time_span; -pub use bounding_box::{BoundingBox, BoundingBoxPixel}; -pub use dpi::Dpi; -pub use polygon::{Polygon, Vertex}; -pub use time_span::TimeSpan; +pub use self::bounding_box::{BoundingBox, BoundingBoxPixel}; +pub use self::dpi::Dpi; +pub use self::polygon::{Polygon, Vertex}; +pub use self::time_span::TimeSpan; diff --git a/crates/nvisy-core/src/media/mod.rs b/crates/nvisy-core/src/media/mod.rs index 68ba8983..e6adb9f8 100644 --- a/crates/nvisy-core/src/media/mod.rs +++ b/crates/nvisy-core/src/media/mod.rs @@ -6,8 +6,8 @@ mod content_kind; mod document_type; -pub use content_kind::ContentKind; -pub use document_type::{ +pub use 
self::content_kind::ContentKind; +pub use self::document_type::{ AudioFormat, DocumentType, ImageFormat, PresentationFormat, SpreadsheetFormat, TextFormat, WordFormat, }; diff --git a/crates/nvisy-engine/src/compiler/graph/mod.rs b/crates/nvisy-engine/src/compiler/graph/mod.rs index d8d5310c..221de0b1 100644 --- a/crates/nvisy-engine/src/compiler/graph/mod.rs +++ b/crates/nvisy-engine/src/compiler/graph/mod.rs @@ -11,15 +11,15 @@ mod target; use std::collections::HashSet; -pub use action::{ActionKind, ActionNode}; use nvisy_core::Error; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -pub use source::SourceNode; -pub use target::TargetNode; use uuid::Uuid; use validator::Validate; +pub use self::action::{ActionKind, ActionNode}; +pub use self::source::SourceNode; +pub use self::target::TargetNode; use super::policy::{RetryPolicy, TimeoutPolicy}; /// A node in the pipeline graph. diff --git a/crates/nvisy-engine/src/compiler/mod.rs b/crates/nvisy-engine/src/compiler/mod.rs index 9ce2b6ae..6ac7baa2 100644 --- a/crates/nvisy-engine/src/compiler/mod.rs +++ b/crates/nvisy-engine/src/compiler/mod.rs @@ -10,16 +10,17 @@ mod policy; use std::collections::HashMap; -pub use graph::{ - ActionKind, ActionNode, Graph, GraphEdge, GraphNode, GraphNodeKind, SourceNode, TargetNode, -}; use nvisy_core::Error; use petgraph::algo::{is_cyclic_directed, toposort}; use petgraph::graph::{DiGraph, NodeIndex}; -pub(crate) use plan::{ExecutionPlan, ResolvedNode}; -pub use policy::{BackoffStrategy, RetryPolicy, TimeoutBehavior, TimeoutPolicy}; use uuid::Uuid; +pub use self::graph::{ + ActionKind, ActionNode, Graph, GraphEdge, GraphNode, GraphNodeKind, SourceNode, TargetNode, +}; +pub(crate) use self::plan::{ExecutionPlan, ResolvedNode}; +pub use self::policy::{BackoffStrategy, RetryPolicy, TimeoutBehavior, TimeoutPolicy}; + /// Pipeline compiler with optional default policies. 
/// /// Nodes that don't carry their own retry or timeout policy will inherit diff --git a/crates/nvisy-engine/src/compiler/policy/mod.rs b/crates/nvisy-engine/src/compiler/policy/mod.rs index 13adfb43..769e82f5 100644 --- a/crates/nvisy-engine/src/compiler/policy/mod.rs +++ b/crates/nvisy-engine/src/compiler/policy/mod.rs @@ -3,5 +3,5 @@ mod retry; mod timeout; -pub use retry::{BackoffStrategy, RetryPolicy}; -pub use timeout::{TimeoutBehavior, TimeoutPolicy}; +pub use self::retry::{BackoffStrategy, RetryPolicy}; +pub use self::timeout::{TimeoutBehavior, TimeoutPolicy}; diff --git a/crates/nvisy-engine/src/lib.rs b/crates/nvisy-engine/src/lib.rs index cbe92933..08727136 100644 --- a/crates/nvisy-engine/src/lib.rs +++ b/crates/nvisy-engine/src/lib.rs @@ -8,11 +8,11 @@ pub mod pipeline; pub mod provenance; // Re-export graph data model for pipeline definitions. -pub use compiler::{ +pub use self::compiler::{ ActionKind, ActionNode, Graph, GraphEdge, GraphNode, GraphNodeKind, SourceNode, TargetNode, }; // Re-export retry and timeout policies for pipeline nodes. -pub use compiler::{BackoffStrategy, RetryPolicy, TimeoutBehavior, TimeoutPolicy}; -pub use pipeline::{ +pub use self::compiler::{BackoffStrategy, RetryPolicy, TimeoutBehavior, TimeoutPolicy}; +pub use self::pipeline::{ DefaultEngine, EngineSection, LlmSection, OcrSection, RuntimeConfig, SttSection, TtsSection, }; diff --git a/crates/nvisy-engine/src/operation/context/mod.rs b/crates/nvisy-engine/src/operation/context/mod.rs index ccbfc2fc..01cffa9f 100644 --- a/crates/nvisy-engine/src/operation/context/mod.rs +++ b/crates/nvisy-engine/src/operation/context/mod.rs @@ -19,15 +19,13 @@ //! [`Operation::Input`]: crate::operation::Operation::Input //! 
[`Operation::Output`]: crate::operation::Operation::Output -mod envelope; mod parallel; mod sequential; mod shared; -pub use envelope::DocumentEnvelope; -pub use parallel::ParallelContext; -pub use sequential::SequentialContext; -pub use shared::SharedContext; +pub use self::parallel::ParallelContext; +pub use self::sequential::SequentialContext; +pub use self::shared::SharedContext; pub(crate) mod private { pub trait Sealed {} diff --git a/crates/nvisy-engine/src/operation/envelope/apply.rs b/crates/nvisy-engine/src/operation/envelope/apply.rs new file mode 100644 index 00000000..3e05a6c5 --- /dev/null +++ b/crates/nvisy-engine/src/operation/envelope/apply.rs @@ -0,0 +1,43 @@ +//! The [`ApplyPatch`] trait and blanket implementations. + +use super::DocumentEnvelope; + +/// A value that can be applied to a [`DocumentEnvelope`], merging +/// operation results into the shared pipeline state. +/// +/// Each operation returns a concrete patch type; the orchestrator +/// calls [`apply`](ApplyPatch::apply) to fold it into the envelope +/// without needing to know the operation's internals. +pub trait ApplyPatch { + /// Merge this patch into the envelope. + fn apply(self, envelope: &mut DocumentEnvelope); +} + +/// A no-op patch for operations that don't modify the envelope. +impl ApplyPatch for () { + fn apply(self, _envelope: &mut DocumentEnvelope) {} +} + +/// Apply multiple patches of the same type in sequence. +impl ApplyPatch for Vec

{ + fn apply(self, envelope: &mut DocumentEnvelope) { + for patch in self { + patch.apply(envelope); + } + } +} + +impl ApplyPatch for (A, B) { + fn apply(self, envelope: &mut DocumentEnvelope) { + self.0.apply(envelope); + self.1.apply(envelope); + } +} + +impl ApplyPatch for (A, B, C) { + fn apply(self, envelope: &mut DocumentEnvelope) { + self.0.apply(envelope); + self.1.apply(envelope); + self.2.apply(envelope); + } +} diff --git a/crates/nvisy-engine/src/operation/envelope/audit.rs b/crates/nvisy-engine/src/operation/envelope/audit.rs new file mode 100644 index 00000000..2b5110ba --- /dev/null +++ b/crates/nvisy-engine/src/operation/envelope/audit.rs @@ -0,0 +1,20 @@ +//! Audit entry patches. + +use super::DocumentEnvelope; +use super::apply::ApplyPatch; +use crate::provenance::AuditEntry; + +/// A single audit log entry recording what an operation did. +pub struct OperationEntry(pub AuditEntry); + +impl ApplyPatch for OperationEntry { + fn apply(self, envelope: &mut DocumentEnvelope) { + envelope.audit.push_entry(self.0); + } +} + +impl ApplyPatch for AuditEntry { + fn apply(self, envelope: &mut DocumentEnvelope) { + envelope.audit.push_entry(self); + } +} diff --git a/crates/nvisy-engine/src/operation/envelope/detection.rs b/crates/nvisy-engine/src/operation/envelope/detection.rs new file mode 100644 index 00000000..144969f5 --- /dev/null +++ b/crates/nvisy-engine/src/operation/envelope/detection.rs @@ -0,0 +1,30 @@ +//! Entity detection patches. + +use nvisy_ontology::entity::Entities; + +use super::DocumentEnvelope; +use super::apply::ApplyPatch; + +/// New entities discovered by a detection operation (NER, OCR, CV, +/// pattern match, manual annotation). +/// +/// Appended to the envelope's existing entity set. 
+pub struct DetectedEntities(pub Entities); + +impl ApplyPatch for DetectedEntities { + fn apply(self, envelope: &mut DocumentEnvelope) { + envelope.entities.extend(self.0); + } +} + +/// A fully recomputed entity set produced by refinement operations +/// (deduplication, ensemble fusion). +/// +/// Replaces the envelope's entity set entirely. +pub struct RefinedEntities(pub Entities); + +impl ApplyPatch for RefinedEntities { + fn apply(self, envelope: &mut DocumentEnvelope) { + envelope.entities = self.0; + } +} diff --git a/crates/nvisy-engine/src/operation/context/envelope.rs b/crates/nvisy-engine/src/operation/envelope/mod.rs similarity index 80% rename from crates/nvisy-engine/src/operation/context/envelope.rs rename to crates/nvisy-engine/src/operation/envelope/mod.rs index 0624a163..ff211db9 100644 --- a/crates/nvisy-engine/src/operation/context/envelope.rs +++ b/crates/nvisy-engine/src/operation/envelope/mod.rs @@ -9,19 +9,31 @@ //! ContentData //! ↓ Import //! DocumentEnvelope { document, … } -//! ↓ OCR / NER / CV / PatternMatch +//! ↓ OCR / NER / CV / PatternMatch → DetectedEntities //! DocumentEnvelope { document, entities, … } -//! ↓ Deduplication / Ensemble +//! ↓ Deduplication / Ensemble → RefinedEntities //! DocumentEnvelope { document, entities (merged), … } -//! ↓ PolicyEvaluation +//! ↓ PolicyEvaluation → PolicyOutcome //! DocumentEnvelope { document, entities, audit { decisions, records }, … } //! ↓ Redaction //! DocumentEnvelope { document (redacted), entities, audit { … } } //! ``` +//! +//! Operations produce typed patch values that implement [`ApplyPatch`]. +//! The orchestrator merges each patch via [`DocumentEnvelope::apply`]. 
+ +mod apply; +mod audit; +mod detection; +mod policy; use nvisy_codec::Document; use nvisy_ontology::entity::Entities; +pub use self::apply::ApplyPatch; +pub use self::audit::OperationEntry; +pub use self::detection::{DetectedEntities, RefinedEntities}; +pub use self::policy::PolicyOutcome; use crate::provenance::Audit; /// Per-document state that flows through the entire pipeline. @@ -69,6 +81,11 @@ impl DocumentEnvelope { pub fn entity_count(&self) -> usize { self.entities.len() } + + /// Merge an operation's output into this envelope. + pub fn apply(&mut self, patch: impl ApplyPatch) { + patch.apply(self); + } } impl std::fmt::Debug for DocumentEnvelope { diff --git a/crates/nvisy-engine/src/operation/envelope/policy.rs b/crates/nvisy-engine/src/operation/envelope/policy.rs new file mode 100644 index 00000000..078dac4e --- /dev/null +++ b/crates/nvisy-engine/src/operation/envelope/policy.rs @@ -0,0 +1,20 @@ +//! Policy evaluation patches. + +use super::DocumentEnvelope; +use super::apply::ApplyPatch; +use crate::provenance::{RedactionDecision, RedactionRecord}; + +/// Redaction decisions and audit records produced by policy evaluation. +pub struct PolicyOutcome { + /// How each entity should be redacted. + pub decisions: Vec, + /// Audit-facing records of what was decided. 
+ pub records: Vec, +} + +impl ApplyPatch for PolicyOutcome { + fn apply(self, envelope: &mut DocumentEnvelope) { + envelope.audit.decisions.extend(self.decisions); + envelope.audit.records.extend(self.records); + } +} diff --git a/crates/nvisy-engine/src/operation/inference/computer_vision.rs b/crates/nvisy-engine/src/operation/inference/computer_vision.rs index 1650fb9c..5ad449ba 100644 --- a/crates/nvisy-engine/src/operation/inference/computer_vision.rs +++ b/crates/nvisy-engine/src/operation/inference/computer_vision.rs @@ -7,9 +7,10 @@ use nvisy_codec::Span; use nvisy_codec::handler::ImageData; use nvisy_core::math::BoundingBox; use nvisy_core::{Error, Result}; -use nvisy_ontology::entity::{DetectionMethod, Entities, Entity, ImageLocation}; +use nvisy_ontology::entity::{Entity, ExtractionMethod, ImageLocation, RecognitionMethod}; use nvisy_rig::agent::{CvAgent, CvEntity, DetectionConfig}; +use crate::operation::envelope::DetectedEntities; use crate::operation::{Operation, ParallelContext}; const TARGET: &str = "nvisy_engine::op::computer_vision"; @@ -26,7 +27,7 @@ impl ComputerVision { Self { agent, config } } - async fn detect(&self, spans: Vec>) -> Result { + async fn detect(&self, spans: Vec>) -> Result { tracing::debug!(target: TARGET, span_count = spans.len(), "detecting entities"); let mut entities = Vec::new(); @@ -45,13 +46,13 @@ impl ComputerVision { } } - Ok(entities.into()) + Ok(DetectedEntities(entities.into())) } } impl Operation for ComputerVision { type Input = ParallelContext>>; - type Output = ParallelContext; + type Output = ParallelContext; async fn call(&self, input: Self::Input) -> Result { input.parallel_map(|spans| self.detect(spans)).await @@ -60,14 +61,15 @@ impl Operation for ComputerVision { /// Convert a [`CvEntity`] to an [`Entity`] with [`ImageLocation`]. 
fn map_cv_entity(cv: &CvEntity) -> Entity { - Entity::new( - cv.category.clone(), + let mut entity = Entity::new( + cv.category, cv.entity_type, &cv.label, - DetectionMethod::ObjectDetection, + RecognitionMethod::Classification, cv.confidence, - ) - .with_location( + ); + entity.extraction_methods = vec![ExtractionMethod::ObjectDetection]; + entity.with_location( ImageLocation { bounding_box: BoundingBox { x: cv.bbox[0], diff --git a/crates/nvisy-engine/src/operation/inference/mod.rs b/crates/nvisy-engine/src/operation/inference/mod.rs index 6891511e..5a99c2cb 100644 --- a/crates/nvisy-engine/src/operation/inference/mod.rs +++ b/crates/nvisy-engine/src/operation/inference/mod.rs @@ -27,11 +27,11 @@ mod summarization; mod transcription; mod translation; -pub use classification::Classification; -pub use computer_vision::ComputerVision; -pub use ner::{Ner, NerMethodParams}; -pub use ocr::Ocr; -pub use ocr_verification::{OcrVerification, OcrVerificationInput}; -pub use summarization::Summarization; -pub use transcription::Transcription; -pub use translation::Translation; +pub use self::classification::Classification; +pub use self::computer_vision::ComputerVision; +pub use self::ner::{Ner, NerMethodParams}; +pub use self::ocr::Ocr; +pub use self::ocr_verification::{OcrVerification, OcrVerificationInput}; +pub use self::summarization::Summarization; +pub use self::transcription::Transcription; +pub use self::translation::Translation; diff --git a/crates/nvisy-engine/src/operation/inference/ner.rs b/crates/nvisy-engine/src/operation/inference/ner.rs index 5db48803..0e350271 100644 --- a/crates/nvisy-engine/src/operation/inference/ner.rs +++ b/crates/nvisy-engine/src/operation/inference/ner.rs @@ -8,15 +8,14 @@ use nvisy_codec::Span; use nvisy_codec::handler::TxtSpan; use nvisy_core::{Error, Result}; use nvisy_http::HttpClient; -use nvisy_ontology::entity::{ - DetectionMethod, Entities, Entity, EntityCategory, EntityKind, TextLocation, -}; +use 
nvisy_ontology::entity::{Entity, EntityCategory, EntityKind, RecognitionMethod, TextLocation}; use nvisy_rig::agent::{ AgentConfig, AgentProvider, DetectionConfig, KnownNerEntity, NerAgent, NerContext, }; use serde::Deserialize; use tokio::sync::Mutex; +use crate::operation::envelope::DetectedEntities; use crate::operation::{Operation, SequentialContext}; const TARGET: &str = "nvisy_engine::op::ner"; @@ -100,7 +99,7 @@ impl Ner { state.known_entities.clear(); } - async fn detect(&self, spans: Vec>) -> Result { + async fn detect(&self, spans: Vec>) -> Result { tracing::debug!(target: TARGET, span_count = spans.len(), "running NER"); let mut entities = Vec::new(); @@ -119,7 +118,7 @@ impl Ner { for ner_entity in &ner_entities { let category: EntityCategory = match ner_entity.category { - Some(ref c) => c.clone(), + Some(c) => c, None => continue, }; let entity_kind = match ner_entity.entity_type { @@ -135,7 +134,7 @@ impl Ner { category, entity_kind, &ner_entity.value, - DetectionMethod::Ner, + RecognitionMethod::Ner, confidence, ); @@ -169,13 +168,13 @@ impl Ner { state.known_entities = merge_ctx.known_entities; } - Ok(entities.into()) + Ok(DetectedEntities(entities.into())) } } impl Operation for Ner { type Input = SequentialContext>>; - type Output = SequentialContext; + type Output = SequentialContext; async fn call(&self, input: Self::Input) -> Result { input.sequential_map(|spans| self.detect(spans)).await diff --git a/crates/nvisy-engine/src/operation/inference/ocr_verification.rs b/crates/nvisy-engine/src/operation/inference/ocr_verification.rs index d71df01a..d5029ccf 100644 --- a/crates/nvisy-engine/src/operation/inference/ocr_verification.rs +++ b/crates/nvisy-engine/src/operation/inference/ocr_verification.rs @@ -12,6 +12,7 @@ use nvisy_core::{Error, Result}; use nvisy_ontology::entity::Entities; use nvisy_rig::agent::OcrAgent; +use crate::operation::envelope::DetectedEntities; use crate::operation::{Operation, ParallelContext}; const TARGET: &str = 
"nvisy_engine::op::ocr_verification"; @@ -38,16 +39,16 @@ impl OcrVerification { Self { agent } } - async fn verify(&self, data: OcrVerificationInput) -> Result { + async fn verify(&self, data: OcrVerificationInput) -> Result { if data.entities.is_empty() { tracing::debug!(target: TARGET, "no entities to verify"); - return Ok(Entities::new()); + return Ok(DetectedEntities(Entities::new())); } tracing::debug!(target: TARGET, entity_count = data.entities.len(), "verifying entities"); let image_bytes = match data.image_spans.first() { Some(span) => span.data.encode_png()?, - None => return Ok(data.entities), + None => return Ok(DetectedEntities(data.entities)), }; let entities = self @@ -56,13 +57,13 @@ impl OcrVerification { .await .map_err(|e| Error::runtime(e.to_string(), "ocr-verification", e.is_retryable()))?; - Ok(entities.into()) + Ok(DetectedEntities(entities.into())) } } impl Operation for OcrVerification { type Input = ParallelContext; - type Output = ParallelContext; + type Output = ParallelContext; async fn call(&self, input: Self::Input) -> Result { input.parallel_map(|data| self.verify(data)).await diff --git a/crates/nvisy-engine/src/operation/lifecycle/encryption.rs b/crates/nvisy-engine/src/operation/lifecycle/encryption.rs index 4fcb843b..35cd0102 100644 --- a/crates/nvisy-engine/src/operation/lifecycle/encryption.rs +++ b/crates/nvisy-engine/src/operation/lifecycle/encryption.rs @@ -7,7 +7,7 @@ use aes_gcm::{Aes256Gcm, KeyInit, Nonce}; use nvisy_core::{Error, ErrorKind, Result}; use rand::RngExt; -use crate::operation::context::DocumentEnvelope; +use crate::operation::envelope::DocumentEnvelope; use crate::operation::utility::crypto::{ EncryptedContent, EncryptionAlgorithm, KeyProvider, NONCE_SIZE, WireEnvelope, }; diff --git a/crates/nvisy-engine/src/operation/lifecycle/mod.rs b/crates/nvisy-engine/src/operation/lifecycle/mod.rs index d6b0bb4d..65d17a00 100644 --- a/crates/nvisy-engine/src/operation/lifecycle/mod.rs +++ 
b/crates/nvisy-engine/src/operation/lifecycle/mod.rs @@ -24,10 +24,10 @@ mod encryption; mod export; mod import; -pub use compression::Compression; -pub use conversion::Conversion; -pub use decompression::Decompression; -pub use decryption::Decryption; -pub use encryption::Encryption; -pub use export::Export; -pub use import::Import; +pub use self::compression::Compression; +pub use self::conversion::Conversion; +pub use self::decompression::Decompression; +pub use self::decryption::Decryption; +pub use self::encryption::Encryption; +pub use self::export::Export; +pub use self::import::Import; diff --git a/crates/nvisy-engine/src/operation/mod.rs b/crates/nvisy-engine/src/operation/mod.rs index 1e8e20a2..14473fd5 100644 --- a/crates/nvisy-engine/src/operation/mod.rs +++ b/crates/nvisy-engine/src/operation/mod.rs @@ -15,6 +15,7 @@ //! | Lifecycle | [`lifecycle`] | Content I/O (import, export, encrypt) | mod context; +pub mod envelope; pub mod inference; pub mod lifecycle; pub mod processing; @@ -22,11 +23,11 @@ pub mod utility; use std::future::Future; -pub use context::{ - DocumentEnvelope, OperationContext, ParallelContext, SequentialContext, SharedContext, -}; use nvisy_core::Result; +pub use self::context::{OperationContext, ParallelContext, SequentialContext, SharedContext}; +pub use self::envelope::DocumentEnvelope; + /// A single unit of work in the redaction pipeline. /// /// Operations are stateless and composable. The engine calls [`Operation::call`] diff --git a/crates/nvisy-engine/src/operation/processing/deduplication.rs b/crates/nvisy-engine/src/operation/processing/deduplication.rs index 6a8a37d1..23c5b18b 100644 --- a/crates/nvisy-engine/src/operation/processing/deduplication.rs +++ b/crates/nvisy-engine/src/operation/processing/deduplication.rs @@ -2,11 +2,12 @@ //! //! Merges entities that share the same `entity_kind`, `value`, and //! overlapping location into a single entity with the highest -//! 
confidence and `DetectionMethod::Composite` when methods differ. +//! confidence and combined recognition methods. use nvisy_core::Result; -use nvisy_ontology::entity::{DetectionMethod, Entities, Entity, Location}; +use nvisy_ontology::entity::{Entities, Entity, Location, RefinementMethod}; +use crate::operation::envelope::RefinedEntities; use crate::operation::{Operation, ParallelContext}; const TARGET: &str = "nvisy_engine::op::deduplication"; @@ -18,16 +19,16 @@ const TARGET: &str = "nvisy_engine::op::deduplication"; /// /// When merging: /// - The highest confidence score is kept. -/// - If the detection methods differ, the merged entity uses -/// `DetectionMethod::Composite`. +/// - Recognition methods are combined into an ordered vector. +/// - [`RefinementMethod::Deduplication`] is recorded on the merged entity. pub struct Deduplication; impl Deduplication { - async fn deduplicate(&self, entities: Entities) -> Result { + async fn deduplicate(&self, entities: Entities) -> Result { let before = entities.len(); let result = Self::execute(entities); tracing::debug!(target: TARGET, before, after = result.len(), "deduplicated entities"); - Ok(result) + Ok(RefinedEntities(result)) } /// Deduplicate and merge overlapping entities. 
@@ -50,8 +51,18 @@ impl Deduplication { if entity.confidence > existing.confidence { existing.confidence = entity.confidence; } - if existing.detection_method != entity.detection_method { - existing.detection_method = DetectionMethod::Composite; + for m in entity.recognition_methods { + if !existing.recognition_methods.contains(&m) { + existing.recognition_methods.push(m); + } + } + if !existing + .refinement_methods + .contains(&RefinementMethod::Deduplication) + { + existing + .refinement_methods + .push(RefinementMethod::Deduplication); } } None => { @@ -66,7 +77,7 @@ impl Deduplication { impl Operation for Deduplication { type Input = ParallelContext; - type Output = ParallelContext; + type Output = ParallelContext; async fn call(&self, input: Self::Input) -> Result { input.parallel_map(|data| self.deduplicate(data)).await @@ -87,19 +98,19 @@ fn locations_overlap(a: &Option, b: &Option) -> bool { #[cfg(test)] mod tests { - use nvisy_ontology::entity::{EntityCategory, EntityKind, TextLocation}; + use nvisy_ontology::entity::{EntityCategory, EntityKind, RecognitionMethod, TextLocation}; use super::*; fn text_entity( value: &str, - method: DetectionMethod, + method: RecognitionMethod, confidence: f64, start: usize, end: usize, ) -> Entity { Entity::new( - EntityCategory::Pii, + EntityCategory::PersonalIdentity, EntityKind::PersonName, value, method, @@ -118,34 +129,44 @@ mod tests { #[test] fn duplicates_merged_same_method() { let entities: Entities = vec![ - text_entity("John", DetectionMethod::Regex, 0.8, 0, 4), - text_entity("John", DetectionMethod::Regex, 0.9, 0, 4), + text_entity("John", RecognitionMethod::Regex, 0.8, 0, 4), + text_entity("John", RecognitionMethod::Regex, 0.9, 0, 4), ] .into(); let result = Deduplication::execute(entities); assert_eq!(result.len(), 1); assert!((result[0].confidence - 0.9).abs() < f64::EPSILON); - assert_eq!(result[0].detection_method, DetectionMethod::Regex); + assert_eq!( + result[0].recognition_methods, + 
vec![RecognitionMethod::Regex] + ); + assert_eq!( + result[0].refinement_methods, + vec![RefinementMethod::Deduplication] + ); } #[test] - fn different_methods_become_composite() { + fn different_methods_are_combined() { let entities: Entities = vec![ - text_entity("John", DetectionMethod::Regex, 0.8, 0, 4), - text_entity("John", DetectionMethod::Ner, 0.85, 0, 4), + text_entity("John", RecognitionMethod::Regex, 0.8, 0, 4), + text_entity("John", RecognitionMethod::Ner, 0.85, 0, 4), ] .into(); let result = Deduplication::execute(entities); assert_eq!(result.len(), 1); - assert_eq!(result[0].detection_method, DetectionMethod::Composite); + assert_eq!( + result[0].recognition_methods, + vec![RecognitionMethod::Regex, RecognitionMethod::Ner] + ); assert!((result[0].confidence - 0.85).abs() < f64::EPSILON); } #[test] fn non_overlapping_preserved() { let entities: Entities = vec![ - text_entity("John", DetectionMethod::Regex, 0.8, 0, 4), - text_entity("John", DetectionMethod::Regex, 0.9, 10, 14), + text_entity("John", RecognitionMethod::Regex, 0.8, 0, 4), + text_entity("John", RecognitionMethod::Regex, 0.9, 10, 14), ] .into(); let result = Deduplication::execute(entities); @@ -155,8 +176,8 @@ mod tests { #[test] fn different_values_not_merged() { let entities: Entities = vec![ - text_entity("John", DetectionMethod::Regex, 0.8, 0, 4), - text_entity("Jane", DetectionMethod::Regex, 0.9, 0, 4), + text_entity("John", RecognitionMethod::Regex, 0.8, 0, 4), + text_entity("Jane", RecognitionMethod::Regex, 0.9, 0, 4), ] .into(); let result = Deduplication::execute(entities); @@ -172,7 +193,7 @@ mod tests { #[test] fn single_entity_unchanged() { let entities: Entities = - vec![text_entity("John", DetectionMethod::Regex, 0.8, 0, 4)].into(); + vec![text_entity("John", RecognitionMethod::Regex, 0.8, 0, 4)].into(); let result = Deduplication::execute(entities); assert_eq!(result.len(), 1); } @@ -181,12 +202,15 @@ mod tests { fn overlapping_ranges_merge() { // Partially overlapping: 0..6 
and 3..9. let entities: Entities = vec![ - text_entity("John Doe", DetectionMethod::Regex, 0.7, 0, 6), - text_entity("John Doe", DetectionMethod::Ner, 0.9, 3, 9), + text_entity("John Doe", RecognitionMethod::Regex, 0.7, 0, 6), + text_entity("John Doe", RecognitionMethod::Ner, 0.9, 3, 9), ] .into(); let result = Deduplication::execute(entities); assert_eq!(result.len(), 1); - assert_eq!(result[0].detection_method, DetectionMethod::Composite); + assert_eq!( + result[0].recognition_methods, + vec![RecognitionMethod::Regex, RecognitionMethod::Ner] + ); } } diff --git a/crates/nvisy-engine/src/operation/processing/ensemble_fusion.rs b/crates/nvisy-engine/src/operation/processing/ensemble_fusion.rs index 75155a55..4dbbaa04 100644 --- a/crates/nvisy-engine/src/operation/processing/ensemble_fusion.rs +++ b/crates/nvisy-engine/src/operation/processing/ensemble_fusion.rs @@ -4,8 +4,9 @@ use std::collections::HashMap; use nvisy_core::Result; -use nvisy_ontology::entity::{DetectionMethod, Entities, Entity, Location}; +use nvisy_ontology::entity::{Entities, Entity, Location, RecognitionMethod, RefinementMethod}; +use crate::operation::envelope::RefinedEntities; use crate::operation::{Operation, ParallelContext}; const TARGET: &str = "nvisy_engine::op::ensemble"; @@ -15,9 +16,9 @@ const TARGET: &str = "nvisy_engine::op::ensemble"; pub enum FusionStrategy { /// Take the maximum confidence across all detectors. MaxConfidence, - /// Weighted average by detection method. + /// Weighted average by recognition method. WeightedAverage { - weights: HashMap, + weights: HashMap, }, /// Noisy-OR: `P = 1 − ∏(1 − pᵢ)` for independent detectors. 
NoisyOr, @@ -35,11 +36,11 @@ impl Ensemble { Self { strategy } } - async fn fuse(&self, entities: Entities) -> Result { + async fn fuse(&self, entities: Entities) -> Result { let before = entities.len(); let result = self.merge(entities); tracing::debug!(target: TARGET, before, after = result.len(), "fused entities"); - Ok(result) + Ok(RefinedEntities(result)) } /// Group entities by `(kind, value, overlapping location)` then fuse @@ -87,7 +88,9 @@ impl Ensemble { let mut total_weight = 0.0; let mut weighted_sum = 0.0; for e in &group { - let w = weights.get(&e.detection_method).copied().unwrap_or(1.0); + // Use the first recognition method for weight lookup. + let primary = e.recognition_methods.first(); + let w = primary.and_then(|m| weights.get(m)).copied().unwrap_or(1.0); weighted_sum += e.confidence * w; total_weight += w; } @@ -104,17 +107,30 @@ impl Ensemble { } }; - // Use the first entity as the base and update confidence/method. + // Collect all recognition methods from the group in order. + let mut merged_methods = Vec::new(); + for e in &group { + for m in &e.recognition_methods { + if !merged_methods.contains(m) { + merged_methods.push(*m); + } + } + } + + // Use the first entity as the base and update confidence/methods. 
let mut result = group.into_iter().next().unwrap(); result.confidence = fused_confidence; - result.detection_method = DetectionMethod::Composite; + result.recognition_methods = merged_methods; + result + .refinement_methods + .push(RefinementMethod::EnsembleFusion); result } } impl Operation for Ensemble { type Input = ParallelContext; - type Output = ParallelContext; + type Output = ParallelContext; async fn call(&self, input: Self::Input) -> Result { input.parallel_map(|data| self.fuse(data)).await @@ -138,13 +154,13 @@ mod tests { fn text_entity( value: &str, - method: DetectionMethod, + method: RecognitionMethod, confidence: f64, start: usize, end: usize, ) -> Entity { Entity::new( - EntityCategory::Pii, + EntityCategory::PersonalIdentity, EntityKind::PersonName, value, method, @@ -164,22 +180,29 @@ mod tests { fn max_confidence_strategy() { let merge = Ensemble::new(FusionStrategy::MaxConfidence); let entities: Entities = vec![ - text_entity("John", DetectionMethod::Regex, 0.7, 0, 4), - text_entity("John", DetectionMethod::Ner, 0.85, 0, 4), + text_entity("John", RecognitionMethod::Regex, 0.7, 0, 4), + text_entity("John", RecognitionMethod::Ner, 0.85, 0, 4), ] .into(); let result = merge.merge(entities); assert_eq!(result.len(), 1); assert!((result[0].confidence - 0.85).abs() < f64::EPSILON); - assert_eq!(result[0].detection_method, DetectionMethod::Composite); + assert_eq!( + result[0].recognition_methods, + vec![RecognitionMethod::Regex, RecognitionMethod::Ner] + ); + assert_eq!( + result[0].refinement_methods, + vec![RefinementMethod::EnsembleFusion] + ); } #[test] fn noisy_or_strategy() { let merge = Ensemble::new(FusionStrategy::NoisyOr); let entities: Entities = vec![ - text_entity("John", DetectionMethod::Regex, 0.7, 0, 4), - text_entity("John", DetectionMethod::Ner, 0.8, 0, 4), + text_entity("John", RecognitionMethod::Regex, 0.7, 0, 4), + text_entity("John", RecognitionMethod::Ner, 0.8, 0, 4), ] .into(); let result = merge.merge(entities); @@ -191,13 
+214,13 @@ mod tests { #[test] fn weighted_average_strategy() { let mut weights = HashMap::new(); - weights.insert(DetectionMethod::Regex, 1.0); - weights.insert(DetectionMethod::Ner, 2.0); + weights.insert(RecognitionMethod::Regex, 1.0); + weights.insert(RecognitionMethod::Ner, 2.0); let merge = Ensemble::new(FusionStrategy::WeightedAverage { weights }); let entities: Entities = vec![ - text_entity("John", DetectionMethod::Regex, 0.6, 0, 4), - text_entity("John", DetectionMethod::Ner, 0.9, 0, 4), + text_entity("John", RecognitionMethod::Regex, 0.6, 0, 4), + text_entity("John", RecognitionMethod::Ner, 0.9, 0, 4), ] .into(); let result = merge.merge(entities); @@ -210,8 +233,8 @@ mod tests { fn non_overlapping_not_merged() { let merge = Ensemble::new(FusionStrategy::NoisyOr); let entities: Entities = vec![ - text_entity("John", DetectionMethod::Regex, 0.7, 0, 4), - text_entity("John", DetectionMethod::Ner, 0.8, 10, 14), + text_entity("John", RecognitionMethod::Regex, 0.7, 0, 4), + text_entity("John", RecognitionMethod::Ner, 0.8, 10, 14), ] .into(); let result = merge.merge(entities); @@ -222,11 +245,14 @@ mod tests { fn single_entity_unchanged() { let merge = Ensemble::new(FusionStrategy::NoisyOr); let entities: Entities = - vec![text_entity("John", DetectionMethod::Regex, 0.7, 0, 4)].into(); + vec![text_entity("John", RecognitionMethod::Regex, 0.7, 0, 4)].into(); let result = merge.merge(entities); assert_eq!(result.len(), 1); assert!((result[0].confidence - 0.7).abs() < f64::EPSILON); - assert_eq!(result[0].detection_method, DetectionMethod::Regex); + assert_eq!( + result[0].recognition_methods, + vec![RecognitionMethod::Regex] + ); } #[test] diff --git a/crates/nvisy-engine/src/operation/processing/manual_detection.rs b/crates/nvisy-engine/src/operation/processing/manual_detection.rs index f5cd427e..59ce3b70 100644 --- a/crates/nvisy-engine/src/operation/processing/manual_detection.rs +++ b/crates/nvisy-engine/src/operation/processing/manual_detection.rs @@ -5,7 
+5,7 @@ use nvisy_core::Result; use nvisy_ontology::entity::{ - Annotation, AnnotationKind, DetectionMethod, Entities, Entity, Location, + Annotation, AnnotationKind, Entities, Entity, Location, RecognitionMethod, }; use serde::Deserialize; @@ -37,7 +37,7 @@ pub struct ManualOutput { } /// Converts each inclusion [`Annotation`] into a full [`Entity`] with -/// `DetectionMethod::Manual` and confidence 1.0. Collects exclusion +/// `RecognitionMethod::Manual` and confidence 1.0. Collects exclusion /// annotations for downstream filtering. pub struct ManualDetection; @@ -54,8 +54,8 @@ impl ManualDetection { for ann in &annotations { match ann.kind { AnnotationKind::Inclusion => { - let category = match &ann.category { - Some(c) => c.clone(), + let category = match ann.category { + Some(c) => c, None => continue, }; let entity_kind = match ann.entity_kind { @@ -65,7 +65,7 @@ impl ManualDetection { let value = ann.value.clone().unwrap_or_default(); let mut entity = - Entity::new(category, entity_kind, value, DetectionMethod::Manual, 1.0); + Entity::new(category, entity_kind, value, RecognitionMethod::Manual, 1.0); entity.location = ann.location.clone(); entities.push(entity); } @@ -128,10 +128,10 @@ mod tests { fn make_entity(value: &str, start: usize, end: usize) -> Entity { Entity::new( - EntityCategory::Pii, + EntityCategory::PersonalIdentity, EntityKind::PersonName, value, - DetectionMethod::Manual, + RecognitionMethod::Manual, 1.0, ) .with_location( @@ -199,7 +199,7 @@ mod tests { let annotations = vec![ Annotation { kind: AnnotationKind::Inclusion, - category: Some(EntityCategory::Pii), + category: Some(EntityCategory::PersonalIdentity), entity_kind: Some(EntityKind::PersonName), value: Some("Alice".into()), location: None, diff --git a/crates/nvisy-engine/src/operation/processing/mod.rs b/crates/nvisy-engine/src/operation/processing/mod.rs index 32eb85b9..a3e9564b 100644 --- a/crates/nvisy-engine/src/operation/processing/mod.rs +++ 
b/crates/nvisy-engine/src/operation/processing/mod.rs @@ -22,12 +22,12 @@ mod policy_evaluation; mod redaction; mod validation; -pub use deduplication::Deduplication; -pub use ensemble_fusion::{Ensemble, FusionStrategy}; -pub use manual_detection::{ +pub use self::deduplication::Deduplication; +pub use self::ensemble_fusion::{Ensemble, FusionStrategy}; +pub use self::manual_detection::{ Exclusion, ManualDetection, ManualDetectionParams, ManualOutput, is_excluded, }; -pub use pattern_match::{PatternDetectionParams, PatternInput, PatternMatch}; -pub use policy_evaluation::{EvaluatePolicy, EvaluatePolicyParams}; -pub use redaction::{Redaction, RedactionInput, RedactionOutput}; -pub use validation::Validation; +pub use self::pattern_match::{PatternDetectionParams, PatternMatch}; +pub use self::policy_evaluation::{EvaluatePolicy, EvaluatePolicyParams}; +pub use self::redaction::{Redaction, RedactionInput, RedactionOutput}; +pub use self::validation::Validation; diff --git a/crates/nvisy-engine/src/operation/processing/pattern_match.rs b/crates/nvisy-engine/src/operation/processing/pattern_match.rs index 3174432d..d41d745a 100644 --- a/crates/nvisy-engine/src/operation/processing/pattern_match.rs +++ b/crates/nvisy-engine/src/operation/processing/pattern_match.rs @@ -1,19 +1,17 @@ //! Pattern-based PII/PHI entity detection operation. //! -//! Operates on text, CSV, HTML, and JSON spans, running both compiled +//! Scans type-erased text spans (`Span`) using compiled //! regex patterns and dictionary automata via [`PatternEngine`]. 
use nvisy_codec::Span; -use nvisy_codec::handler::{CsvSpan, HtmlSpan, JsonPath, TxtSpan}; +use nvisy_codec::handler::TextData; use nvisy_core::{Error, Result}; -use nvisy_ontology::entity::{DetectionMethod, Entities, Entity, TabularLocation, TextLocation}; -use nvisy_pattern::{ - ContextRule, DetectionSource, PatternEngine, PatternEngineBuilder, - PatternMatch as PatternMatchResult, -}; +use nvisy_ontology::entity::TextLocation; +use nvisy_pattern::patterns::ContextRule; +use nvisy_pattern::{PatternEngine, PatternEngineBuilder, RawMatch, ScanContext}; use serde::Deserialize; -use serde_json::Value; +use crate::operation::envelope::DetectedEntities; use crate::operation::{Operation, ParallelContext}; const TARGET: &str = "nvisy_engine::op::pattern_match"; @@ -28,19 +26,13 @@ pub struct PatternDetectionParams { pub patterns: Option>, } -/// Multi-modality input for pattern matching. -pub enum PatternInput { - Text(Vec>), - Csv(Vec>), - - Html(Vec>), - Json(Vec>), -} - /// Pattern detection operation backed by [`PatternEngine`]. /// -/// Handles both regex and dictionary matches, replacing the former -/// separate `DictionaryDetection`. +/// Accepts type-erased text spans from any [`TextHandler`] (plain text, +/// CSV, HTML, JSON, etc.) and detects entities using regex and dictionary +/// patterns with co-occurrence boosting. 
+/// +/// [`TextHandler`]: nvisy_codec::handler::TextHandler pub struct PatternMatch { engine: PatternEngine, } @@ -58,232 +50,59 @@ impl PatternMatch { .map_err(|e| Error::validation(e.to_string(), "pattern-detection"))?; Ok(Self { engine }) } -} - -impl PatternMatch { - async fn scan(&self, data: PatternInput) -> Result { - tracing::debug!(target: TARGET, "scanning for patterns"); - match data { - PatternInput::Text(spans) => self.detect_text(spans), - PatternInput::Csv(spans) => self.detect_csv(spans), - PatternInput::Html(spans) => self.detect_html(spans), - PatternInput::Json(spans) => self.detect_json(spans), - } - } -} - -impl Operation for PatternMatch { - type Input = ParallelContext; - type Output = ParallelContext; - - async fn call(&self, input: Self::Input) -> Result { - input.parallel_map(|data| self.scan(data)).await - } -} - -impl PatternMatch { - fn detect_text(&self, spans: Vec>) -> Result { - // Phase 1: collect raw matches per span index. - let span_data: Vec<&str> = spans.iter().map(|s| s.data.as_str()).collect(); - let mut raw_matches: Vec<(usize, PatternMatchResult)> = Vec::new(); - - for (idx, span) in spans.iter().enumerate() { - for m in self.engine.scan_text(&span.data) { - raw_matches.push((idx, m)); - } - } - - // Phase 2: apply co-occurrence boost and build entities. 
- let mut entities = Vec::new(); - for (span_idx, m) in &raw_matches { - let confidence = if let Some(ref ctx) = m.context { - apply_cooccurrence(&span_data, *span_idx, ctx, m.confidence) - } else { - m.confidence - }; - - let method = detection_method(m.source); - - let entity = Entity::new( - m.category.clone(), - m.entity_kind, - &m.value, - method, - confidence, - ) - .with_location( - TextLocation { - start_offset: m.start, - end_offset: m.end, - element_id: Some(spans[*span_idx].id.0.to_string()), - ..Default::default() - } - .into(), - ) - .with_parent(&spans[*span_idx].source); - - entities.push(entity); - } - - Ok(entities.into()) - } - - fn detect_csv(&self, spans: Vec>) -> Result { - // Collect all span data (including headers) for co-occurrence window. - let span_data: Vec<&str> = spans.iter().map(|s| s.data.as_str()).collect(); - - // Phase 1: collect raw matches per span index (skip headers). - let mut raw_matches: Vec<(usize, PatternMatchResult)> = Vec::new(); - for (idx, span) in spans.iter().enumerate() { - if span.id.header || span.data.is_empty() { - continue; - } - for m in self.engine.scan_text(&span.data) { - raw_matches.push((idx, m)); - } - } - - // Phase 2: apply co-occurrence boost and build entities. 
- let mut entities = Vec::new(); - for (span_idx, m) in &raw_matches { - let confidence = if let Some(ref ctx) = m.context { - apply_cooccurrence(&span_data, *span_idx, ctx, m.confidence) - } else { - m.confidence - }; - - let method = detection_method(m.source); - let span = &spans[*span_idx]; - - let entity = Entity::new( - m.category.clone(), - m.entity_kind, - &m.value, - method, - confidence, - ) - .with_location( - TabularLocation { - row_index: span.id.row, - column_index: span.id.col, - start_offset: Some(m.start), - end_offset: Some(m.end), - column_name: None, - sheet_name: None, - } - .into(), - ) - .with_parent(&span.source); - - entities.push(entity); - } - Ok(entities.into()) - } + fn detect(&self, spans: Vec>) -> Result { + tracing::debug!(target: TARGET, span_count = spans.len(), "scanning for patterns"); - fn detect_html(&self, spans: Vec>) -> Result { let span_data: Vec<&str> = spans.iter().map(|s| s.data.as_str()).collect(); - let mut raw_matches: Vec<(usize, PatternMatchResult)> = Vec::new(); + let mut raw_matches: Vec<(usize, RawMatch)> = Vec::new(); + let scan_ctx = ScanContext::default(); for (idx, span) in spans.iter().enumerate() { - for m in self.engine.scan_text(&span.data) { + for m in self.engine.scan_text(span.data.as_str(), &scan_ctx) { raw_matches.push((idx, m)); } } let mut entities = Vec::new(); - for (span_idx, m) in &raw_matches { - let confidence = if let Some(ref ctx) = m.context { - apply_cooccurrence(&span_data, *span_idx, ctx, m.confidence) - } else { - m.confidence - }; - - let method = detection_method(m.source); - - let entity = Entity::new( - m.category.clone(), - m.entity_kind, - &m.value, - method, - confidence, - ) - .with_location( - TextLocation { - start_offset: m.start, - end_offset: m.end, - element_id: Some(spans[*span_idx].id.0.to_string()), - ..Default::default() - } - .into(), - ) - .with_parent(&spans[*span_idx].source); - - entities.push(entity); - } - - Ok(entities.into()) - } - - fn detect_json(&self, 
spans: Vec>) -> Result { - // Filter to string-valued spans and collect text for co-occurrence. - let string_spans: Vec<(usize, &str)> = spans - .iter() - .enumerate() - .filter_map(|(idx, s)| s.data.as_str().map(|text| (idx, text))) - .collect(); - - let span_data: Vec<&str> = string_spans.iter().map(|(_, text)| *text).collect(); - let mut raw_matches: Vec<(usize, PatternMatchResult)> = Vec::new(); - - for (co_idx, (_, text)) in string_spans.iter().enumerate() { - for m in self.engine.scan_text(text) { - raw_matches.push((co_idx, m)); - } - } - - let mut entities = Vec::new(); - for (co_idx, m) in &raw_matches { + for (span_idx, m) in raw_matches { let confidence = if let Some(ref ctx) = m.context { - apply_cooccurrence(&span_data, *co_idx, ctx, m.confidence) + apply_cooccurrence(&span_data, span_idx, ctx, m.confidence) } else { m.confidence }; - - let method = detection_method(m.source); - let (orig_idx, _) = string_spans[*co_idx]; - - let entity = Entity::new( - m.category.clone(), - m.entity_kind, - &m.value, - method, - confidence, - ) - .with_location( - TextLocation { - start_offset: m.start, - end_offset: m.end, - element_id: Some(spans[orig_idx].id.pointer.clone()), - ..Default::default() - } - .into(), - ) - .with_parent(&spans[orig_idx].source); + let start = m.start; + let end = m.end; + let element_id = spans[span_idx].id.to_string(); + let source = spans[span_idx].source; + + let mut entity = m.into_entity(); + entity.confidence = confidence; + let entity = entity + .with_location( + TextLocation { + start_offset: start, + end_offset: end, + element_id: Some(element_id), + ..Default::default() + } + .into(), + ) + .with_parent(&source); entities.push(entity); } - Ok(entities.into()) + Ok(DetectedEntities(entities.into())) } } -/// Map a [`DetectionSource`] to a [`DetectionMethod`]. 
-fn detection_method(source: DetectionSource) -> DetectionMethod { - match source { - DetectionSource::Regex => DetectionMethod::Regex, - DetectionSource::Dictionary => DetectionMethod::Dictionary, - DetectionSource::DenyList => DetectionMethod::Dictionary, +impl Operation for PatternMatch { + type Input = ParallelContext>>; + type Output = ParallelContext; + + async fn call(&self, input: Self::Input) -> Result { + input.parallel_map(|data| async { self.detect(data) }).await } } diff --git a/crates/nvisy-engine/src/operation/processing/policy_evaluation.rs b/crates/nvisy-engine/src/operation/processing/policy_evaluation.rs index 519ae369..9e93793e 100644 --- a/crates/nvisy-engine/src/operation/processing/policy_evaluation.rs +++ b/crates/nvisy-engine/src/operation/processing/policy_evaluation.rs @@ -13,6 +13,7 @@ use nvisy_ontology::entity::{Entities, Entity}; use nvisy_ontology::policy::{PolicyRule, RuleAction, Strategy, TextStrategy}; use serde::Deserialize; +use crate::operation::envelope::PolicyOutcome; use crate::operation::{Operation, ParallelContext}; use crate::provenance::{RedactionDecision, RedactionRecord}; @@ -40,14 +41,6 @@ fn default_threshold() -> f64 { 0.5 } -/// Output of policy evaluation: both pipeline decisions and audit records. -pub struct EvaluatePolicyOutput { - /// Pipeline-facing redaction decisions. - pub decisions: Vec, - /// Audit-facing redaction records. - pub records: Vec, -} - /// Evaluates policy rules against detected entities and produces /// [`RedactionDecision`] and [`RedactionRecord`] pairs. 
/// @@ -64,7 +57,7 @@ impl EvaluatePolicy { Ok(Self { params }) } - pub async fn execute(&self, entities: Entities) -> Result { + pub async fn execute(&self, entities: Entities) -> Result { tracing::debug!(target: TARGET, entity_count = entities.len(), "evaluating policies"); let default_spec = &self.params.default_spec; let default_threshold = self.params.default_confidence_threshold; @@ -116,13 +109,13 @@ impl EvaluatePolicy { records.push(record); } - Ok(EvaluatePolicyOutput { decisions, records }) + Ok(PolicyOutcome { decisions, records }) } } impl Operation for EvaluatePolicy { type Input = ParallelContext; - type Output = ParallelContext; + type Output = ParallelContext; async fn call(&self, input: Self::Input) -> Result { input.parallel_map(|data| self.execute(data)).await diff --git a/crates/nvisy-engine/src/pipeline/mod.rs b/crates/nvisy-engine/src/pipeline/mod.rs index dd15995b..350a0986 100644 --- a/crates/nvisy-engine/src/pipeline/mod.rs +++ b/crates/nvisy-engine/src/pipeline/mod.rs @@ -16,17 +16,19 @@ mod runs; use std::future::Future; -pub use config::{EngineSection, LlmSection, OcrSection, RuntimeConfig, SttSection, TtsSection}; -pub use default::DefaultEngine; -pub use executor::{NodeOutput, RunOutput}; use nvisy_core::Error; use nvisy_ontology::context::Contexts; use nvisy_ontology::entity::DetectionOutput; use nvisy_ontology::policy::{Policies, RedactionSummary}; -pub use ontology::{Explainable, Explanation}; -pub use runs::{NodeProgress, RunManager, RunState, RunStatus, RunSummary}; use uuid::Uuid; +pub use self::config::{ + EngineSection, LlmSection, OcrSection, RuntimeConfig, SttSection, TtsSection, +}; +pub use self::default::DefaultEngine; +pub use self::executor::{NodeOutput, RunOutput}; +pub use self::ontology::{Explainable, Explanation}; +pub use self::runs::{NodeProgress, RunManager, RunState, RunStatus, RunSummary}; use crate::compiler::Graph; use crate::provenance::{Audit, PolicyEvaluation, RedactionMap}; diff --git 
a/crates/nvisy-engine/src/pipeline/ontology.rs b/crates/nvisy-engine/src/pipeline/ontology.rs index e26765a3..fefb1503 100644 --- a/crates/nvisy-engine/src/pipeline/ontology.rs +++ b/crates/nvisy-engine/src/pipeline/ontology.rs @@ -1,10 +1,10 @@ //! Explainability metadata for data protection decisions. //! -//! An [`Explanation`] records why an action was taken — which model, rule, +//! An [`Explanation`] records why an action was taken: which model, rule, //! and confidence level were involved. Types that carry this metadata //! implement the [`Explainable`] trait. -use nvisy_ontology::entity::{DetectionMethod, ModelInfo}; +use nvisy_ontology::entity::{ModelInfo, RecognitionMethod}; use schemars::JsonSchema; use semver::Version; use serde::{Deserialize, Serialize}; @@ -31,9 +31,9 @@ pub struct Explanation { /// Detection confidence score. #[serde(skip_serializing_if = "Option::is_none")] pub confidence: Option, - /// Detection method used. + /// Recognition method used. #[serde(skip_serializing_if = "Option::is_none")] - pub detection_method: Option, + pub recognition_method: Option, /// Human-readable reason for the action. 
#[serde(skip_serializing_if = "Option::is_none")] pub reason: Option, diff --git a/crates/nvisy-engine/src/pipeline/policy/mod.rs b/crates/nvisy-engine/src/pipeline/policy/mod.rs index 5fd868ac..a5b29ec7 100644 --- a/crates/nvisy-engine/src/pipeline/policy/mod.rs +++ b/crates/nvisy-engine/src/pipeline/policy/mod.rs @@ -9,5 +9,5 @@ mod retry; mod timeout; -pub use retry::CompiledRetryPolicy; -pub use timeout::CompiledTimeoutPolicy; +pub use self::retry::CompiledRetryPolicy; +pub use self::timeout::CompiledTimeoutPolicy; diff --git a/crates/nvisy-engine/src/provenance/action/mod.rs b/crates/nvisy-engine/src/provenance/action/mod.rs index 3fac7e84..ffba31f8 100644 --- a/crates/nvisy-engine/src/provenance/action/mod.rs +++ b/crates/nvisy-engine/src/provenance/action/mod.rs @@ -7,6 +7,6 @@ mod inference; mod lifecycle; mod processing; -pub use inference::{InferenceAction, InferenceActionBuilder}; -pub use lifecycle::{LifecycleAction, LifecycleActionBuilder}; -pub use processing::{ProcessingAction, ProcessingActionBuilder}; +pub use self::inference::{InferenceAction, InferenceActionBuilder}; +pub use self::lifecycle::{LifecycleAction, LifecycleActionBuilder}; +pub use self::processing::{ProcessingAction, ProcessingActionBuilder}; diff --git a/crates/nvisy-engine/src/provenance/kind.rs b/crates/nvisy-engine/src/provenance/kind.rs index 16cb6024..ef87446a 100644 --- a/crates/nvisy-engine/src/provenance/kind.rs +++ b/crates/nvisy-engine/src/provenance/kind.rs @@ -1,6 +1,6 @@ //! Two-level tagged enum discriminating audit entry categories. -use nvisy_ontology::entity::DetectionMethod; +use nvisy_ontology::entity::ExtractionMethod; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -27,16 +27,18 @@ pub enum InferenceKind { } impl InferenceKind { - /// Returns the [`DetectionMethod`] that corresponds to this inference kind. 
- pub fn detection_method(&self) -> DetectionMethod { + /// Returns the [`ExtractionMethod`] for inference kinds that perform + /// content extraction. Returns `None` for pure recognition or + /// non-extraction operations. + pub fn extraction_method(&self) -> Option { match self { - Self::Ocr(_) => DetectionMethod::Ocr, - Self::Transcription(_) => DetectionMethod::SpeechTranscript, - Self::Ner(_) => DetectionMethod::Ner, - Self::ComputerVision(_) => DetectionMethod::ObjectDetection, - Self::Translation(_) | Self::Classification(_) | Self::Summarization(_) => { - DetectionMethod::ContextualNlp - } + Self::Ocr(_) => Some(ExtractionMethod::OpticalCharacterRecognition), + Self::Transcription(_) => Some(ExtractionMethod::Transcription), + Self::ComputerVision(_) => Some(ExtractionMethod::ObjectDetection), + Self::Ner(_) + | Self::Translation(_) + | Self::Classification(_) + | Self::Summarization(_) => None, } } } diff --git a/crates/nvisy-engine/src/provenance/mod.rs b/crates/nvisy-engine/src/provenance/mod.rs index ad54b5cd..bc888b20 100644 --- a/crates/nvisy-engine/src/provenance/mod.rs +++ b/crates/nvisy-engine/src/provenance/mod.rs @@ -19,14 +19,14 @@ mod kind; mod action; mod record; -pub use action::{ +pub use self::action::{ InferenceAction, InferenceActionBuilder, LifecycleAction, LifecycleActionBuilder, ProcessingAction, ProcessingActionBuilder, }; -pub use audit::Audit; -pub use entry::{AuditEntry, AuditEntryBuilder, AuditEntryBuilderError, AuditEntryStatus}; -pub use kind::{AuditEntryKind, InferenceKind, LifecycleKind, ProcessingKind}; -pub use record::{ +pub use self::audit::Audit; +pub use self::entry::{AuditEntry, AuditEntryBuilder, AuditEntryBuilderError, AuditEntryStatus}; +pub use self::kind::{AuditEntryKind, InferenceKind, LifecycleKind, ProcessingKind}; +pub use self::record::{ PolicyEvaluation, RedactionDecision, RedactionMap, RedactionMapEntry, RedactionRecord, ReviewDecision, ReviewStatus, }; diff --git 
a/crates/nvisy-engine/src/provenance/record/mod.rs b/crates/nvisy-engine/src/provenance/record/mod.rs index 7d9cdac6..e65bf958 100644 --- a/crates/nvisy-engine/src/provenance/record/mod.rs +++ b/crates/nvisy-engine/src/provenance/record/mod.rs @@ -7,8 +7,8 @@ mod map; mod redaction; mod review; -pub use decision::RedactionDecision; -pub use evaluation::PolicyEvaluation; -pub use map::{RedactionMap, RedactionMapEntry}; -pub use redaction::RedactionRecord; -pub use review::{ReviewDecision, ReviewStatus}; +pub use self::decision::RedactionDecision; +pub use self::evaluation::PolicyEvaluation; +pub use self::map::{RedactionMap, RedactionMapEntry}; +pub use self::redaction::RedactionRecord; +pub use self::review::{ReviewDecision, ReviewStatus}; diff --git a/crates/nvisy-http/src/lib.rs b/crates/nvisy-http/src/lib.rs index 7ab3e9bd..b6aaad6b 100644 --- a/crates/nvisy-http/src/lib.rs +++ b/crates/nvisy-http/src/lib.rs @@ -5,7 +5,7 @@ mod client; mod middleware; -pub use client::{HttpClient, HttpConfig}; +pub use self::client::{HttpClient, HttpConfig}; #[doc(hidden)] pub mod prelude; diff --git a/crates/nvisy-ocr/src/backend/mod.rs b/crates/nvisy-ocr/src/backend/mod.rs index f3ad815f..10adbcd9 100644 --- a/crates/nvisy-ocr/src/backend/mod.rs +++ b/crates/nvisy-ocr/src/backend/mod.rs @@ -3,13 +3,14 @@ mod input; mod output; -pub use input::ImageInput; use nvisy_core::Error; pub use nvisy_core::media::ImageFormat; -pub use output::{Block, BlockKind, ImageOutput, Line, Page, Word}; use reqwest_middleware::reqwest::Response; use reqwest_middleware::reqwest::multipart::Part; +pub use self::input::ImageInput; +pub use self::output::{Block, BlockKind, ImageOutput, Line, Page, Word}; + /// Build a multipart [`Part`] from an [`ImageInput`]. 
pub(crate) fn image_part(image: &ImageInput) -> Result { let filename = format!("image.{}", image.format.extension()); diff --git a/crates/nvisy-ocr/src/engine/mod.rs b/crates/nvisy-ocr/src/engine/mod.rs index 6a6c8024..7893219d 100644 --- a/crates/nvisy-ocr/src/engine/mod.rs +++ b/crates/nvisy-ocr/src/engine/mod.rs @@ -6,9 +6,9 @@ use std::fmt; use std::sync::Arc; use nvisy_core::Error; -pub use params::OcrProvider; use tracing::instrument; +pub use self::params::OcrProvider; use crate::backend::{Backend, ImageInput, ImageOutput, RunParams}; /// Type-erased OCR engine wrapping any [`Backend`] implementation. diff --git a/crates/nvisy-ocr/src/lib.rs b/crates/nvisy-ocr/src/lib.rs index ae0dfbaf..e1078f74 100644 --- a/crates/nvisy-ocr/src/lib.rs +++ b/crates/nvisy-ocr/src/lib.rs @@ -9,7 +9,7 @@ pub mod provider; #[doc(hidden)] pub mod prelude; -pub use backend::{ +pub use self::backend::{ Backend, Block, BlockKind, ImageFormat, ImageInput, ImageOutput, Line, Page, RunParams, Word, }; -pub use engine::{OcrEngine, OcrProvider}; +pub use self::engine::{OcrEngine, OcrProvider}; diff --git a/crates/nvisy-ocr/src/provider/aws_textract/mod.rs b/crates/nvisy-ocr/src/provider/aws_textract/mod.rs index 26d9ca77..2bae9383 100644 --- a/crates/nvisy-ocr/src/provider/aws_textract/mod.rs +++ b/crates/nvisy-ocr/src/provider/aws_textract/mod.rs @@ -6,5 +6,5 @@ mod backend; mod params; -pub use backend::AwsTextractBackend; -pub use params::AwsTextractParams; +pub use self::backend::AwsTextractBackend; +pub use self::params::AwsTextractParams; diff --git a/crates/nvisy-ocr/src/provider/azure_docai/mod.rs b/crates/nvisy-ocr/src/provider/azure_docai/mod.rs index 24f46228..5c09835f 100644 --- a/crates/nvisy-ocr/src/provider/azure_docai/mod.rs +++ b/crates/nvisy-ocr/src/provider/azure_docai/mod.rs @@ -6,5 +6,5 @@ mod backend; mod params; -pub use backend::AzureDocaiBackend; -pub use params::AzureDocaiParams; +pub use self::backend::AzureDocaiBackend; +pub use 
self::params::AzureDocaiParams; diff --git a/crates/nvisy-ocr/src/provider/datalab_surya/mod.rs b/crates/nvisy-ocr/src/provider/datalab_surya/mod.rs index bb595da0..793fe5c8 100644 --- a/crates/nvisy-ocr/src/provider/datalab_surya/mod.rs +++ b/crates/nvisy-ocr/src/provider/datalab_surya/mod.rs @@ -6,5 +6,5 @@ mod backend; mod params; -pub use backend::SuryaBackend; -pub use params::SuryaParams; +pub use self::backend::SuryaBackend; +pub use self::params::SuryaParams; diff --git a/crates/nvisy-ocr/src/provider/google_vision/mod.rs b/crates/nvisy-ocr/src/provider/google_vision/mod.rs index 803dda9b..e45502ec 100644 --- a/crates/nvisy-ocr/src/provider/google_vision/mod.rs +++ b/crates/nvisy-ocr/src/provider/google_vision/mod.rs @@ -6,5 +6,5 @@ mod backend; mod params; -pub use backend::GoogleVisionBackend; -pub use params::GoogleVisionParams; +pub use self::backend::GoogleVisionBackend; +pub use self::params::GoogleVisionParams; diff --git a/crates/nvisy-ocr/src/provider/mod.rs b/crates/nvisy-ocr/src/provider/mod.rs index bf4b7831..b4509b4a 100644 --- a/crates/nvisy-ocr/src/provider/mod.rs +++ b/crates/nvisy-ocr/src/provider/mod.rs @@ -3,8 +3,8 @@ mod datalab_surya; mod paddle_paddlex; -pub use datalab_surya::{SuryaBackend, SuryaParams}; -pub use paddle_paddlex::{PaddleXBackend, PaddleXParams}; +pub use self::datalab_surya::{SuryaBackend, SuryaParams}; +pub use self::paddle_paddlex::{PaddleXBackend, PaddleXParams}; #[cfg(feature = "aws-textract")] #[cfg_attr(docsrs, doc(cfg(feature = "aws-textract")))] @@ -18,10 +18,10 @@ mod google_vision; #[cfg(feature = "aws-textract")] #[cfg_attr(docsrs, doc(cfg(feature = "aws-textract")))] -pub use aws_textract::{AwsTextractBackend, AwsTextractParams}; +pub use self::aws_textract::{AwsTextractBackend, AwsTextractParams}; #[cfg(feature = "azure-docai")] #[cfg_attr(docsrs, doc(cfg(feature = "azure-docai")))] -pub use azure_docai::{AzureDocaiBackend, AzureDocaiParams}; +pub use self::azure_docai::{AzureDocaiBackend, 
AzureDocaiParams}; #[cfg(feature = "google-vision")] #[cfg_attr(docsrs, doc(cfg(feature = "google-vision")))] -pub use google_vision::{GoogleVisionBackend, GoogleVisionParams}; +pub use self::google_vision::{GoogleVisionBackend, GoogleVisionParams}; diff --git a/crates/nvisy-ocr/src/provider/paddle_paddlex/mod.rs b/crates/nvisy-ocr/src/provider/paddle_paddlex/mod.rs index 5c02e823..7d6f28ec 100644 --- a/crates/nvisy-ocr/src/provider/paddle_paddlex/mod.rs +++ b/crates/nvisy-ocr/src/provider/paddle_paddlex/mod.rs @@ -6,5 +6,5 @@ mod backend; mod params; -pub use backend::PaddleXBackend; -pub use params::PaddleXParams; +pub use self::backend::PaddleXBackend; +pub use self::params::PaddleXParams; diff --git a/crates/nvisy-ontology/src/context/analytic/mod.rs b/crates/nvisy-ontology/src/context/analytic/mod.rs index 841d9a93..5b2c7aef 100644 --- a/crates/nvisy-ontology/src/context/analytic/mod.rs +++ b/crates/nvisy-ontology/src/context/analytic/mod.rs @@ -3,11 +3,12 @@ mod embedding; mod pattern; -pub use embedding::EmbeddingData; -pub use pattern::PatternData; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +pub use self::embedding::EmbeddingData; +pub use self::pattern::PatternData; + /// Analytic computation variants. #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] #[serde(tag = "kind", rename_all = "snake_case")] diff --git a/crates/nvisy-ontology/src/context/biometric/mod.rs b/crates/nvisy-ontology/src/context/biometric/mod.rs index 79f9decc..de235e5a 100644 --- a/crates/nvisy-ontology/src/context/biometric/mod.rs +++ b/crates/nvisy-ontology/src/context/biometric/mod.rs @@ -3,10 +3,11 @@ mod face; mod voice; -pub use face::FaceData; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -pub use voice::VoiceData; + +pub use self::face::FaceData; +pub use self::voice::VoiceData; /// Biometric identity verification variants. 
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] diff --git a/crates/nvisy-ontology/src/context/document/mod.rs b/crates/nvisy-ontology/src/context/document/mod.rs index 736aaa56..b34d15e5 100644 --- a/crates/nvisy-ontology/src/context/document/mod.rs +++ b/crates/nvisy-ontology/src/context/document/mod.rs @@ -5,8 +5,9 @@ mod template; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -pub use signature::SignatureData; -pub use template::TemplateData; + +pub use self::signature::SignatureData; +pub use self::template::TemplateData; /// Document-related reference variants. #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] diff --git a/crates/nvisy-ontology/src/context/geospatial/mod.rs b/crates/nvisy-ontology/src/context/geospatial/mod.rs index e3426517..0e81d0ba 100644 --- a/crates/nvisy-ontology/src/context/geospatial/mod.rs +++ b/crates/nvisy-ontology/src/context/geospatial/mod.rs @@ -4,12 +4,13 @@ mod address; mod coordinates; mod region; -pub use address::AddressData; -pub use coordinates::GeoCoordinate; -pub use region::{GeoBounds, GeoShape, RegionData}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +pub use self::address::AddressData; +pub use self::coordinates::GeoCoordinate; +pub use self::region::{GeoBounds, GeoShape, RegionData}; + /// Geospatial location variants. 
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] #[serde(tag = "kind", rename_all = "snake_case")] diff --git a/crates/nvisy-ontology/src/context/mod.rs b/crates/nvisy-ontology/src/context/mod.rs index e49872f1..9c293157 100644 --- a/crates/nvisy-ontology/src/context/mod.rs +++ b/crates/nvisy-ontology/src/context/mod.rs @@ -12,11 +12,12 @@ pub mod geospatial; pub mod reference; pub mod temporal; -pub use entry::{ContextEntry, ContextEntryData}; use nvisy_core::content::ContentSource; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +pub use self::entry::{ContextEntry, ContextEntryData}; + /// A collection of [`Context`]s attached to a pipeline run. #[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)] pub struct Contexts { diff --git a/crates/nvisy-ontology/src/context/reference/mod.rs b/crates/nvisy-ontology/src/context/reference/mod.rs index d6ba0167..e7b0be2e 100644 --- a/crates/nvisy-ontology/src/context/reference/mod.rs +++ b/crates/nvisy-ontology/src/context/reference/mod.rs @@ -5,12 +5,13 @@ mod image; mod tag; mod text; -pub use credential::CredentialData; -pub use image::ImageData; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -pub use tag::TagData; -pub use text::{TextData, TextEntry}; + +pub use self::credential::CredentialData; +pub use self::image::ImageData; +pub use self::tag::TagData; +pub use self::text::{TextData, TextEntry}; /// Direct comparison reference variants. #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] diff --git a/crates/nvisy-ontology/src/context/temporal/mod.rs b/crates/nvisy-ontology/src/context/temporal/mod.rs index 5fa8f727..7055d353 100644 --- a/crates/nvisy-ontology/src/context/temporal/mod.rs +++ b/crates/nvisy-ontology/src/context/temporal/mod.rs @@ -2,10 +2,11 @@ mod date; -pub use date::DateData; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +pub use self::date::DateData; + /// Temporal matching variants. 
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] #[serde(tag = "kind", rename_all = "snake_case")] diff --git a/crates/nvisy-ontology/src/entity/category.rs b/crates/nvisy-ontology/src/entity/category.rs index 4e394892..5460051e 100644 --- a/crates/nvisy-ontology/src/entity/category.rs +++ b/crates/nvisy-ontology/src/entity/category.rs @@ -1,31 +1,55 @@ -//! Shared entity category tag. +//! Broad entity category classification. //! -//! [`EntityCategory`] classifies detected sensitive data into broad -//! categories used by both detection and pattern matching crates. +//! [`EntityCategory`] groups related [`EntityKind`](super::EntityKind) +//! variants into policy-addressable buckets. Policy selectors can +//! target an entire category (e.g. "redact all financial data") without +//! enumerating individual kinds. use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use strum::{Display, EnumString}; -/// Category of sensitive data an entity belongs to. -#[derive(Debug, Clone, PartialEq, Eq, Hash, Display, EnumString)] -#[derive(Serialize, Deserialize, JsonSchema)] +/// Broad category of sensitive data. +/// +/// Each [`EntityKind`](super::EntityKind) maps to exactly one category +/// via [`EntityKind::category()`](super::EntityKind::category). +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Display, EnumString, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "snake_case")] #[strum(serialize_all = "snake_case")] pub enum EntityCategory { - /// Personally Identifiable Information (names, SSNs, addresses, etc.). - Pii, - /// Protected Health Information (HIPAA-regulated data). - Phi, - /// Financial data (credit card numbers, bank accounts, etc.). + /// Personal identity: names, government IDs, dates of birth, and + /// other attributes that directly identify a natural person. + PersonalIdentity, + /// Contact information: email addresses, phone numbers, physical + /// addresses, postal codes, and URLs. 
+ ContactInfo, + /// Demographic attributes: age, gender, ethnicity, religion, + /// nationality, and citizenship. + Demographic, + /// Financial instruments and accounts: payment cards, bank + /// accounts, routing numbers, IBAN, crypto addresses, and + /// monetary amounts. Financial, - /// Secrets and credentials (API keys, passwords, tokens). - Credentials, - /// Legal documents and privileged communications. - Legal, - /// Biometric data (fingerprints, iris scans, voiceprints). + /// Protected health information: medical record numbers, + /// insurance IDs, prescriptions, diagnoses, and medications. + Health, + /// Biometric identifiers: fingerprints, voiceprints, retina + /// scans, and facial geometry templates. Biometric, - /// User-defined or plugin-specific category. - #[strum(default)] - Custom(String), + /// Secrets and credentials: passwords, API keys, authentication + /// tokens, and private cryptographic keys. + Credentials, + /// Network and device identifiers: IP addresses, MAC addresses, + /// device IDs, and usernames. + NetworkIdentifier, + /// Geographic and spatial data: GPS coordinates and geolocation + /// metadata. + Location, + /// Sensitive visual elements detected in images or video: + /// faces, handwriting, signatures, logos, and barcodes. + Visual, + /// Organizational identifiers: company names, departments, + /// facilities, and institutional reference numbers. + Organizational, } diff --git a/crates/nvisy-ontology/src/entity/kind.rs b/crates/nvisy-ontology/src/entity/kind.rs index dbf0a2ea..3c74d0a4 100644 --- a/crates/nvisy-ontology/src/entity/kind.rs +++ b/crates/nvisy-ontology/src/entity/kind.rs @@ -4,8 +4,9 @@ //! can detect or redact. Each variant maps to a stable `snake_case` //! string for serialization and display. //! -//! Every variant also maps to an [`EntityCategory`] via [`EntityKind::category`] -//! and an [`EntitySensitivity`] via [`EntityKind::sensitivity`]. +//! Every variant also maps to: +//! 
- an [`EntityCategory`] via [`EntityKind::category`], +//! - an [`EntitySensitivity`] via [`EntityKind::sensitivity`]. use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -20,7 +21,11 @@ use super::sensitivity::EntitySensitivity; #[serde(rename_all = "snake_case")] #[strum(serialize_all = "snake_case")] pub enum EntityKind { - // Identity documents: + // Personal identity + /// Person name (full, first, or last). + PersonName, + /// Date of birth. + DateOfBirth, /// Government-issued identification number (SSN, SIN, Aadhaar, national ID, etc.). GovernmentId, /// Tax identification number (ITIN, EIN, TIN, etc.). @@ -29,44 +34,42 @@ pub enum EntityKind { DriversLicense, /// Passport number. PassportNumber, + /// National insurance or social-security equivalent (NI, BSN, AHVN, etc.). + NationalInsuranceNumber, /// Vehicle identification number (VIN). VehicleId, /// License plate number. LicensePlate, - // Personal information: - /// Person name (full, first, or last). - PersonName, - /// Date of birth. - DateOfBirth, - /// Age value. - Age, - /// Demographic attribute (gender, race/ethnicity, religion, orientation, etc.). - Demographic, - - // Contact information: + // Contact information /// Email address. EmailAddress, /// Phone number. PhoneNumber, - /// Physical / mailing address. + /// Physical or mailing address. Address, /// Postal or ZIP code. PostalCode, /// URL or hyperlink. Url, - // Network & device identifiers: - /// IP address (v4 or v6). - IpAddress, - /// MAC (hardware) address. - MacAddress, - /// Device identifier (IMEI, IDFA, etc.). - DeviceId, - /// Username or online handle. - Username, - - // Financial: + // Demographic + /// Age value. + Age, + /// Gender identity. + Gender, + /// Racial or ethnic background. + Ethnicity, + /// Religious affiliation. + Religion, + /// Nationality. + Nationality, + /// Citizenship status. + Citizenship, + /// Language or dialect spoken. 
+ Language, + + // Financial /// Payment card number (credit or debit). PaymentCard, /// Payment card security code (CVV/CVC). @@ -75,26 +78,40 @@ pub enum EntityKind { CardExpiry, /// Bank account number. BankAccount, - /// Bank routing / transit number. + /// Bank routing or transit number. BankRouting, /// International Bank Account Number (IBAN). Iban, /// SWIFT / BIC code. SwiftCode, - /// Monetary amount. - Amount, /// Cryptocurrency wallet address. CryptoAddress, + /// Monetary amount. + Amount, - // Health: + // Health /// Medical or patient identifier. MedicalId, /// Insurance policy number. InsuranceId, /// Prescription number. PrescriptionId, + /// Medical diagnosis or condition. + Diagnosis, + /// Drug or medication name in a patient context. + Medication, + + // Biometric + /// Fingerprint template or minutiae data. + Fingerprint, + /// Voiceprint or speaker embedding. + Voiceprint, + /// Retina or iris scan data. + RetinaScan, + /// Facial geometry or face embedding (not a photo: see [`Face`](Self::Face)). + FacialGeometry, - // Credentials: + // Credentials /// Password or passphrase. Password, /// API key. @@ -104,31 +121,23 @@ pub enum EntityKind { /// Private cryptographic key. PrivateKey, - // Biometric: - /// Fingerprint template or minutiae data. - Fingerprint, - /// Voiceprint / speaker embedding. - Voiceprint, - /// Retina or iris scan data. - RetinaScan, - /// Facial geometry / face embedding (not a photo — see [`Face`](Self::Face)). - FacialGeometry, + // Network and device identifiers + /// IP address (v4 or v6). + IpAddress, + /// MAC (hardware) address. + MacAddress, + /// Device identifier (IMEI, IDFA, etc.). + DeviceId, + /// Username or online handle. + Username, - // Location: + // Location /// GPS coordinates (latitude / longitude). Coordinates, /// Geolocation metadata (EXIF, cell tower, etc.). GeolocationMetadata, - // Dates & times: - /// Date and/or time value. 
- DateTime, - - // Organizations: - /// Company or organisation name. - OrganizationName, - - // Visual / image entities: + // Visual /// Detected human face in an image. Face, /// Handwritten text region. @@ -139,35 +148,54 @@ pub enum EntityKind { Logo, /// Barcode (1D) or QR code (2D). Barcode, + + // Organizational + /// Company or institution name. + OrganizationName, + /// Internal division or department name. + DepartmentName, + /// Physical facility name (hospital, office, school). + FacilityName, + /// Legal or administrative case identifier. + CaseNumber, + /// Internal reference number (invoice, contract, PO, employee number, membership ID). + InternalId, + + // Temporal + /// Date, time, or datetime value. + DateTime, } impl EntityKind { /// Returns the [`EntityCategory`] this entity kind belongs to. pub fn category(&self) -> EntityCategory { match self { - // Identity & personal - Self::GovernmentId + // Personal identity + Self::PersonName + | Self::DateOfBirth + | Self::GovernmentId | Self::TaxId | Self::DriversLicense | Self::PassportNumber + | Self::NationalInsuranceNumber | Self::VehicleId - | Self::LicensePlate - | Self::PersonName - | Self::DateOfBirth - | Self::Age - | Self::Demographic => EntityCategory::Pii, + | Self::LicensePlate => EntityCategory::PersonalIdentity, // Contact Self::EmailAddress | Self::PhoneNumber | Self::Address | Self::PostalCode - | Self::Url => EntityCategory::Pii, + | Self::Url => EntityCategory::ContactInfo, - // Network & device - Self::IpAddress | Self::MacAddress | Self::DeviceId | Self::Username => { - EntityCategory::Pii - } + // Demographic + Self::Age + | Self::Gender + | Self::Ethnicity + | Self::Religion + | Self::Nationality + | Self::Citizenship + | Self::Language => EntityCategory::Demographic, // Financial Self::PaymentCard @@ -177,35 +205,50 @@ impl EntityKind { | Self::BankRouting | Self::Iban | Self::SwiftCode - | Self::Amount - | Self::CryptoAddress => EntityCategory::Financial, + | 
Self::CryptoAddress + | Self::Amount => EntityCategory::Financial, // Health - Self::MedicalId | Self::InsuranceId | Self::PrescriptionId => EntityCategory::Phi, + Self::MedicalId + | Self::InsuranceId + | Self::PrescriptionId + | Self::Diagnosis + | Self::Medication => EntityCategory::Health, + + // Biometric + Self::Fingerprint | Self::Voiceprint | Self::RetinaScan | Self::FacialGeometry => { + EntityCategory::Biometric + } // Credentials Self::Password | Self::ApiKey | Self::AuthToken | Self::PrivateKey => { EntityCategory::Credentials } - // Biometric - Self::Fingerprint - | Self::Voiceprint - | Self::RetinaScan - | Self::FacialGeometry - | Self::Face => EntityCategory::Biometric, + // Network + Self::IpAddress | Self::MacAddress | Self::DeviceId | Self::Username => { + EntityCategory::NetworkIdentifier + } // Location - Self::Coordinates | Self::GeolocationMetadata => EntityCategory::Pii, - - // Dates & times - Self::DateTime => EntityCategory::Pii, + Self::Coordinates | Self::GeolocationMetadata => EntityCategory::Location, - // Organizations - Self::OrganizationName => EntityCategory::Pii, + // Visual + Self::Face | Self::Handwriting | Self::Signature | Self::Logo | Self::Barcode => { + EntityCategory::Visual + } - // Visual / image - Self::Handwriting | Self::Signature | Self::Logo | Self::Barcode => EntityCategory::Pii, + // Organizational + Self::OrganizationName + | Self::DepartmentName + | Self::FacilityName + | Self::CaseNumber + | Self::InternalId => EntityCategory::Organizational, + + // Temporal (grouped under PersonalIdentity: bare dates most + // commonly appear alongside personal data and are regulated + // as PII by GDPR/CCPA) + Self::DateTime => EntityCategory::PersonalIdentity, } } @@ -215,6 +258,7 @@ impl EntityKind { // Critical: irrevocable identifiers, secrets, biometrics Self::GovernmentId | Self::PassportNumber + | Self::NationalInsuranceNumber | Self::PaymentCard | Self::CardSecurityCode | Self::BankAccount @@ -238,33 +282,46 @@ impl 
EntityKind { | Self::MedicalId | Self::InsuranceId | Self::PrescriptionId + | Self::Diagnosis + | Self::Medication | Self::Iban | Self::CryptoAddress | Self::Face - | Self::Signature => EntitySensitivity::High, + | Self::Signature + | Self::Coordinates => EntitySensitivity::High, // Medium: indirectly identifying Self::Age - | Self::Demographic + | Self::Gender + | Self::Ethnicity + | Self::Religion + | Self::Nationality + | Self::Citizenship + | Self::Language | Self::PostalCode | Self::IpAddress | Self::MacAddress | Self::DeviceId | Self::Username - | Self::Coordinates - | Self::GeolocationMetadata | Self::CardExpiry | Self::BankRouting | Self::SwiftCode | Self::VehicleId | Self::LicensePlate + | Self::GeolocationMetadata | Self::DateTime - | Self::Handwriting => EntitySensitivity::Medium, + | Self::Handwriting + | Self::CaseNumber + | Self::InternalId => EntitySensitivity::Medium, - // Low: quasi-public - Self::Url | Self::Amount | Self::OrganizationName | Self::Logo | Self::Barcode => { - EntitySensitivity::Low - } + // Low: quasi-public or context-dependent + Self::Url + | Self::Amount + | Self::OrganizationName + | Self::DepartmentName + | Self::FacilityName + | Self::Logo + | Self::Barcode => EntitySensitivity::Low, } } } @@ -302,10 +359,38 @@ mod tests { } #[test] - fn category_pii() { - assert_eq!(EntityKind::GovernmentId.category(), EntityCategory::Pii); - assert_eq!(EntityKind::PersonName.category(), EntityCategory::Pii); - assert_eq!(EntityKind::Address.category(), EntityCategory::Pii); + fn category_personal_identity() { + assert_eq!( + EntityKind::GovernmentId.category(), + EntityCategory::PersonalIdentity + ); + assert_eq!( + EntityKind::PersonName.category(), + EntityCategory::PersonalIdentity + ); + assert_eq!( + EntityKind::DateOfBirth.category(), + EntityCategory::PersonalIdentity + ); + } + + #[test] + fn category_contact_info() { + assert_eq!( + EntityKind::EmailAddress.category(), + EntityCategory::ContactInfo + ); + 
assert_eq!(EntityKind::Address.category(), EntityCategory::ContactInfo); + } + + #[test] + fn category_demographic() { + assert_eq!(EntityKind::Gender.category(), EntityCategory::Demographic); + assert_eq!( + EntityKind::Ethnicity.category(), + EntityCategory::Demographic + ); + assert_eq!(EntityKind::Religion.category(), EntityCategory::Demographic); } #[test] @@ -318,23 +403,16 @@ mod tests { } #[test] - fn category_phi() { - assert_eq!(EntityKind::MedicalId.category(), EntityCategory::Phi); - assert_eq!(EntityKind::PrescriptionId.category(), EntityCategory::Phi); + fn category_health() { + assert_eq!(EntityKind::MedicalId.category(), EntityCategory::Health); + assert_eq!(EntityKind::Diagnosis.category(), EntityCategory::Health); + assert_eq!(EntityKind::Medication.category(), EntityCategory::Health); } #[test] fn category_credentials() { assert_eq!(EntityKind::Password.category(), EntityCategory::Credentials); assert_eq!(EntityKind::ApiKey.category(), EntityCategory::Credentials); - assert_eq!( - EntityKind::AuthToken.category(), - EntityCategory::Credentials - ); - assert_eq!( - EntityKind::PrivateKey.category(), - EntityCategory::Credentials - ); } #[test] @@ -345,11 +423,23 @@ mod tests { ); assert_eq!(EntityKind::Voiceprint.category(), EntityCategory::Biometric); assert_eq!(EntityKind::RetinaScan.category(), EntityCategory::Biometric); + assert_eq!(EntityKind::Face.category(), EntityCategory::Visual); + } + + #[test] + fn category_organizational() { assert_eq!( - EntityKind::FacialGeometry.category(), - EntityCategory::Biometric + EntityKind::OrganizationName.category(), + EntityCategory::Organizational + ); + assert_eq!( + EntityKind::CaseNumber.category(), + EntityCategory::Organizational + ); + assert_eq!( + EntityKind::InternalId.category(), + EntityCategory::Organizational ); - assert_eq!(EntityKind::Face.category(), EntityCategory::Biometric); } #[test] @@ -383,6 +473,7 @@ mod tests { EntitySensitivity::High ); 
assert_eq!(EntityKind::MedicalId.sensitivity(), EntitySensitivity::High); + assert_eq!(EntityKind::Diagnosis.sensitivity(), EntitySensitivity::High); } #[test] diff --git a/crates/nvisy-ontology/src/entity/location/mod.rs b/crates/nvisy-ontology/src/entity/location/mod.rs index 14a8b196..3bf5cfcf 100644 --- a/crates/nvisy-ontology/src/entity/location/mod.rs +++ b/crates/nvisy-ontology/src/entity/location/mod.rs @@ -5,13 +5,14 @@ mod image; mod tabular; mod text; -pub use audio::AudioLocation; use derive_more::From; -pub use image::ImageLocation; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -pub use tabular::TabularLocation; -pub use text::TextLocation; + +pub use self::audio::AudioLocation; +pub use self::image::ImageLocation; +pub use self::tabular::TabularLocation; +pub use self::text::TextLocation; /// A modality-specific location for a detected entity. /// diff --git a/crates/nvisy-ontology/src/entity/method.rs b/crates/nvisy-ontology/src/entity/method.rs index 028664de..42839d46 100644 --- a/crates/nvisy-ontology/src/entity/method.rs +++ b/crates/nvisy-ontology/src/entity/method.rs @@ -1,35 +1,146 @@ -//! Detection method classification. +//! Extraction, recognition, and refinement method classification. +//! +//! These enums form the provenance record for every detected entity, +//! documenting how content was extracted from its source modality, +//! how sensitive data was identified, and what post-detection +//! refinements were applied. use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use strum::{Display, EnumString}; -/// Method used to detect a sensitive entity. +/// How content was extracted from its source modality into analyzable form. +/// +/// Each variant names the technique that transformed raw content +/// (image pixels, audio samples, binary file formats) into a +/// representation suitable for entity recognition. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[derive(Display, EnumString, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "snake_case")] #[strum(serialize_all = "snake_case")] -pub enum DetectionMethod { - /// Regular expression pattern matching. +pub enum ExtractionMethod { + // Text + /// Structural parsing of document formats (PDF, DOCX, HTML) + /// into text and layout primitives. + DocumentParsing, + /// Inference of field semantics from column names, types, or + /// positional conventions in tabular data. + SchemaInference, + + // Image / Video + /// Optical character recognition: converts raster text + /// (printed or handwritten) into machine-readable characters. + OpticalCharacterRecognition, + /// Object detection: locates and labels regions of interest + /// within an image or video frame. + ObjectDetection, + /// Scene text detection: localises text embedded in natural + /// images (signs, screens, whiteboards) prior to OCR. + SceneTextDetection, + /// Table extraction: recovers row/column structure from images + /// or scanned PDFs, preserving cell relationships that plain + /// OCR loses. + TableExtraction, + /// Document layout analysis: identifies structural regions + /// (headers, footers, signature blocks, form fields) by spatial + /// arrangement rather than content. + LayoutAnalysis, + /// Metadata extraction: reads EXIF, PDF properties, or other + /// embedded metadata that may contain PII (author, GPS, device info). + MetadataExtraction, + /// Frame extraction: samples individual frames from video + /// streams for downstream image analysis. + FrameExtraction, + + // Audio / Video + /// Speech-to-text transcription: converts audio into text. + Transcription, + /// Speaker diarization: segments audio by speaker identity + /// to attribute utterances before recognition. + Diarization, +} + +/// Technique used to identify a sensitive entity within extracted content. 
+/// +/// Each variant names a self-contained recognition strategy. +/// An entity's `recognition_methods` vector records every technique +/// that contributed to its identification, ordered by application time. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Display, EnumString, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] +pub enum RecognitionMethod { + // Pattern + /// Regular expression matching against known PII formats. Regex, - /// Lookup in a known-value dictionary. + /// Mathematical validation of a candidate value + /// (Luhn, IBAN mod-97, SSN area rules). + Checksum, + /// Exact-match lookup in a curated value list. Dictionary, - /// Named-entity recognition via AI model. + /// Co-occurrence analysis: keywords near a candidate raise or + /// lower confidence (e.g. "SSN" adjacent to a 9-digit number). + ContextualAnalysis, + /// Format heuristics: entropy, character distribution, or + /// structural cues that suggest a value is sensitive without + /// an explicit regex. + Heuristic, + + // Model + /// Named-entity recognition via language model. Ner, - /// Contextual NLP analysis (discourse-level understanding). - ContextualNlp, - /// OCR text extraction with bounding boxes. - Ocr, - /// Face detection in images. - FaceDetection, - /// Object detection in images. - ObjectDetection, - /// Entity detection from speech transcription. - SpeechTranscript, - /// Speaker-identified audio segment for redaction. - SpeakerRedaction, - /// Multiple methods combined to produce a single detection. - Composite, - /// User-provided annotations. + /// Document or field-level classification + /// (e.g. "this column contains SSNs"). + Classification, + /// Semantic similarity search via vector embeddings. + Embedding, + /// Matching extracted values against an external identity or + /// record database. 
+ CrossReference, + + // Biometric + /// Biometric identification: face recognition, voiceprint + /// matching, or other physiological/behavioral trait analysis. + Biometric, + + // Human + /// User-provided annotation. Manual, } + +/// Post-detection refinement applied to an entity before final output. +/// +/// Refinement methods do not discover new entities: they adjust +/// confidence, merge duplicates, or verify existing detections. +/// Recorded on the entity to explain why its final state may differ +/// from the initial detection. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Display, EnumString, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] +pub enum RefinementMethod { + /// Cross-detector deduplication: merges overlapping entities + /// from independent detectors, combining their confidence and + /// attribution. + Deduplication, + /// Ensemble fusion: combines confidence scores from multiple + /// detectors using a voting or averaging strategy. + EnsembleFusion, + /// Model-based verification: a secondary model (typically VLM) + /// reviews detections against source content to confirm, correct, + /// or reject. + ModelVerification, + /// Policy evaluation: applies business rules, thresholds, or + /// per-category overrides to filter or re-score detections. + PolicyEvaluation, + /// Human review: a reviewer confirmed, corrected, or rejected + /// the detection. + HumanReview, + /// Confidence calibration: adjusts raw model scores to align + /// with empirical precision targets. + ConfidenceCalibration, + /// Contextual promotion/demotion: surrounding document context + /// upgrades or downgrades an entity's confidence after initial + /// detection. 
+ ContextualAdjustment, +} diff --git a/crates/nvisy-ontology/src/entity/mod.rs b/crates/nvisy-ontology/src/entity/mod.rs index 84c9404f..6fa6f6e8 100644 --- a/crates/nvisy-ontology/src/entity/mod.rs +++ b/crates/nvisy-ontology/src/entity/mod.rs @@ -13,20 +13,21 @@ mod model; mod output; mod sensitivity; -pub use annotation::{Annotation, AnnotationKind, AnnotationLabel, AnnotationScope}; -pub use category::EntityCategory; use derive_more::{Deref, DerefMut, From, IntoIterator}; -pub use kind::EntityKind; -pub use location::{AudioLocation, ImageLocation, Location, TabularLocation, TextLocation}; -pub use method::DetectionMethod; -pub use model::{ModelInfo, ModelKind}; use nvisy_core::content::ContentSource; -pub use output::DetectionOutput; use schemars::JsonSchema; -pub use sensitivity::EntitySensitivity; use serde::{Deserialize, Serialize}; use uuid::Uuid; +pub use self::annotation::{Annotation, AnnotationKind, AnnotationLabel, AnnotationScope}; +pub use self::category::EntityCategory; +pub use self::kind::EntityKind; +pub use self::location::{AudioLocation, ImageLocation, Location, TabularLocation, TextLocation}; +pub use self::method::{ExtractionMethod, RecognitionMethod, RefinementMethod}; +pub use self::model::{ModelInfo, ModelKind}; +pub use self::output::DetectionOutput; +pub use self::sensitivity::EntitySensitivity; + /// A detected sensitive data occurrence within a document. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "camelCase")] @@ -40,8 +41,14 @@ pub struct Entity { pub entity_kind: EntityKind, /// The matched text or value. pub value: String, - /// How this entity was detected. - pub detection_method: DetectionMethod, + /// How content was extracted from its source modality, ordered by application time. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub extraction_methods: Vec, + /// Techniques used to identify this entity, ordered by application time. 
+ pub recognition_methods: Vec, + /// Post-detection refinements applied to this entity, ordered by application time. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub refinement_methods: Vec, /// Detection confidence score in the range `[0.0, 1.0]`. pub confidence: f64, /// Modality-specific location of the entity. @@ -61,12 +68,15 @@ impl Entity { self.source.as_uuid() } - /// Create a new entity with the given detection details. + /// Create a new entity with the given recognition method and confidence. + /// + /// The `category` is derived from `entity_kind` via + /// [`EntityKind::category()`] when not supplied explicitly. pub fn new( category: EntityCategory, entity_kind: EntityKind, value: impl Into, - detection_method: DetectionMethod, + recognition_method: RecognitionMethod, confidence: f64, ) -> Self { Self { @@ -74,7 +84,9 @@ impl Entity { category, entity_kind, value: value.into(), - detection_method, + extraction_methods: Vec::new(), + recognition_methods: vec![recognition_method], + refinement_methods: Vec::new(), confidence, location: None, language: None, @@ -82,6 +94,22 @@ impl Entity { } } + /// Create a new entity, deriving the category from the entity kind. + pub fn from_kind( + entity_kind: EntityKind, + value: impl Into, + recognition_method: RecognitionMethod, + confidence: f64, + ) -> Self { + Self::new( + entity_kind.category(), + entity_kind, + value, + recognition_method, + confidence, + ) + } + /// Set the modality-specific location on this entity. pub fn with_location(mut self, location: Location) -> Self { self.location = Some(location); @@ -151,11 +179,20 @@ impl Entities { .collect() } - /// Retain only entities matching the given detection method. - pub fn by_method(&self, method: DetectionMethod) -> Self { + /// Retain only entities that were recognised (at least partly) by the given method. 
+ pub fn by_recognition_method(&self, method: RecognitionMethod) -> Self { + self.0 + .iter() + .filter(|e| e.recognition_methods.contains(&method)) + .cloned() + .collect() + } + + /// Retain only entities whose content was extracted by the given method. + pub fn by_extraction_method(&self, method: ExtractionMethod) -> Self { self.0 .iter() - .filter(|e| e.detection_method == method) + .filter(|e| e.extraction_methods.contains(&method)) .cloned() .collect() } diff --git a/crates/nvisy-ontology/src/policy/mod.rs b/crates/nvisy-ontology/src/policy/mod.rs index 39696ffc..de53365f 100644 --- a/crates/nvisy-ontology/src/policy/mod.rs +++ b/crates/nvisy-ontology/src/policy/mod.rs @@ -7,9 +7,9 @@ mod strategy; mod summary; mod types; -pub use retention::{Retention, RetentionPolicy, RetentionScope}; -pub use rule::{PolicyRule, RuleAction, RuleCondition}; -pub use selector::EntitySelector; -pub use strategy::{AudioStrategy, ImageStrategy, Strategy, TextStrategy}; -pub use summary::RedactionSummary; -pub use types::{Policies, Policy}; +pub use self::retention::{Retention, RetentionPolicy, RetentionScope}; +pub use self::rule::{PolicyRule, RuleAction, RuleCondition}; +pub use self::selector::EntitySelector; +pub use self::strategy::{AudioStrategy, ImageStrategy, Strategy, TextStrategy}; +pub use self::summary::RedactionSummary; +pub use self::types::{Policies, Policy}; diff --git a/crates/nvisy-ontology/src/policy/strategy/mod.rs b/crates/nvisy-ontology/src/policy/strategy/mod.rs index a43dd0e1..a75968c8 100644 --- a/crates/nvisy-ontology/src/policy/strategy/mod.rs +++ b/crates/nvisy-ontology/src/policy/strategy/mod.rs @@ -9,12 +9,13 @@ mod audio; mod image; mod text; -pub use audio::AudioStrategy; use derive_more::From; -pub use image::ImageStrategy; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -pub use text::TextStrategy; + +pub use self::audio::AudioStrategy; +pub use self::image::ImageStrategy; +pub use self::text::TextStrategy; /// Unified redaction 
strategy across all modalities. /// diff --git a/crates/nvisy-ontology/src/prelude.rs b/crates/nvisy-ontology/src/prelude.rs index 1d2b4428..92a1eab4 100644 --- a/crates/nvisy-ontology/src/prelude.rs +++ b/crates/nvisy-ontology/src/prelude.rs @@ -2,7 +2,7 @@ pub use crate::context::{Context, ContextEntry, ContextEntryData}; pub use crate::entity::{ - Annotation, AnnotationKind, DetectionMethod, DetectionOutput, Entities, Entity, EntityCategory, - EntityKind, EntitySensitivity, Location, + Annotation, AnnotationKind, DetectionOutput, Entities, Entity, EntityCategory, EntityKind, + EntitySensitivity, ExtractionMethod, Location, RecognitionMethod, RefinementMethod, }; pub use crate::policy::{Policies, Policy, PolicyRule, Strategy}; diff --git a/crates/nvisy-pattern/Cargo.toml b/crates/nvisy-pattern/Cargo.toml index 18ebacbb..34ce0493 100644 --- a/crates/nvisy-pattern/Cargo.toml +++ b/crates/nvisy-pattern/Cargo.toml @@ -44,3 +44,6 @@ aho-corasick = { workspace = true, features = [] } # Observability tracing = { workspace = true, features = [] } + +[dev-dependencies] +tempfile = { workspace = true } diff --git a/crates/nvisy-pattern/README.md b/crates/nvisy-pattern/README.md index ee38370d..139c173d 100644 --- a/crates/nvisy-pattern/README.md +++ b/crates/nvisy-pattern/README.md @@ -17,7 +17,7 @@ Detection runs in three phases: dictionary are injected as synthetic matches with confidence `1.0`. Allow-list filtering is applied inline during phases 1 and 2. All three phases -feed into a unified `Vec`. +feed into a unified `Vec`. 
### Pattern JSON schema @@ -27,7 +27,7 @@ Patterns are JSON definition files embedded at compile time from ```json { "name": "ssn", - "category": "pii", + "category": "personal_identity", "entity_type": "government_id", "pattern": { "regex": "\\b(\\d{3})-(\\d{2})-(\\d{4})\\b", @@ -56,15 +56,15 @@ Patterns are JSON definition files embedded at compile time from | Field | Type | Default | Description | |-------|------|---------|-------------| -| `regex` | string | — | Regular expression string | -| `validator` | string | — | Post-match validator name resolved via `ValidatorResolver` | +| `regex` | string | required | Regular expression string | +| `validator` | string | none | Post-match validator name resolved via `ValidatorResolver` | | `case_sensitive` | bool | `false` | Whether matching is case-sensitive | ### `dictionary` object (dictionary match source) | Field | Type | Default | Description | |-------|------|---------|-------------| -| `name` | string | — | Named dictionary from `DictionaryRegistry` | +| `name` | string | required | Named dictionary from `DictionaryRegistry` | | `case_sensitive` | bool | `false` | Whether matching is case-sensitive | ### Context rule (co-occurrence scoring) @@ -76,7 +76,7 @@ increased by `boost`, clamped to `[0.0, 1.0]`. | Field | Type | Default | Description | |-------|------|---------|-------------| -| `keywords` | string[] | — | Strings to search for in nearby spans | +| `keywords` | string[] | required | Strings to search for in nearby spans | | `window` | int | `3` | Number of spans before/after the match to examine | | `boost` | float | `0.1` | Confidence increase when a keyword is found | | `case_sensitive` | bool | `false` | Whether keyword matching is case-sensitive | @@ -88,36 +88,37 @@ adjacent spans. 
## Allow/deny lists -The `PatternEngineBuilder` supports exact-match allow and deny lists via the -[`AllowList`] and [`DenyList`] types: +Allow and deny lists are configured per-scan via [`ScanContext`], not on the +engine itself: ```rust,ignore -let allow = AllowList::new() - .with("123-45-6789") // suppress known test SSN - .with("000-00-0000"); - -let deny = DenyList::new() - .with("John Doe", EntityCategory::Pii, EntityKind::PersonName); - -let engine = PatternEngine::builder() - .with_allow(allow) - .with_deny(deny) - .build()?; +use nvisy_pattern::prelude::*; +use nvisy_ontology::entity::{EntityCategory, EntityKind, RecognitionMethod}; + +let ctx = ScanContext::new() + .with_allow(AllowList::new() + .with("123-45-6789") // suppress known test SSN + .with("000-00-0000")) + .with_deny(DenyList::new() + .with("John Doe", DenyRule { + category: EntityCategory::PersonalIdentity, + entity_kind: EntityKind::PersonName, + method: RecognitionMethod::Ner, + })); + +let matches = PatternEngine::instance().scan_text("...", &ctx); ``` - **Allow list** (`AllowList`): matched values that appear in the allow list are silently dropped during `scan_text`. - **Deny list** (`DenyList`): if a deny-list value is found in the text but was not matched by any regex or dictionary pattern, it is injected as a - synthetic `PatternMatch` with confidence `1.0` and source - `DetectionSource::DenyList`. - -Both types implement `FromIterator` for easy construction from iterators. + synthetic `RawMatch` with confidence `1.0` and `pattern_name: None`. ## Validators Validators are post-match checks resolved by name through `ValidatorResolver`. -Regex patterns reference a validator by name in their `pattern.validator` field; +Regex patterns reference a validator by name in their `pattern.validator` field: the engine runs the validator on each raw match and drops values that fail. 
## Documentation diff --git a/crates/nvisy-pattern/assets/patterns/date_of_birth.json b/crates/nvisy-pattern/assets/patterns/date_of_birth.json index 26ecd523..18ff6058 100644 --- a/crates/nvisy-pattern/assets/patterns/date_of_birth.json +++ b/crates/nvisy-pattern/assets/patterns/date_of_birth.json @@ -1,6 +1,6 @@ { "name": "date-of-birth", - "category": "pii", + "category": "personal_identity", "entity_type": "date_of_birth", "pattern": { "regex": "\\b(?:0[1-9]|1[0-2])[/\\-](?:0[1-9]|[12]\\d|3[01])[/\\-](?:19|20)\\d{2}\\b", diff --git a/crates/nvisy-pattern/assets/patterns/email.json b/crates/nvisy-pattern/assets/patterns/email.json index 87485389..eee1fb7f 100644 --- a/crates/nvisy-pattern/assets/patterns/email.json +++ b/crates/nvisy-pattern/assets/patterns/email.json @@ -1,6 +1,6 @@ { "name": "email", - "category": "pii", + "category": "contact_info", "entity_type": "email_address", "pattern": { "regex": "\\b[a-zA-Z0-9._%+\\-]+@[a-zA-Z0-9.\\-]+\\.[a-zA-Z]{2,}\\b", diff --git a/crates/nvisy-pattern/assets/patterns/ipv4.json b/crates/nvisy-pattern/assets/patterns/ipv4.json index 971ccd91..c6358231 100644 --- a/crates/nvisy-pattern/assets/patterns/ipv4.json +++ b/crates/nvisy-pattern/assets/patterns/ipv4.json @@ -1,6 +1,6 @@ { "name": "ipv4", - "category": "pii", + "category": "network_identifier", "entity_type": "ip_address", "pattern": { "regex": "\\b(?:(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\b", diff --git a/crates/nvisy-pattern/assets/patterns/ipv6.json b/crates/nvisy-pattern/assets/patterns/ipv6.json index ce096fd2..82e7f20c 100644 --- a/crates/nvisy-pattern/assets/patterns/ipv6.json +++ b/crates/nvisy-pattern/assets/patterns/ipv6.json @@ -1,6 +1,6 @@ { "name": "ipv6", - "category": "pii", + "category": "network_identifier", "entity_type": "ip_address", "pattern": { "regex": "\\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\\b|(?:[0-9a-fA-F]{1,4}:){1,7}:|::(?:[0-9a-fA-F]{1,4}:){0,5}[0-9a-fA-F]{1,4}\\b", diff --git 
a/crates/nvisy-pattern/assets/patterns/languages.json b/crates/nvisy-pattern/assets/patterns/languages.json index 5d460af0..9b4d52e5 100644 --- a/crates/nvisy-pattern/assets/patterns/languages.json +++ b/crates/nvisy-pattern/assets/patterns/languages.json @@ -1,7 +1,7 @@ { "name": "languages", - "category": "pii", - "entity_type": "demographic", + "category": "demographic", + "entity_type": "language", "dictionary": { "name": "languages", "confidence": [0.85, 0.45] diff --git a/crates/nvisy-pattern/assets/patterns/mac_address.json b/crates/nvisy-pattern/assets/patterns/mac_address.json index fd8fe8eb..8d62b60f 100644 --- a/crates/nvisy-pattern/assets/patterns/mac_address.json +++ b/crates/nvisy-pattern/assets/patterns/mac_address.json @@ -1,6 +1,6 @@ { "name": "mac-address", - "category": "pii", + "category": "network_identifier", "entity_type": "mac_address", "pattern": { "regex": "\\b(?:[0-9A-Fa-f]{2}[:\\-]){5}[0-9A-Fa-f]{2}\\b", diff --git a/crates/nvisy-pattern/assets/patterns/nationalities.json b/crates/nvisy-pattern/assets/patterns/nationalities.json index a32593c0..bec7c867 100644 --- a/crates/nvisy-pattern/assets/patterns/nationalities.json +++ b/crates/nvisy-pattern/assets/patterns/nationalities.json @@ -1,7 +1,7 @@ { "name": "nationalities", - "category": "pii", - "entity_type": "demographic", + "category": "demographic", + "entity_type": "nationality", "dictionary": { "name": "nationalities", "confidence": 0.85 diff --git a/crates/nvisy-pattern/assets/patterns/phone.json b/crates/nvisy-pattern/assets/patterns/phone.json index 5380e94d..484cc5b5 100644 --- a/crates/nvisy-pattern/assets/patterns/phone.json +++ b/crates/nvisy-pattern/assets/patterns/phone.json @@ -1,6 +1,6 @@ { "name": "phone", - "category": "pii", + "category": "contact_info", "entity_type": "phone_number", "pattern": { "regex": "(?:\\+\\d{1,3}[\\s.\\-]?)?\\(?\\d{2,4}\\)?[\\s.\\-]?\\d{3,4}[\\s.\\-]?\\d{4}\\b", diff --git a/crates/nvisy-pattern/assets/patterns/religions.json 
b/crates/nvisy-pattern/assets/patterns/religions.json index bb3d2f2b..cf038496 100644 --- a/crates/nvisy-pattern/assets/patterns/religions.json +++ b/crates/nvisy-pattern/assets/patterns/religions.json @@ -1,7 +1,7 @@ { "name": "religions", - "category": "pii", - "entity_type": "demographic", + "category": "demographic", + "entity_type": "religion", "dictionary": { "name": "religions", "confidence": 0.85 diff --git a/crates/nvisy-pattern/assets/patterns/ssn.json b/crates/nvisy-pattern/assets/patterns/ssn.json index 12aeb750..21c887f5 100644 --- a/crates/nvisy-pattern/assets/patterns/ssn.json +++ b/crates/nvisy-pattern/assets/patterns/ssn.json @@ -1,6 +1,6 @@ { "name": "ssn", - "category": "pii", + "category": "personal_identity", "entity_type": "government_id", "pattern": { "regex": "\\b(\\d{3})-(\\d{2})-(\\d{4})\\b", diff --git a/crates/nvisy-pattern/assets/patterns/url.json b/crates/nvisy-pattern/assets/patterns/url.json index d7bebc56..6e9907fa 100644 --- a/crates/nvisy-pattern/assets/patterns/url.json +++ b/crates/nvisy-pattern/assets/patterns/url.json @@ -1,6 +1,6 @@ { "name": "url", - "category": "pii", + "category": "contact_info", "entity_type": "url", "pattern": { "regex": "\\bhttps?://[^\\s/$.?#][^\\s]*\\b", diff --git a/crates/nvisy-pattern/assets/patterns/us_drivers_license.json b/crates/nvisy-pattern/assets/patterns/us_drivers_license.json index 1c1709a4..fc39bdcf 100644 --- a/crates/nvisy-pattern/assets/patterns/us_drivers_license.json +++ b/crates/nvisy-pattern/assets/patterns/us_drivers_license.json @@ -1,6 +1,6 @@ { "name": "us-drivers-license", - "category": "pii", + "category": "personal_identity", "entity_type": "drivers_license", "pattern": { "regex": "\\b[A-Z]\\d{3}-\\d{4}-\\d{4}\\b", diff --git a/crates/nvisy-pattern/assets/patterns/us_passport.json b/crates/nvisy-pattern/assets/patterns/us_passport.json index bf055a8f..0e0c5c57 100644 --- a/crates/nvisy-pattern/assets/patterns/us_passport.json +++ 
b/crates/nvisy-pattern/assets/patterns/us_passport.json @@ -1,6 +1,6 @@ { "name": "us-passport", - "category": "pii", + "category": "personal_identity", "entity_type": "passport_number", "pattern": { "regex": "\\b[A-Z]\\d{8}\\b", diff --git a/crates/nvisy-pattern/assets/patterns/us_postal_code.json b/crates/nvisy-pattern/assets/patterns/us_postal_code.json index b6269565..33c7c8d0 100644 --- a/crates/nvisy-pattern/assets/patterns/us_postal_code.json +++ b/crates/nvisy-pattern/assets/patterns/us_postal_code.json @@ -1,6 +1,6 @@ { "name": "us-postal-code", - "category": "pii", + "category": "contact_info", "entity_type": "postal_code", "pattern": { "regex": "\\b\\d{5}(?:-\\d{4})?\\b", diff --git a/crates/nvisy-pattern/src/dictionaries/csv_dictionary.rs b/crates/nvisy-pattern/src/dictionaries/csv_dictionary.rs index 96e55a98..e2baa979 100644 --- a/crates/nvisy-pattern/src/dictionaries/csv_dictionary.rs +++ b/crates/nvisy-pattern/src/dictionaries/csv_dictionary.rs @@ -1,17 +1,20 @@ -//! CSV dictionary: one row per entity, each cell is a matchable variant. +//! CSV dictionary: one row per entity, each cell becomes a matchable variant. -use super::Dictionary; +use std::path::Path; + +use super::{CsvDictionaryError, Dictionary, DictionaryLoadError, DictionaryTerm}; /// A dictionary parsed from a CSV file. /// /// Each row may contain multiple columns (e.g. name, symbol, code). -/// Every non-empty cell becomes a matchable term. -#[derive(Debug, Clone)] +/// Every non-empty cell becomes a matchable term whose [`column`] +/// records which CSV column it came from. +/// +/// [`column`]: DictionaryTerm::column +#[derive(Debug)] pub struct CsvDictionary { name: String, - entries: Vec, - /// Source column index for each entry (parallel to `entries`). - columns: Vec, + terms: Vec, } impl CsvDictionary { @@ -21,11 +24,14 @@ impl CsvDictionary { /// `text` is the CSV content where each non-empty cell becomes a matchable term. 
/// The column index of each cell is preserved so that per-column confidence /// scores can be applied at detection time. - pub fn new(name: impl Into, text: &str) -> Self { + /// + /// # Errors + /// + /// Returns [`CsvDictionaryError`] if any CSV record cannot be parsed. + pub fn new(name: impl Into, text: &str) -> Result { let name = name.into(); - let mut entries = Vec::new(); - let mut columns = Vec::new(); + let mut terms = Vec::new(); let mut reader = csv::ReaderBuilder::new() .has_headers(false) .flexible(true) @@ -33,21 +39,46 @@ impl CsvDictionary { .from_reader(text.as_bytes()); for result in reader.records() { - let record = result.expect("failed to parse CSV record"); + let record = result.map_err(|source| CsvDictionaryError { + name: name.clone(), + source, + })?; for (col, field) in record.iter().enumerate() { - let trimmed = field.trim(); - if !trimmed.is_empty() { - entries.push(trimmed.to_owned()); - columns.push(col); + if !field.is_empty() { + terms.push(DictionaryTerm { + value: field.to_owned(), + column: Some(col as u32), + }); } } } - Self { - name, - entries, - columns, - } + Ok(Self { name, terms }) + } + + /// Load a CSV dictionary from a file path. + /// + /// The dictionary name is derived from the file stem. + /// + /// # Errors + /// + /// Returns [`DictionaryLoadError`] if the file cannot be read or + /// the CSV content cannot be parsed. 
+ pub fn from_path(path: impl AsRef) -> Result { + let path = path.as_ref(); + let name = path + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or_default(); + let text = + std::fs::read_to_string(path).map_err(|source| DictionaryLoadError::ReadFile { + path: path.to_owned(), + source, + })?; + Self::new(name, &text).map_err(|source| DictionaryLoadError::CsvParse { + path: path.to_owned(), + source, + }) } } @@ -56,12 +87,8 @@ impl Dictionary for CsvDictionary { &self.name } - fn entries(&self) -> &[String] { - &self.entries - } - - fn columns(&self) -> Option<&[usize]> { - Some(&self.columns) + fn terms(&self) -> &[DictionaryTerm] { + &self.terms } } @@ -71,20 +98,31 @@ mod tests { #[test] fn parses_rows_with_variants() { - let dict = CsvDictionary::new("test", "US Dollar,USD\nEuro,EUR\n"); + let dict = CsvDictionary::new("test", "US Dollar,USD\nEuro,EUR\n").unwrap(); assert_eq!(dict.name(), "test"); - assert_eq!(dict.entries(), &["US Dollar", "USD", "Euro", "EUR"]); + + let values: Vec<&str> = dict.terms().iter().map(|t| t.value.as_str()).collect(); + assert_eq!(values, &["US Dollar", "USD", "Euro", "EUR"]); } #[test] fn handles_variable_columns() { - let dict = CsvDictionary::new("test", "a,b,c\nd,e\n"); - assert_eq!(dict.entries(), &["a", "b", "c", "d", "e"]); + let dict = CsvDictionary::new("test", "a,b,c\nd,e\n").unwrap(); + let values: Vec<&str> = dict.terms().iter().map(|t| t.value.as_str()).collect(); + assert_eq!(values, &["a", "b", "c", "d", "e"]); } #[test] fn skips_empty_fields() { - let dict = CsvDictionary::new("test", "a,,b\n"); - assert_eq!(dict.entries(), &["a", "b"]); + let dict = CsvDictionary::new("test", "a,,b\n").unwrap(); + let values: Vec<&str> = dict.terms().iter().map(|t| t.value.as_str()).collect(); + assert_eq!(values, &["a", "b"]); + } + + #[test] + fn column_indices_are_tracked() { + let dict = CsvDictionary::new("test", "a,b,c\nd,e\n").unwrap(); + let columns: Vec> = dict.terms().iter().map(|t| t.column).collect(); + 
assert_eq!(columns, &[Some(0), Some(1), Some(2), Some(0), Some(1)]); } } diff --git a/crates/nvisy-pattern/src/dictionaries/csv_error.rs b/crates/nvisy-pattern/src/dictionaries/csv_error.rs new file mode 100644 index 00000000..9db84f9c --- /dev/null +++ b/crates/nvisy-pattern/src/dictionaries/csv_error.rs @@ -0,0 +1,19 @@ +//! Error type for CSV dictionary parsing. + +use nvisy_core::{Error, ErrorKind}; + +/// Error returned when a CSV dictionary cannot be parsed. +#[derive(Debug, thiserror::Error)] +#[error("failed to parse CSV record in dictionary '{name}': {source}")] +pub struct CsvDictionaryError { + pub(crate) name: String, + pub(crate) source: csv::Error, +} + +impl From for Error { + fn from(err: CsvDictionaryError) -> Self { + Error::new(ErrorKind::Validation, err.to_string()) + .with_component("nvisy-pattern::dictionaries") + .with_source(err) + } +} diff --git a/crates/nvisy-pattern/src/dictionaries/dictionary.rs b/crates/nvisy-pattern/src/dictionaries/dictionary.rs index 8edc63f4..e32f882b 100644 --- a/crates/nvisy-pattern/src/dictionaries/dictionary.rs +++ b/crates/nvisy-pattern/src/dictionaries/dictionary.rs @@ -1,12 +1,27 @@ -//! Core [`Dictionary`] trait and [`BoxDictionary`] alias. +//! Core [`Dictionary`] trait, [`DictionaryTerm`], and [`BoxDictionary`] type alias. + +/// A single matchable term within a [`Dictionary`]. +/// +/// Each term carries its matched value and, for multi-column sources like +/// CSV files, the column index it originated from. Plain-text dictionaries +/// leave `column` as `None` (logically equivalent to column 0). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DictionaryTerm { + /// The matchable text value. + pub value: String, + /// Source column index for CSV dictionaries. + /// + /// `None` for plain-text dictionaries where column position is + /// not meaningful. + pub column: Option, +} /// A named collection of matchable terms (e.g. nationalities, currencies). 
/// -/// Implementors load their entries from an asset file at compile time. /// Two built-in implementations are provided: /// /// - [`TxtDictionary`]: plain text, one entry per line. -/// - [`CsvDictionary`]: CSV, each cell is a term. +/// - [`CsvDictionary`]: CSV, each cell is a term with its column index. /// /// [`TxtDictionary`]: super::TxtDictionary /// [`CsvDictionary`]: super::CsvDictionary @@ -15,16 +30,7 @@ pub trait Dictionary: Send + Sync { fn name(&self) -> &str; /// All matchable terms produced by this dictionary. - fn entries(&self) -> &[String]; - - /// Column index for each entry, parallel to [`entries`](Self::entries). - /// - /// Returns `Some` for CSV dictionaries where each cell tracks its - /// source column. Returns `None` for plain-text dictionaries (all - /// entries are logically in column 0). - fn columns(&self) -> Option<&[usize]> { - None - } + fn terms(&self) -> &[DictionaryTerm]; } /// Type-erased boxed [`Dictionary`]. diff --git a/crates/nvisy-pattern/src/dictionaries/dictionary_error.rs b/crates/nvisy-pattern/src/dictionaries/dictionary_error.rs new file mode 100644 index 00000000..dc34f40b --- /dev/null +++ b/crates/nvisy-pattern/src/dictionaries/dictionary_error.rs @@ -0,0 +1,42 @@ +//! Error type for dictionary filesystem loading. + +use nvisy_core::{Error, ErrorKind}; + +use super::CsvDictionaryError; + +/// Error returned when loading dictionaries from the filesystem. +#[derive(Debug, thiserror::Error)] +pub enum DictionaryLoadError { + /// The directory could not be read. + #[error("failed to read dictionary directory '{}': {source}", path.display())] + ReadDir { + path: std::path::PathBuf, + source: std::io::Error, + }, + /// A dictionary file could not be read. + #[error("failed to read dictionary file '{}': {source}", path.display())] + ReadFile { + path: std::path::PathBuf, + source: std::io::Error, + }, + /// A CSV dictionary file failed to parse. 
+ #[error("failed to parse CSV dictionary '{}': {source}", path.display())] + CsvParse { + path: std::path::PathBuf, + source: CsvDictionaryError, + }, +} + +impl From for Error { + fn from(err: DictionaryLoadError) -> Self { + let kind = match &err { + DictionaryLoadError::ReadDir { .. } | DictionaryLoadError::ReadFile { .. } => { + ErrorKind::Internal + } + DictionaryLoadError::CsvParse { .. } => ErrorKind::Validation, + }; + Error::new(kind, err.to_string()) + .with_component("nvisy-pattern::dictionaries") + .with_source(err) + } +} diff --git a/crates/nvisy-pattern/src/dictionaries/dictionary_registry.rs b/crates/nvisy-pattern/src/dictionaries/dictionary_registry.rs new file mode 100644 index 00000000..27c53185 --- /dev/null +++ b/crates/nvisy-pattern/src/dictionaries/dictionary_registry.rs @@ -0,0 +1,326 @@ +//! [`DictionaryRegistry`]: named dictionary collection with O(log n) lookup. + +use std::collections::BTreeMap; +use std::path::Path; +use std::sync::LazyLock; + +use include_dir::{Dir, include_dir}; + +use super::{BoxDictionary, CsvDictionary, Dictionary, DictionaryLoadError, TxtDictionary}; + +const TARGET: &str = "nvisy_pattern::dictionaries"; + +/// A registry of named [`Dictionary`] instances with O(log n) lookup. +/// +/// Use [`load_builtins`] to create a registry pre-populated with +/// the compile-time-embedded dictionary files, or [`load_dir`] to +/// load from a filesystem directory at runtime. +/// +/// [`load_builtins`]: Self::load_builtins +/// [`load_dir`]: Self::load_dir +#[derive(Default)] +pub struct DictionaryRegistry { + inner: BTreeMap, +} + +impl std::fmt::Debug for DictionaryRegistry { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let names: Vec<&str> = self.inner.keys().map(|s| s.as_str()).collect(); + f.debug_struct("DictionaryRegistry") + .field("len", &self.inner.len()) + .field("names", &names) + .finish() + } +} + +impl DictionaryRegistry { + /// Create an empty registry. 
+ pub fn new() -> Self { + Self::default() + } + + /// Insert a dictionary, keyed by its [`Dictionary::name`]. + pub fn insert(&mut self, dict: BoxDictionary) { + let name = dict.name().to_owned(); + self.inner.insert(name, dict); + } + + /// Look up a dictionary by name. + #[must_use] + pub fn get(&self, name: &str) -> Option<&dyn Dictionary> { + self.inner.get(name).map(|b| b.as_ref()) + } + + /// Iterate over all registered dictionaries as `(name, &dyn Dictionary)` pairs. + pub fn iter(&self) -> impl Iterator { + self.inner.iter().map(|(k, v)| (k.as_str(), v.as_ref())) + } + + /// Iterate over all registered dictionary names. + pub fn names(&self) -> impl Iterator { + self.inner.keys().map(|s| s.as_str()) + } + + /// Total number of registered dictionaries. + #[must_use] + pub fn len(&self) -> usize { + self.inner.len() + } + + /// Whether the registry contains no dictionaries. + #[must_use] + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + /// Load all `.txt` and `.csv` files from the embedded + /// `assets/dictionaries/` directory into this registry. + /// + /// Unrecognised file extensions are logged as warnings and skipped. 
+ #[tracing::instrument(target = TARGET, name = "dictionaries.load_builtins", skip(self), fields(count))] + pub fn load_builtins(&mut self) { + static DICT_DIR: Dir = include_dir!("$CARGO_MANIFEST_DIR/assets/dictionaries"); + + for file in DICT_DIR.files() { + let path = file.path(); + let text = file + .contents_utf8() + .expect("dictionary file is not valid UTF-8"); + + let name = path + .file_stem() + .expect("dictionary path has no file stem") + .to_string_lossy(); + + let dict: BoxDictionary = match path.extension().and_then(|e| e.to_str()) { + Some("txt") => Box::new(TxtDictionary::new(name.as_ref(), text)), + Some("csv") => Box::new( + CsvDictionary::new(name.as_ref(), text) + .expect("built-in CSV dictionary must parse"), + ), + other => { + tracing::warn!( + target: TARGET, + path = %path.display(), + extension = ?other, + "skipping unrecognised dictionary file", + ); + continue; + } + }; + + tracing::trace!( + target: TARGET, + name = dict.name(), + terms = dict.terms().len(), + "dictionary loaded", + ); + self.insert(dict); + } + + tracing::Span::current().record("count", self.len()); + tracing::debug!(target: TARGET, "built-in dictionaries loaded"); + } + + /// Load a single `.txt` or `.csv` dictionary file and insert it. + /// + /// The dictionary name is derived from the file stem. + /// Files with unrecognised extensions are logged as warnings and + /// ignored (no error is returned). + /// + /// # Errors + /// + /// Returns [`nvisy_core::Error`] if the file cannot be read or + /// a CSV file fails to parse. 
+ #[tracing::instrument(target = TARGET, name = "dictionaries.load_file", skip_all, fields(path = %path.as_ref().display()))] + pub fn load_file(&mut self, path: impl AsRef) -> nvisy_core::Result<()> { + let path = path.as_ref(); + + let dict: BoxDictionary = match path.extension().and_then(|e| e.to_str()) { + Some("txt") => { + let d = TxtDictionary::from_path(path).map_err(|source| { + DictionaryLoadError::ReadFile { + path: path.to_owned(), + source, + } + })?; + Box::new(d) + } + Some("csv") => Box::new(CsvDictionary::from_path(path)?), + other => { + tracing::warn!( + target: TARGET, + path = %path.display(), + extension = ?other, + "skipping unrecognised dictionary file", + ); + return Ok(()); + } + }; + + tracing::trace!( + target: TARGET, + name = dict.name(), + terms = dict.terms().len(), + "dictionary loaded from filesystem", + ); + self.insert(dict); + Ok(()) + } + + /// Load all `.txt` and `.csv` files from a filesystem directory. + /// + /// Files with unrecognised extensions are logged as warnings and + /// skipped. Loaded dictionaries are inserted into `self`, so this + /// can be called after [`load_builtins`](Self::load_builtins) to + /// layer user-provided dictionaries on top of the built-ins. + /// + /// # Errors + /// + /// Returns [`nvisy_core::Error`] if the directory cannot be read, + /// a file cannot be read, or a CSV file fails to parse. 
+ #[tracing::instrument(target = TARGET, name = "dictionaries.load_dir", skip_all, fields(path = %dir.as_ref().display(), count))] + pub fn load_dir(&mut self, dir: impl AsRef) -> nvisy_core::Result<()> { + let dir = dir.as_ref(); + + let entries = std::fs::read_dir(dir).map_err(|source| DictionaryLoadError::ReadDir { + path: dir.to_owned(), + source, + })?; + + let mut count = 0usize; + for entry in entries { + let entry = entry.map_err(|source| DictionaryLoadError::ReadDir { + path: dir.to_owned(), + source, + })?; + let path = entry.path(); + + if !path.is_file() { + continue; + } + + self.load_file(&path)?; + count += 1; + } + + tracing::Span::current().record("count", count); + tracing::debug!(target: TARGET, "filesystem dictionaries loaded"); + Ok(()) + } +} + +static BUILTIN_REGISTRY: LazyLock = LazyLock::new(|| { + let mut reg = DictionaryRegistry::new(); + reg.load_builtins(); + reg +}); + +/// Return a reference to the lazily-initialised built-in [`DictionaryRegistry`]. +pub fn builtin_registry() -> &'static DictionaryRegistry { + &BUILTIN_REGISTRY +} + +#[cfg(test)] +mod tests { + use std::collections::HashSet; + + use super::*; + + fn registry() -> &'static DictionaryRegistry { + builtin_registry() + } + + #[test] + fn builtins_load_and_are_nonempty() { + let reg = registry(); + assert!(!reg.is_empty()); + for (_, dict) in reg.iter() { + assert!(!dict.terms().is_empty(), "{} is empty", dict.name()); + } + } + + #[test] + fn terms_are_trimmed_and_nonempty() { + for (_, dict) in registry().iter() { + let name = dict.name(); + for term in dict.terms() { + assert!(!term.value.is_empty(), "empty term in {name}"); + assert_eq!( + term.value, + term.value.trim(), + "untrimmed term in {name}: {:?}", + term.value, + ); + } + } + } + + #[test] + fn no_duplicate_terms_per_dictionary() { + for (_, dict) in registry().iter() { + let mut seen = HashSet::new(); + for term in dict.terms() { + assert!( + seen.insert(term.value.as_str()), + "duplicate term {:?} in 
dictionary {}", + term.value, + dict.name(), + ); + } + } + } + + #[test] + fn registry_names_are_sorted() { + let keys: Vec<&str> = registry().names().collect(); + let mut sorted = keys.clone(); + sorted.sort(); + assert_eq!(keys, sorted); + } + + #[test] + fn registry_insert_and_get() { + let mut reg = DictionaryRegistry::new(); + let dict: BoxDictionary = Box::new(TxtDictionary::new("test", "foo\nbar\n")); + reg.insert(dict); + + assert_eq!(reg.len(), 1); + + let dict = reg.get("test").unwrap(); + assert_eq!(dict.name(), "test"); + + let values: Vec<&str> = dict.terms().iter().map(|t| t.value.as_str()).collect(); + assert_eq!(values, &["foo", "bar"]); + } + + #[test] + fn load_dir_reads_filesystem() { + let dir = tempfile::tempdir().unwrap(); + + std::fs::write(dir.path().join("colors.txt"), "red\nblue\ngreen\n").unwrap(); + std::fs::write(dir.path().join("sizes.csv"), "small,S\nmedium,M\nlarge,L\n").unwrap(); + // Should be skipped. + std::fs::write(dir.path().join("readme.md"), "ignore me").unwrap(); + + let mut reg = DictionaryRegistry::new(); + reg.load_dir(dir.path()).unwrap(); + + assert_eq!(reg.len(), 2); + + let colors = reg.get("colors").unwrap(); + let color_values: Vec<&str> = colors.terms().iter().map(|t| t.value.as_str()).collect(); + assert_eq!(color_values, &["red", "blue", "green"]); + + let sizes = reg.get("sizes").unwrap(); + let size_values: Vec<&str> = sizes.terms().iter().map(|t| t.value.as_str()).collect(); + assert_eq!(size_values, &["small", "S", "medium", "M", "large", "L"]); + } + + #[test] + fn load_dir_missing_directory() { + let mut reg = DictionaryRegistry::new(); + let result = reg.load_dir("/nonexistent/path"); + assert!(result.is_err()); + } +} diff --git a/crates/nvisy-pattern/src/dictionaries/mod.rs b/crates/nvisy-pattern/src/dictionaries/mod.rs index 6c5d5ba6..176843c2 100644 --- a/crates/nvisy-pattern/src/dictionaries/mod.rs +++ b/crates/nvisy-pattern/src/dictionaries/mod.rs @@ -1,19 +1,19 @@ //! 
Built-in dictionaries for entity matching. //! //! Dictionaries are asset files under `assets/dictionaries/` containing -//! matchable terms (nationalities, religions, currencies, etc.). They are +//! matchable terms (nationalities, religions, currencies, etc.). They are //! embedded at compile time and loaded lazily on first access. //! //! Two file formats are supported: //! //! - **Plain text** (`.txt`): one entry per line, see [`TxtDictionary`]. -//! - **CSV** (`.csv`): each row holds variants of a single entity -//! (e.g. `US Dollar,USD`), see [`CsvDictionary`]. +//! - **CSV** (`.csv`): each row holds variants of a single entity (e.g. +//! `US Dollar,USD`), see [`CsvDictionary`]. //! //! # Key types //! //! - [`Dictionary`]: trait implemented by every dictionary. -//! - [`DictionaryRegistry`]: sorted collection with O(log n) lookup by name. +//! - [`DictionaryRegistry`]: sorted collection with O(log n) lookup. //! //! [`TxtDictionary`]: crate::dictionaries::TxtDictionary //! [`CsvDictionary`]: crate::dictionaries::CsvDictionary @@ -21,171 +21,15 @@ //! [`DictionaryRegistry`]: crate::dictionaries::DictionaryRegistry mod csv_dictionary; +mod csv_error; mod dictionary; +mod dictionary_error; +mod dictionary_registry; mod text_dictionary; -use std::collections::BTreeMap; -use std::sync::LazyLock; - -pub use csv_dictionary::CsvDictionary; -pub use dictionary::{BoxDictionary, Dictionary}; -use include_dir::{Dir, include_dir}; -pub use text_dictionary::TxtDictionary; - -/// A registry of named [`Dictionary`] instances with O(log n) lookup. -/// -/// Use [`load_builtins`] to create a registry pre-populated with -/// the compile-time-embedded dictionary files. 
-///
-/// [`load_builtins`]: Self::load_builtins
-pub struct DictionaryRegistry {
-    inner: BTreeMap<String, BoxDictionary>,
-}
-
-impl std::fmt::Debug for DictionaryRegistry {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let names: Vec<&str> = self.inner.keys().map(|s| s.as_str()).collect();
-        f.debug_struct("DictionaryRegistry")
-            .field("len", &self.inner.len())
-            .field("names", &names)
-            .finish()
-    }
-}
-
-impl DictionaryRegistry {
-    /// Create an empty registry.
-    pub fn new() -> Self {
-        Self {
-            inner: BTreeMap::new(),
-        }
-    }
-
-    /// Insert a dictionary, keyed by its [`Dictionary::name`].
-    pub fn insert(&mut self, dict: BoxDictionary) {
-        let name = dict.name().to_owned();
-        self.inner.insert(name, dict);
-    }
-
-    /// Look up a dictionary by name.
-    #[must_use]
-    pub fn get(&self, name: &str) -> Option<&dyn Dictionary> {
-        self.inner.get(name).map(|b| b.as_ref())
-    }
-
-    /// Total number of registered dictionaries.
-    #[must_use]
-    pub fn len(&self) -> usize {
-        self.inner.len()
-    }
-
-    /// Load all `.txt` and `.csv` files from the embedded
-    /// `assets/dictionaries/` directory and return a populated registry.
-    ///
-    /// Unrecognised file extensions are logged as warnings and skipped.
-    #[tracing::instrument(name = "dictionaries.load_builtins", fields(count))]
-    pub fn load_builtins() -> Self {
-        static DICT_DIR: Dir = include_dir!("$CARGO_MANIFEST_DIR/assets/dictionaries");
-
-        let mut reg = Self::new();
-
-        for file in DICT_DIR.files() {
-            let path = file.path();
-            let text = file
-                .contents_utf8()
-                .expect("dictionary file is not valid UTF-8");
-
-            let name = path
-                .file_stem()
-                .expect("dictionary path has no file stem")
-                .to_string_lossy();
-
-            let dict: BoxDictionary = match path.extension().and_then(|e| e.to_str()) {
-                Some("txt") => Box::new(TxtDictionary::new(name.as_ref(), text)),
-                Some("csv") => Box::new(CsvDictionary::new(name.as_ref(), text)),
-                other => {
-                    tracing::warn!(
-                        path = %path.display(),
-                        extension = ?other,
-                        "skipping unrecognised dictionary file",
-                    );
-                    continue;
-                }
-            };
-
-            tracing::trace!(
-                name = dict.name(),
-                entries = dict.entries().len(),
-                "dictionary loaded",
-            );
-            reg.insert(dict);
-        }
-
-        tracing::Span::current().record("count", reg.len());
-        tracing::debug!("built-in dictionaries loaded");
-        reg
-    }
-}
-
-impl Default for DictionaryRegistry {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-static BUILTIN_REGISTRY: LazyLock<DictionaryRegistry> =
-    LazyLock::new(DictionaryRegistry::load_builtins);
-
-/// Return a reference to the lazily-initialised built-in [`DictionaryRegistry`].
-pub fn builtin_registry() -> &'static DictionaryRegistry {
-    &BUILTIN_REGISTRY
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    fn registry() -> &'static DictionaryRegistry {
-        builtin_registry()
-    }
-
-    #[test]
-    fn builtins_load_and_are_nonempty() {
-        let reg = registry();
-        assert!(reg.len() > 0);
-        for (_, dict) in &reg.inner {
-            assert!(!dict.entries().is_empty(), "{} is empty", dict.name());
-        }
-    }
-
-    #[test]
-    fn entries_are_trimmed_and_nonempty() {
-        for (_, dict) in &registry().inner {
-            let name = dict.name();
-            for entry in dict.entries() {
-                assert!(!entry.is_empty(), "empty entry in {name}");
-                assert_eq!(*entry, entry.trim(), "untrimmed entry in {name}: {entry:?}");
-            }
-        }
-    }
-
-    #[test]
-    fn registry_names_are_sorted() {
-        let keys: Vec<&str> = registry().inner.keys().map(|s| s.as_str()).collect();
-        let mut sorted = keys.clone();
-        sorted.sort();
-        assert_eq!(keys, sorted);
-    }
-
-    #[test]
-    fn registry_insert_and_get() {
-        let mut reg = DictionaryRegistry::new();
-        let dict: BoxDictionary = Box::new(TxtDictionary::new("test", "foo\nbar\n"));
-        reg.insert(dict);
-
-        assert_eq!(reg.len(), 1);
-
-        let dict = reg.get("test").unwrap();
-        assert_eq!(dict.name(), "test");
-        assert_eq!(dict.entries(), &["foo", "bar"]);
-    }
-}
+pub use self::csv_dictionary::CsvDictionary;
+pub(crate) use self::csv_error::CsvDictionaryError;
+pub use self::dictionary::{BoxDictionary, Dictionary, DictionaryTerm};
+pub(crate) use self::dictionary_error::DictionaryLoadError;
+pub use self::dictionary_registry::{DictionaryRegistry, builtin_registry};
+pub use self::text_dictionary::TxtDictionary;
diff --git a/crates/nvisy-pattern/src/dictionaries/text_dictionary.rs b/crates/nvisy-pattern/src/dictionaries/text_dictionary.rs
index 6273470d..4916aecf 100644
--- a/crates/nvisy-pattern/src/dictionaries/text_dictionary.rs
+++ b/crates/nvisy-pattern/src/dictionaries/text_dictionary.rs
@@ -1,12 +1,14 @@
-//! Plain-text dictionary: one matchable entry per line.
+//! 
Plain-text dictionary: one entry per line. -use super::Dictionary; +use std::path::Path; + +use super::{Dictionary, DictionaryTerm}; /// A dictionary parsed from a plain-text file (one entry per line). -#[derive(Debug, Clone)] +#[derive(Debug)] pub struct TxtDictionary { name: String, - entries: Vec, + terms: Vec, } impl TxtDictionary { @@ -17,14 +19,34 @@ impl TxtDictionary { pub fn new(name: impl Into, text: &str) -> Self { let name = name.into(); - let entries = text + let terms = text .lines() .map(|l| l.trim()) .filter(|l| !l.is_empty()) - .map(String::from) + .map(|l| DictionaryTerm { + value: l.to_owned(), + column: None, + }) .collect(); - Self { name, entries } + Self { name, terms } + } + + /// Load a plain-text dictionary from a file path. + /// + /// The dictionary name is derived from the file stem. + /// + /// # Errors + /// + /// Returns [`std::io::Error`] if the file cannot be read. + pub fn from_path(path: impl AsRef) -> std::io::Result { + let path = path.as_ref(); + let name = path + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or_default(); + let text = std::fs::read_to_string(path)?; + Ok(Self::new(name, &text)) } } @@ -33,8 +55,8 @@ impl Dictionary for TxtDictionary { &self.name } - fn entries(&self) -> &[String] { - &self.entries + fn terms(&self) -> &[DictionaryTerm] { + &self.terms } } @@ -46,6 +68,10 @@ mod tests { fn parses_lines() { let dict = TxtDictionary::new("test", "alpha\n beta \n\ngamma\n"); assert_eq!(dict.name(), "test"); - assert_eq!(dict.entries(), &["alpha", "beta", "gamma"]); + + let values: Vec<&str> = dict.terms().iter().map(|t| t.value.as_str()).collect(); + assert_eq!(values, &["alpha", "beta", "gamma"]); + + assert!(dict.terms().iter().all(|t| t.column.is_none())); } } diff --git a/crates/nvisy-pattern/src/engine/allow_list.rs b/crates/nvisy-pattern/src/engine/allow_list.rs index 24ee8b9b..f08b6cc2 100644 --- a/crates/nvisy-pattern/src/engine/allow_list.rs +++ b/crates/nvisy-pattern/src/engine/allow_list.rs @@ 
-1,4 +1,4 @@ -//! [`AllowList`] — exact-match suppression of known false positives. +//! [`AllowList`]: exact-match suppression of known false positives. use std::collections::HashSet; diff --git a/crates/nvisy-pattern/src/engine/builder.rs b/crates/nvisy-pattern/src/engine/builder.rs index 3cfb4f8d..acb56a34 100644 --- a/crates/nvisy-pattern/src/engine/builder.rs +++ b/crates/nvisy-pattern/src/engine/builder.rs @@ -1,25 +1,21 @@ -//! [`PatternEngineBuilder`] — configures and compiles a [`PatternEngine`]. +//! [`PatternEngineBuilder`]: configures and compiles a [`PatternEngine`]. use regex::{Regex, RegexSet}; -use super::allow_list::AllowList; -use super::deny_list::DenyList; use super::error::PatternEngineError; -use super::{DictEntry, PatternEngine, RegexEntry}; +use super::{DictEntry, PatternEngine, RegexEntry, TARGET}; use crate::dictionaries; -use crate::patterns::{self, MatchSource, Pattern}; +use crate::patterns::{MatchSource, Pattern}; use crate::validators::ValidatorResolver; /// Builder for [`PatternEngine`]. /// /// By default all built-in patterns are included. Use -/// [`patterns`](Self::patterns) to restrict to a subset. +/// [`with_patterns`](Self::with_patterns) to restrict to a subset. #[derive(Default)] pub struct PatternEngineBuilder { pattern_names: Option>, confidence_threshold: f64, - allow_list: AllowList, - deny_list: DenyList, } impl PatternEngineBuilder { @@ -37,46 +33,27 @@ impl PatternEngineBuilder { /// Set the minimum confidence score for matches. /// /// Matches with confidence below this value are discarded during - /// [`scan_text`](PatternEngine::scan_text). Defaults to `0.0`. + /// [`scan_text`](PatternEngine::scan_text). Defaults to `0.0`. pub fn with_confidence_threshold(mut self, threshold: f64) -> Self { self.confidence_threshold = threshold; self } - /// Set the allow list. - /// - /// Matches whose exact value appears in the allow list are suppressed - /// (dropped) during [`scan_text`](PatternEngine::scan_text). 
- pub fn with_allow(mut self, list: AllowList) -> Self { - self.allow_list = list; - self - } - - /// Set the deny list. - /// - /// If a deny-list value is found in the scanned text but was not matched - /// by any regex or dictionary pattern, it is injected as a synthetic match - /// with confidence `1.0`. - pub fn with_deny(mut self, list: DenyList) -> Self { - self.deny_list = list; - self - } - /// Compile all selected patterns and build the engine. /// /// # Errors /// - /// Returns [`PatternEngineError`] if a regex fails to compile, a + /// Returns [`nvisy_core::Error`] if a regex fails to compile, a /// referenced dictionary is missing, or the Aho-Corasick automaton /// cannot be built. - #[tracing::instrument(name = "PatternEngine::build", skip(self))] - pub fn build(self) -> Result { - let pat_reg = patterns::builtin_registry(); + #[tracing::instrument(target = TARGET, name = "PatternEngine::build", skip(self))] + pub fn build(self) -> nvisy_core::Result { + let pat_reg = crate::patterns::builtin_registry(); let dict_reg = dictionaries::builtin_registry(); let active: Vec<&dyn Pattern> = match &self.pattern_names { Some(names) => names.iter().filter_map(|n| pat_reg.get(n)).collect(), - None => pat_reg.values(), + None => pat_reg.iter().collect(), }; let mut regex_entries = Vec::new(); @@ -86,15 +63,16 @@ impl PatternEngineBuilder { for p in &active { match p.match_source() { MatchSource::Regex(rp) => { + let effective = rp.effective_regex(); let compiled = - Regex::new(&rp.regex).map_err(|e| PatternEngineError::RegexCompile { + Regex::new(&effective).map_err(|e| PatternEngineError::RegexCompile { name: p.name().to_owned(), source: e, })?; - regex_strings.push(rp.regex.clone()); + regex_strings.push(effective); regex_entries.push(RegexEntry { pattern_name: p.name().to_owned(), - category: p.category().clone(), + category: p.category(), entity_kind: p.entity_kind(), confidence: rp.confidence, validator_name: rp.validator.clone(), @@ -109,11 +87,12 @@ impl 
PatternEngineBuilder { dictionary: dp.name.clone(), } })?; - let values: Vec = dict.entries().to_vec(); - if values.is_empty() { + let terms = dict.terms(); + if terms.is_empty() { continue; } - let columns = dict.columns().map(|c| c.to_vec()); + let values: Vec = terms.iter().map(|t| t.value.clone()).collect(); + let columns: Vec> = terms.iter().map(|t| t.column).collect(); let automaton = aho_corasick::AhoCorasickBuilder::new() .ascii_case_insensitive(!dp.case_sensitive) .build(&values) @@ -123,7 +102,7 @@ impl PatternEngineBuilder { })?; dict_entries.push(DictEntry { pattern_name: p.name().to_owned(), - category: p.category().clone(), + category: p.category(), entity_kind: p.entity_kind(), confidence: dp.confidence.clone(), automaton, @@ -140,6 +119,7 @@ impl PatternEngineBuilder { let validators = ValidatorResolver::builtins(); tracing::debug!( + target: TARGET, regex_count = regex_entries.len(), dict_count = dict_entries.len(), "PatternEngine built", @@ -151,8 +131,6 @@ impl PatternEngineBuilder { dict_entries, validators, confidence_threshold: self.confidence_threshold, - allow_set: self.allow_list, - deny_set: self.deny_list, }) } } diff --git a/crates/nvisy-pattern/src/engine/deny_list.rs b/crates/nvisy-pattern/src/engine/deny_list.rs index 946784c0..9fbb2f3d 100644 --- a/crates/nvisy-pattern/src/engine/deny_list.rs +++ b/crates/nvisy-pattern/src/engine/deny_list.rs @@ -1,37 +1,42 @@ -//! [`DenyList`] — forced detection of known sensitive values. +//! [`DenyList`]: forced detection of known sensitive values. -use std::collections::HashMap; +use std::collections::BTreeMap; -use nvisy_ontology::entity::{EntityCategory, EntityKind}; +use nvisy_ontology::entity::{EntityCategory, EntityKind, RecognitionMethod}; -/// A deny-list entry: a known sensitive value that must always be detected. +/// A deny-list rule: a known sensitive value that must always be detected. 
#[derive(Debug, Clone)] -pub struct DenyEntry { +pub struct DenyRule { /// Entity category for the injected match. pub category: EntityCategory, /// Entity kind for the injected match. pub entity_kind: EntityKind, + /// Recognition method carried from the original detection source. + pub method: RecognitionMethod, } /// Exact-match deny list for forcing detection of known sensitive values. /// /// If a deny-list value is found in the scanned text but was not already /// matched by any regex or dictionary pattern, it is injected as a synthetic -/// [`PatternMatch`](super::PatternMatch) with confidence `1.0` and source -/// [`DetectionSource::DenyList`](super::DetectionSource::DenyList). +/// [`RawMatch`](super::RawMatch) with confidence `1.0` and +/// `pattern_name: None`. /// /// # Examples /// /// ```rust,ignore -/// use nvisy_ontology::entity::{EntityCategory, EntityKind}; +/// use nvisy_ontology::entity::{EntityCategory, EntityKind, RecognitionMethod}; /// /// let deny = DenyList::new() -/// .with("John Doe", EntityCategory::Pii, EntityKind::PersonName) -/// .with("ACME Corp", EntityCategory::Pii, EntityKind::Organization); +/// .with("John Doe", DenyRule { +/// category: EntityCategory::PersonalIdentity, +/// entity_kind: EntityKind::PersonName, +/// method: RecognitionMethod::Ner, +/// }); /// ``` #[derive(Debug, Clone, Default)] pub struct DenyList { - pub(crate) entries: HashMap, + pub(crate) entries: BTreeMap, } impl DenyList { @@ -40,37 +45,15 @@ impl DenyList { Self::default() } - /// Add a single entry. - pub fn with( - mut self, - value: impl Into, - category: EntityCategory, - entity_kind: EntityKind, - ) -> Self { - self.entries.insert( - value.into(), - DenyEntry { - category, - entity_kind, - }, - ); + /// Add a single rule (builder style). + pub fn with(mut self, value: impl Into, rule: DenyRule) -> Self { + self.entries.insert(value.into(), rule); self } - /// Insert an entry into this list. 
- pub fn insert( - &mut self, - value: impl Into, - category: EntityCategory, - entity_kind: EntityKind, - ) { - self.entries.insert( - value.into(), - DenyEntry { - category, - entity_kind, - }, - ); + /// Insert a rule into this list. + pub fn insert(&mut self, value: impl Into, rule: DenyRule) { + self.entries.insert(value.into(), rule); } /// Whether the list contains the given value. @@ -79,9 +62,9 @@ impl DenyList { self.entries.contains_key(value) } - /// Look up the entry for a value. + /// Look up the rule for a value. #[must_use] - pub fn get(&self, value: &str) -> Option<&DenyEntry> { + pub fn get(&self, value: &str) -> Option<&DenyRule> { self.entries.get(value) } @@ -97,18 +80,8 @@ impl DenyList { self.entries.is_empty() } - /// Iterate over (value, entry) pairs. - pub fn iter(&self) -> impl Iterator { + /// Iterate over (value, rule) pairs. + pub fn iter(&self) -> impl Iterator { self.entries.iter().map(|(k, v)| (k.as_str(), v)) } } - -impl> FromIterator<(S, EntityCategory, EntityKind)> for DenyList { - fn from_iter>(iter: I) -> Self { - let mut list = Self::new(); - for (value, category, entity_kind) in iter { - list.insert(value, category, entity_kind); - } - list - } -} diff --git a/crates/nvisy-pattern/src/engine/error.rs b/crates/nvisy-pattern/src/engine/error.rs index 4de9a389..337a18b0 100644 --- a/crates/nvisy-pattern/src/engine/error.rs +++ b/crates/nvisy-pattern/src/engine/error.rs @@ -1,8 +1,12 @@ -//! Errors produced during [`PatternEngine`](super::PatternEngine) construction. +//! Errors produced during [`PatternEngine`] construction. +//! +//! [`PatternEngine`]: super::PatternEngine + +use nvisy_core::{Error, ErrorKind}; /// Errors that can occur while building a [`PatternEngine`](super::PatternEngine). #[derive(Debug, thiserror::Error)] -pub enum PatternEngineError { +pub(crate) enum PatternEngineError { /// A regex pattern string failed to compile. 
#[error("failed to compile regex for pattern '{name}': {source}")] RegexCompile { name: String, source: regex::Error }, @@ -19,3 +23,11 @@ pub enum PatternEngineError { #[error("failed to build RegexSet pre-filter: {0}")] RegexSetBuild(regex::Error), } + +impl From for Error { + fn from(err: PatternEngineError) -> Self { + Error::new(ErrorKind::Validation, err.to_string()) + .with_component("nvisy-pattern::engine") + .with_source(err) + } +} diff --git a/crates/nvisy-pattern/src/engine/mod.rs b/crates/nvisy-pattern/src/engine/mod.rs index fcf70770..0883ca07 100644 --- a/crates/nvisy-pattern/src/engine/mod.rs +++ b/crates/nvisy-pattern/src/engine/mod.rs @@ -2,39 +2,41 @@ //! //! [`PatternEngine`] compiles all built-in (and optionally user-selected) //! regex patterns and dictionary automata into a single unit that can -//! scan text in one call. Use [`PatternEngineBuilder`] for configuration -//! or [`default_engine`] for an out-of-the-box singleton. +//! scan text in one call. Use [`PatternEngine::builder`] for configuration +//! or [`PatternEngine::instance`] for an out-of-the-box singleton. //! //! # Key types //! -//! - [`PatternEngine`]: the pre-compiled scanning engine. -//! - [`PatternEngineBuilder`]: builder for configuring patterns, thresholds, -//! and allow/deny lists. -//! - [`PatternMatch`]: a single match produced by scanning. -//! - [`DetectionSource`]: how a match was produced (regex, dictionary, deny list). +//! - [`PatternEngine`]: pre-compiled scanning engine. +//! - [`ScanContext`]: per-scan allow/deny list configuration. +//! - [`RawMatch`]: single match produced by scanning. //! - [`AllowList`] / [`DenyList`]: exact-match suppression and forced detection. -//! - [`PatternEngineError`]: build-time errors. +//! - [`PatternEngineBuilder`]: builder for configuring patterns and thresholds. 
mod allow_list; mod builder; mod deny_list; mod error; mod pattern_match; +mod scan_context; +use std::collections::HashSet; use std::sync::LazyLock; use aho_corasick::AhoCorasick; -pub use allow_list::AllowList; -pub use builder::PatternEngineBuilder; -pub use deny_list::{DenyEntry, DenyList}; -pub use error::PatternEngineError; -use nvisy_ontology::entity::{EntityCategory, EntityKind}; -pub use pattern_match::{DetectionSource, PatternMatch}; +use nvisy_ontology::entity::{EntityCategory, EntityKind, RecognitionMethod}; use regex::{Regex, RegexSet}; +pub use self::allow_list::AllowList; +pub use self::builder::PatternEngineBuilder; +pub use self::deny_list::{DenyList, DenyRule}; +pub use self::pattern_match::RawMatch; +pub use self::scan_context::ScanContext; use crate::patterns::{ContextRule, DictionaryConfidence}; use crate::validators::ValidatorResolver; +const TARGET: &str = "nvisy_pattern::engine"; + /// Metadata stored alongside each compiled regex. struct RegexEntry { pattern_name: String, @@ -56,8 +58,8 @@ struct DictEntry { /// The terms used to build the automaton, indexed by pattern id. values: Vec, /// Per-entry column index from the source dictionary (parallel to `values`). - /// `None` for plain-text dictionaries (all entries are column 0). - columns: Option>, + /// `None` entries indicate plain-text origin (logically column 0). + columns: Vec>, context: Option, } @@ -66,9 +68,10 @@ impl DictEntry { fn resolve_confidence(&self, pattern_index: usize) -> f64 { let col = self .columns - .as_ref() - .and_then(|cols| cols.get(pattern_index).copied()) - .unwrap_or(0); + .get(pattern_index) + .copied() + .flatten() + .unwrap_or(0) as usize; self.confidence.resolve(col) } } @@ -77,25 +80,23 @@ impl DictEntry { /// /// Scanning runs in three phases: /// -/// 1. **Regex** — a [`RegexSet`] pre-filter selects candidate patterns, +/// 1. **Regex**: a [`RegexSet`] pre-filter selects candidate patterns, /// then each matching regex extracts offsets and values. 
-/// 2. **Dictionary** — Aho-Corasick automata perform literal multi-pattern +/// 2. **Dictionary**: Aho-Corasick automata perform literal multi-pattern /// matching against known-value dictionaries. -/// 3. **Deny list** — known sensitive values not already matched are +/// 3. **Deny list**: known sensitive values not already matched are /// injected as synthetic matches with confidence `1.0`. /// /// Allow-list filtering is applied inline during phases 1 and 2. /// -/// Build via [`PatternEngine::builder`] or use [`default_engine`] for -/// the singleton with all built-in patterns. +/// Build via [`PatternEngine::builder`] or use [`PatternEngine::instance`] +/// for the singleton with all built-in patterns. pub struct PatternEngine { regex_set: RegexSet, regex_entries: Vec, dict_entries: Vec, validators: ValidatorResolver, confidence_threshold: f64, - allow_set: AllowList, - deny_set: DenyList, } impl std::fmt::Debug for PatternEngine { @@ -109,46 +110,37 @@ impl std::fmt::Debug for PatternEngine { } impl PatternEngine { + /// Return a reference to the lazily-initialised default engine + /// containing all built-in patterns. + pub fn instance() -> &'static Self { + &DEFAULT_ENGINE + } + /// Create a new [`PatternEngineBuilder`]. pub fn builder() -> PatternEngineBuilder { PatternEngineBuilder::default() } - /// Validate a value using the checksum associated with the entity kind. - /// - /// Returns `Some(true)` if the value passes, `Some(false)` if it fails, - /// or `None` if no checksum validator is registered for that entity kind. - pub fn validate_checksum(&self, entity_kind: EntityKind, value: &str) -> Option { - let validator_name = match entity_kind { - EntityKind::PaymentCard => "luhn", - EntityKind::GovernmentId => "ssn", - EntityKind::Iban => "iban", - _ => return None, - }; - let validate = self.validators.resolve(validator_name)?; - Some(validate(value)) - } - /// Scan `text` and return all matches above the confidence threshold. 
/// /// Matches whose value appears in the allow list are suppressed. /// Deny-list values found in the text are injected as synthetic matches /// with confidence `1.0` when not already matched. - #[tracing::instrument(skip(self, text), fields(text_len = text.len(), matches))] - pub fn scan_text(&self, text: &str) -> Vec { + #[tracing::instrument(target = TARGET, skip(self, text, ctx), fields(text_len = text.len(), matches = tracing::field::Empty))] + pub fn scan_text(&self, text: &str, ctx: &ScanContext) -> Vec { let mut results = Vec::new(); - self.scan_regex(text, &mut results); - self.scan_dict(text, &mut results); - self.scan_deny_list(text, &mut results); + self.scan_regex(text, &ctx.allow, &mut results); + self.scan_dict(text, &ctx.allow, &mut results); + self.scan_deny_list(text, &ctx.deny, &mut results); tracing::Span::current().record("matches", results.len()); results } - /// Phase 1: regex matches — use `RegexSet` as a pre-filter, then run + /// Phase 1: regex matches. Uses `RegexSet` as a pre-filter, then runs /// each matching regex individually to extract offsets and values. 
- fn scan_regex(&self, text: &str, results: &mut Vec) { + fn scan_regex(&self, text: &str, allow: &AllowList, results: &mut Vec) { let set_matches = self.regex_set.matches(text); for idx in set_matches.iter() { let entry = &self.regex_entries[idx]; @@ -160,26 +152,30 @@ impl PatternEngine { for mat in entry.regex.find_iter(text) { let value = mat.as_str(); - if self.allow_set.contains(value) { + if allow.contains(value) { continue; } + let mut methods = vec![RecognitionMethod::Regex]; + if let Some(ref vname) = entry.validator_name && let Some(validate) = self.validators.resolve(vname) - && !validate(value) { - continue; + if !validate(value) { + continue; + } + methods.push(RecognitionMethod::Checksum); } - results.push(PatternMatch { - pattern_name: entry.pattern_name.clone(), - category: entry.category.clone(), + results.push(RawMatch { + pattern_name: Some(entry.pattern_name.clone()), + category: entry.category, entity_kind: entry.entity_kind, value: value.to_owned(), start: mat.start(), end: mat.end(), confidence: entry.confidence, - source: DetectionSource::Regex, + recognition_methods: methods, context: entry.context.clone(), }); } @@ -187,64 +183,66 @@ impl PatternEngine { } /// Phase 2: dictionary matches via Aho-Corasick automata. - fn scan_dict(&self, text: &str, results: &mut Vec) { + fn scan_dict(&self, text: &str, allow: &AllowList, results: &mut Vec) { for entry in &self.dict_entries { for mat in entry.automaton.find_iter(text) { let pat_idx = mat.pattern().as_usize(); let value = &entry.values[pat_idx]; - // Resolve per-entry confidence: use column override if available, - // otherwise fall back to the pattern's base confidence. 
let confidence = entry.resolve_confidence(pat_idx); if confidence < self.confidence_threshold { continue; } - if self.allow_set.contains(value.as_str()) { + if allow.contains(value.as_str()) { continue; } - results.push(PatternMatch { - pattern_name: entry.pattern_name.clone(), - category: entry.category.clone(), + results.push(RawMatch { + pattern_name: Some(entry.pattern_name.clone()), + category: entry.category, entity_kind: entry.entity_kind, value: value.clone(), start: mat.start(), end: mat.end(), confidence, - source: DetectionSource::Dictionary, + recognition_methods: vec![RecognitionMethod::Dictionary], context: entry.context.clone(), }); } } } - /// Phase 3: inject deny-list values found in `text` that were not - /// already matched by regex or dictionary. - fn scan_deny_list(&self, text: &str, results: &mut Vec) { - for (deny_value, deny_entry) in self.deny_set.iter() { - if results.iter().any(|r| r.value == deny_value) { + /// Phase 3: inject deny-list values found in `text` not already + /// matched by regex or dictionary. 
+ fn scan_deny_list(&self, text: &str, deny: &DenyList, results: &mut Vec) { + let matched_values: HashSet<&str> = results.iter().map(|r| r.value.as_str()).collect(); + + let mut deny_matches = Vec::new(); + for (deny_value, deny_rule) in deny.iter() { + if matched_values.contains(deny_value) { continue; } let mut search_start = 0; while let Some(pos) = text[search_start..].find(deny_value) { let abs_start = search_start + pos; let abs_end = abs_start + deny_value.len(); - results.push(PatternMatch { - pattern_name: String::new(), - category: deny_entry.category.clone(), - entity_kind: deny_entry.entity_kind, + deny_matches.push(RawMatch { + pattern_name: None, + category: deny_rule.category, + entity_kind: deny_rule.entity_kind, value: deny_value.to_owned(), start: abs_start, end: abs_end, confidence: 1.0, - source: DetectionSource::DenyList, + recognition_methods: vec![deny_rule.method], context: None, }); search_start = abs_end; } } + results.extend(deny_matches); } } @@ -254,28 +252,28 @@ static DEFAULT_ENGINE: LazyLock = LazyLock::new(|| { .expect("built-in patterns must compile") }); -/// Return a reference to the lazily-initialised default [`PatternEngine`] -/// containing all built-in patterns. 
-pub fn default_engine() -> &'static PatternEngine { - &DEFAULT_ENGINE -} - #[cfg(test)] mod tests { use super::*; + fn empty_ctx() -> ScanContext { + ScanContext::default() + } + #[test] fn default_engine_builds() { - let engine = default_engine(); + let engine = PatternEngine::instance(); assert!(!engine.regex_entries.is_empty()); } #[test] fn scan_text_finds_ssn() { - let engine = default_engine(); - let matches = engine.scan_text("My SSN is 123-45-6789."); + let engine = PatternEngine::instance(); + let matches = engine.scan_text("My SSN is 123-45-6789.", &empty_ctx()); assert!( - matches.iter().any(|m| m.pattern_name == "ssn"), + matches + .iter() + .any(|m| m.pattern_name.as_deref() == Some("ssn")), "expected SSN match, got: {:?}", matches.iter().map(|m| &m.pattern_name).collect::>() ); @@ -283,10 +281,12 @@ mod tests { #[test] fn scan_text_finds_email() { - let engine = default_engine(); - let matches = engine.scan_text("Contact: alice@example.com"); + let engine = PatternEngine::instance(); + let matches = engine.scan_text("Contact: alice@example.com", &empty_ctx()); assert!( - matches.iter().any(|m| m.pattern_name == "email"), + matches + .iter() + .any(|m| m.pattern_name.as_deref() == Some("email")), "expected email match, got: {:?}", matches.iter().map(|m| &m.pattern_name).collect::>() ); @@ -298,9 +298,11 @@ mod tests { .with_confidence_threshold(0.99) .build() .unwrap(); - let matches = engine.scan_text("My SSN is 123-45-6789."); + let matches = engine.scan_text("My SSN is 123-45-6789.", &empty_ctx()); assert!( - !matches.iter().any(|m| m.pattern_name == "ssn"), + !matches + .iter() + .any(|m| m.pattern_name.as_deref() == Some("ssn")), "SSN should be filtered by 0.99 threshold" ); } @@ -317,25 +319,28 @@ mod tests { #[test] fn scan_text_returns_correct_offsets() { - let engine = default_engine(); + let engine = PatternEngine::instance(); let text = "SSN: 123-45-6789"; - let matches = engine.scan_text(text); - let ssn_match = matches.iter().find(|m| 
m.pattern_name == "ssn").unwrap(); + let matches = engine.scan_text(text, &empty_ctx()); + let ssn_match = matches + .iter() + .find(|m| m.pattern_name.as_deref() == Some("ssn")) + .unwrap(); assert_eq!(&text[ssn_match.start..ssn_match.end], "123-45-6789"); } #[test] fn dictionary_matches_are_found() { - let engine = default_engine(); - let matches = engine.scan_text("She is American and speaks English."); + let engine = PatternEngine::instance(); + let matches = engine.scan_text("She is American and speaks English.", &empty_ctx()); assert!( - matches - .iter() - .any(|m| m.source == DetectionSource::Dictionary), + matches.iter().any(|m| m + .recognition_methods + .contains(&RecognitionMethod::Dictionary)), "expected dictionary match, got: {:?}", matches .iter() - .map(|m| (&m.pattern_name, &m.source)) + .map(|m| (&m.pattern_name, &m.recognition_methods)) .collect::>() ); } @@ -344,12 +349,14 @@ mod tests { fn allow_list_suppresses_match() { let engine = PatternEngine::builder() .with_patterns(&["ssn"]) - .with_allow(AllowList::new().with("123-45-6789")) .build() .unwrap(); - let matches = engine.scan_text("SSN: 123-45-6789"); + let ctx = ScanContext::new().with_allow(AllowList::new().with("123-45-6789")); + let matches = engine.scan_text("SSN: 123-45-6789", &ctx); assert!( - !matches.iter().any(|m| m.pattern_name == "ssn"), + !matches + .iter() + .any(|m| m.pattern_name.as_deref() == Some("ssn")), "allow-listed value should be suppressed" ); } @@ -358,37 +365,46 @@ mod tests { fn deny_list_injects_match() { let deny = DenyList::new().with( "secret-value-42", - EntityCategory::Pii, - EntityKind::PersonName, + DenyRule { + category: EntityCategory::PersonalIdentity, + entity_kind: EntityKind::PersonName, + method: RecognitionMethod::Ner, + }, ); let engine = PatternEngine::builder() .with_patterns(&["email"]) - .with_deny(deny) .build() .unwrap(); - let matches = engine.scan_text("The secret-value-42 should be detected."); + let ctx = 
ScanContext::new().with_deny(deny); + let matches = engine.scan_text("The secret-value-42 should be detected.", &ctx); let deny_match = matches .iter() - .find(|m| m.source == DetectionSource::DenyList) + .find(|m| m.pattern_name.is_none()) .expect("deny list value should be injected"); assert_eq!(deny_match.value, "secret-value-42"); assert_eq!(deny_match.confidence, 1.0); assert_eq!(deny_match.entity_kind, EntityKind::PersonName); + assert_eq!(deny_match.recognition_methods, vec![RecognitionMethod::Ner]); } #[test] fn deny_list_not_injected_when_absent() { - let deny = DenyList::new().with("not-in-text", EntityCategory::Pii, EntityKind::PersonName); + let deny = DenyList::new().with( + "not-in-text", + DenyRule { + category: EntityCategory::PersonalIdentity, + entity_kind: EntityKind::PersonName, + method: RecognitionMethod::Manual, + }, + ); let engine = PatternEngine::builder() .with_patterns(&["email"]) - .with_deny(deny) .build() .unwrap(); - let matches = engine.scan_text("Nothing special here."); + let ctx = ScanContext::new().with_deny(deny); + let matches = engine.scan_text("Nothing special here.", &ctx); assert!( - !matches - .iter() - .any(|m| m.source == DetectionSource::DenyList), + !matches.iter().any(|m| m.pattern_name.is_none()), "deny list value not in text should not be injected" ); } @@ -403,24 +419,36 @@ mod tests { } #[test] - fn deny_list_from_iterator() { - let deny: DenyList = [ - ("secret", EntityCategory::Pii, EntityKind::PersonName), - ("other", EntityCategory::Financial, EntityKind::PaymentCard), - ] - .into_iter() - .collect(); + fn deny_list_insert_and_lookup() { + let mut deny = DenyList::new(); + deny.insert( + "secret", + DenyRule { + category: EntityCategory::PersonalIdentity, + entity_kind: EntityKind::PersonName, + method: RecognitionMethod::Ner, + }, + ); + deny.insert( + "other", + DenyRule { + category: EntityCategory::Financial, + entity_kind: EntityKind::PaymentCard, + method: RecognitionMethod::Manual, + }, + ); 
assert_eq!(deny.len(), 2); assert!(deny.contains("secret")); - let entry = deny.get("other").unwrap(); - assert_eq!(entry.category, EntityCategory::Financial); + let rule = deny.get("other").unwrap(); + assert_eq!(rule.category, EntityCategory::Financial); + assert_eq!(rule.method, RecognitionMethod::Manual); } #[test] fn column_confidence_applies_to_csv_dictionaries() { - let engine = default_engine(); + let engine = PatternEngine::instance(); // "US Dollar" is column 0 (full name), "USD" is column 1 (code). - let matches = engine.scan_text("I paid in US Dollar and also in USD."); + let matches = engine.scan_text("I paid in US Dollar and also in USD.", &empty_ctx()); let full_name = matches.iter().find(|m| m.value == "US Dollar"); let code = matches.iter().find(|m| m.value == "USD"); assert!(full_name.is_some(), "should match 'US Dollar'"); @@ -439,15 +467,42 @@ mod tests { .with_patterns(&["ssn"]) .build() .unwrap(); - let matches = engine.scan_text("SSN: 123-45-6789"); - let ssn_match = matches.iter().find(|m| m.pattern_name == "ssn").unwrap(); + let matches = engine.scan_text("SSN: 123-45-6789", &empty_ctx()); + let ssn_match = matches + .iter() + .find(|m| m.pattern_name.as_deref() == Some("ssn")) + .unwrap(); assert!( ssn_match.context.is_some(), - "SSN pattern should carry context rule through to PatternMatch" + "SSN pattern should carry context rule through to RawMatch" ); let ctx = ssn_match.context.as_ref().unwrap(); assert!(!ctx.keywords.is_empty()); assert!(ctx.window > 0); assert!(ctx.boost > 0.0); } + + #[test] + fn into_entity_builds_entity_without_location() { + let raw = RawMatch { + pattern_name: Some("ssn".into()), + category: EntityCategory::PersonalIdentity, + entity_kind: EntityKind::GovernmentId, + value: "123-45-6789".into(), + start: 5, + end: 16, + confidence: 0.9, + recognition_methods: vec![RecognitionMethod::Regex, RecognitionMethod::Checksum], + context: None, + }; + let entity = raw.into_entity(); + assert_eq!(entity.value, 
"123-45-6789"); + assert_eq!(entity.entity_kind, EntityKind::GovernmentId); + assert_eq!( + entity.recognition_methods, + vec![RecognitionMethod::Regex, RecognitionMethod::Checksum] + ); + assert!((entity.confidence - 0.9).abs() < f64::EPSILON); + assert!(entity.location.is_none()); + } } diff --git a/crates/nvisy-pattern/src/engine/pattern_match.rs b/crates/nvisy-pattern/src/engine/pattern_match.rs index a968d613..8af6afb5 100644 --- a/crates/nvisy-pattern/src/engine/pattern_match.rs +++ b/crates/nvisy-pattern/src/engine/pattern_match.rs @@ -1,25 +1,15 @@ -//! [`PatternMatch`] and [`DetectionSource`] — output types from pattern scanning. +//! [`RawMatch`]: output type from pattern scanning. -use nvisy_ontology::entity::{EntityCategory, EntityKind}; +use nvisy_ontology::entity::{Entity, EntityCategory, EntityKind, RecognitionMethod}; use crate::patterns::ContextRule; -/// How the match was produced. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum DetectionSource { - /// Matched by a compiled regular expression. - Regex, - /// Matched by Aho-Corasick dictionary lookup. - Dictionary, - /// Injected by the deny list (known sensitive value). - DenyList, -} - /// A single match produced by [`PatternEngine::scan_text`](super::PatternEngine::scan_text). #[derive(Debug, Clone)] -pub struct PatternMatch { - /// Name of the pattern that produced this match. - pub pattern_name: String, +pub struct RawMatch { + /// Name of the pattern that produced this match, or `None` for + /// deny-list–injected matches. + pub pattern_name: Option, /// Entity category of the match. pub category: EntityCategory, /// Entity kind of the match. @@ -32,8 +22,38 @@ pub struct PatternMatch { pub end: usize, /// Confidence score assigned by the pattern definition. pub confidence: f64, - /// How this match was produced (regex, dictionary, or deny list). - pub source: DetectionSource, + /// Recognition methods that produced this match, ordered by + /// application time (e.g. 
`[Regex, Checksum]` when a regex + /// match was confirmed by a validator). + pub recognition_methods: Vec, /// Optional context rule for span-level co-occurrence scoring. pub context: Option, } + +impl RawMatch { + /// Build an [`Entity`] from this match. + /// + /// The returned entity has no location or parent set: the caller + /// should attach those from the span context via + /// [`Entity::with_location`] and [`Entity::with_parent`]. + /// + /// # Panics + /// + /// Panics if `recognition_methods` is empty. All engine-produced + /// matches always carry at least one method. + pub fn into_entity(self) -> Entity { + debug_assert!( + !self.recognition_methods.is_empty(), + "RawMatch::into_entity requires at least one recognition method" + ); + let mut entity = Entity::new( + self.category, + self.entity_kind, + self.value, + self.recognition_methods[0], + self.confidence, + ); + entity.recognition_methods = self.recognition_methods; + entity + } +} diff --git a/crates/nvisy-pattern/src/engine/scan_context.rs b/crates/nvisy-pattern/src/engine/scan_context.rs new file mode 100644 index 00000000..19e81769 --- /dev/null +++ b/crates/nvisy-pattern/src/engine/scan_context.rs @@ -0,0 +1,50 @@ +//! [`ScanContext`]: per-scan allow/deny list configuration. + +use super::allow_list::AllowList; +use super::deny_list::DenyList; + +/// Per-scan configuration for allow and deny lists. +/// +/// Passed to [`PatternEngine::scan_text`](super::PatternEngine::scan_text) +/// to control per-invocation suppression and forced detection without +/// rebuilding the engine. 
+/// +/// # Examples +/// +/// ```rust,ignore +/// use nvisy_pattern::prelude::*; +/// use nvisy_ontology::entity::{EntityCategory, EntityKind, RecognitionMethod}; +/// +/// let ctx = ScanContext::new() +/// .with_allow(AllowList::new().with("000-00-0000")) +/// .with_deny(DenyList::new().with("secret", DenyRule { +/// category: EntityCategory::PersonalIdentity, +/// entity_kind: EntityKind::PersonName, +/// method: RecognitionMethod::Manual, +/// })); +/// let matches = PatternEngine::instance().scan_text("text", &ctx); +/// ``` +#[derive(Debug, Clone, Default)] +pub struct ScanContext { + pub(super) allow: AllowList, + pub(super) deny: DenyList, +} + +impl ScanContext { + /// Create an empty scan context (no allow/deny filtering). + pub fn new() -> Self { + Self::default() + } + + /// Set the allow list. + pub fn with_allow(mut self, list: AllowList) -> Self { + self.allow = list; + self + } + + /// Set the deny list. + pub fn with_deny(mut self, list: DenyList) -> Self { + self.deny = list; + self + } +} diff --git a/crates/nvisy-pattern/src/lib.rs b/crates/nvisy-pattern/src/lib.rs index 9f505274..f3555804 100644 --- a/crates/nvisy-pattern/src/lib.rs +++ b/crates/nvisy-pattern/src/lib.rs @@ -2,13 +2,14 @@ #![cfg_attr(docsrs, feature(doc_cfg))] #![doc = include_str!("../README.md")] -pub(crate) mod dictionaries; -pub mod engine; -pub(crate) mod patterns; +pub mod dictionaries; +pub(crate) mod engine; +pub mod patterns; pub(crate) mod validators; -pub use engine::{DetectionSource, PatternEngine, PatternEngineBuilder, PatternMatch}; -pub use patterns::ContextRule; +pub use self::engine::{ + AllowList, DenyList, DenyRule, PatternEngine, PatternEngineBuilder, RawMatch, ScanContext, +}; #[doc(hidden)] pub mod prelude; diff --git a/crates/nvisy-pattern/src/patterns/context_rule.rs b/crates/nvisy-pattern/src/patterns/context_rule.rs index ef0feb0d..1e596bdc 100644 --- a/crates/nvisy-pattern/src/patterns/context_rule.rs +++ 
b/crates/nvisy-pattern/src/patterns/context_rule.rs @@ -1,29 +1,59 @@ -//! [`ContextRule`] — co-occurrence context for span-level confidence boosting. +//! [`ContextRule`]: co-occurrence context for span-level confidence boosting. -use serde::{Deserialize, Serialize}; +use serde::Deserialize; /// Co-occurrence context rule for span-level confidence boosting. /// /// When a pattern match is found, nearby spans are searched for any of the -/// `keywords`. If at least one keyword is present within `window` spans, +/// `keywords`. If at least one keyword is present within `window` spans, /// the match confidence is increased by `boost` (clamped to `[0.0, 1.0]`). -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Deserialize)] +#[serde(try_from = "RawContextRule")] pub struct ContextRule { /// Keywords to look for in nearby spans. pub keywords: Vec, /// Number of spans before and after the match span to search. - #[serde(default = "default_window")] pub window: usize, /// Confidence adjustment when at least one keyword is found. - #[serde(default = "default_boost")] + /// Must be in the range `[0.0, 1.0]`. pub boost: f64, /// Whether keyword matching is case-sensitive. /// - /// Defaults to `false` (case-insensitive). - #[serde(default)] + /// Defaults to `false`: case-insensitive. pub case_sensitive: bool, } +/// Serde intermediary that mirrors the JSON shape before validation. 
+#[derive(Debug, Clone, Deserialize)] +struct RawContextRule { + keywords: Vec, + #[serde(default = "default_window")] + window: usize, + #[serde(default = "default_boost")] + boost: f64, + #[serde(default)] + case_sensitive: bool, +} + +impl TryFrom for ContextRule { + type Error = String; + + fn try_from(raw: RawContextRule) -> Result { + if !(0.0..=1.0).contains(&raw.boost) { + return Err(format!( + "context rule boost must be in [0.0, 1.0], got {}", + raw.boost + )); + } + Ok(Self { + keywords: raw.keywords, + window: raw.window, + boost: raw.boost, + case_sensitive: raw.case_sensitive, + }) + } +} + fn default_window() -> usize { 3 } diff --git a/crates/nvisy-pattern/src/patterns/json_pattern.rs b/crates/nvisy-pattern/src/patterns/json_pattern.rs index 750e2ac3..cc7634d3 100644 --- a/crates/nvisy-pattern/src/patterns/json_pattern.rs +++ b/crates/nvisy-pattern/src/patterns/json_pattern.rs @@ -1,15 +1,16 @@ -//! JSON-backed `JsonPattern` implementation. +//! JSON-backed [`JsonPattern`] implementation. //! //! Each JSON file under `assets/patterns/` is deserialized into a -//! `JsonPattern` via `from_bytes`. The method returns the validated -//! pattern together with any non-fatal `JsonPatternWarning`s so the -//! caller can decide how to surface them. +//! [`JsonPattern`] via [`from_bytes`](JsonPattern::from_bytes). The method +//! returns the validated pattern together with any non-fatal +//! [`JsonPatternWarning`]s so the caller can decide how to surface them. use nvisy_ontology::entity::{EntityCategory, EntityKind}; use serde::Deserialize; use super::context_rule::ContextRule; use super::pattern::{DictionaryPattern, MatchSource, Pattern, RegexPattern}; +use crate::validators::ValidatorResolver; /// Error returned when a JSON pattern file cannot be loaded. #[derive(Debug, thiserror::Error)] @@ -25,10 +26,6 @@ pub enum JsonPatternError { /// indicate misconfiguration (e.g. a typo in the validator name). 
#[derive(Debug)] pub enum JsonPatternWarning { - /// The `"category"` value was not a recognised variant and fell through - /// to [`EntityCategory::Custom`]. - UnknownCategory { pattern: String, slug: String }, - /// The `"validator"` name does not match any built-in validator, so /// the pattern will have no post-match validation. UnknownValidator { pattern: String, validator: String }, @@ -37,7 +34,7 @@ pub enum JsonPatternWarning { /// A detection pattern deserialized from a JSON definition file. /// /// Implements the [`Pattern`] trait and is the only concrete implementation -/// shipped with this crate. Construct via `from_bytes`. +/// shipped with this crate. Construct via [`from_bytes`](Self::from_bytes). #[derive(Debug, Clone)] pub struct JsonPattern { name: String, @@ -50,6 +47,10 @@ pub struct JsonPattern { impl JsonPattern { /// Deserialize and validate a pattern from raw JSON bytes. /// + /// `validators` is used to check whether a referenced validator name + /// is registered: unrecognised names produce a [`JsonPatternWarning`] + /// but do not prevent loading. + /// /// On success returns the pattern together with a (possibly empty) /// list of [`JsonPatternWarning`]s. /// @@ -60,8 +61,9 @@ impl JsonPattern { /// and `dictionary`). pub(crate) fn from_bytes( bytes: &[u8], + validators: &ValidatorResolver, ) -> Result<(Self, Vec), JsonPatternError> { - /// Serde helper: exactly one of `pattern` or `dictionary`. + /// Serde helper: exactly one of `pattern` or `dictionary` must be present. #[derive(Deserialize)] #[serde(untagged)] enum RawSource { @@ -91,19 +93,11 @@ impl JsonPattern { let mut warnings = Vec::new(); - if let EntityCategory::Custom(ref slug) = raw.category { - warnings.push(JsonPatternWarning::UnknownCategory { - pattern: raw.name.clone(), - slug: slug.clone(), - }); - } if let MatchSource::Regex(RegexPattern { validator: Some(ref v), .. 
}) = match_source - && crate::validators::ValidatorResolver::builtins() - .resolve(v) - .is_none() + && validators.resolve(v).is_none() { warnings.push(JsonPatternWarning::UnknownValidator { pattern: raw.name.clone(), @@ -128,8 +122,8 @@ impl Pattern for JsonPattern { &self.name } - fn category(&self) -> &EntityCategory { - &self.category + fn category(&self) -> EntityCategory { + self.category } fn entity_kind(&self) -> EntityKind { diff --git a/crates/nvisy-pattern/src/patterns/mod.rs b/crates/nvisy-pattern/src/patterns/mod.rs index 6085e4b2..7adc4e6c 100644 --- a/crates/nvisy-pattern/src/patterns/mod.rs +++ b/crates/nvisy-pattern/src/patterns/mod.rs @@ -1,7 +1,7 @@ //! Built-in detection patterns. //! //! Each pattern is a JSON file under `assets/patterns/` that describes how -//! to detect a single entity type. Files are embedded at compile time with +//! to detect a single entity type. Files are embedded at compile time with //! `include_dir!` and auto-discovered by [`PatternRegistry::load_builtins`]. //! //! # Key types @@ -10,242 +10,17 @@ //! - [`JsonPattern`]: concrete implementation deserialized from JSON. //! - [`MatchSource`]: whether matching is regex-based or dictionary-based. //! - [`ContextRule`]: optional co-occurrence keywords for confidence boosting. -//! - [`PatternRegistry`]: sorted collection with O(log n) lookup by name. +//! - [`PatternRegistry`]: sorted collection with O(log n) lookup. //! - [`JsonPatternWarning`]: non-fatal load-time diagnostics. mod context_rule; mod json_pattern; mod pattern; - -use std::collections::BTreeMap; -use std::sync::LazyLock; - -pub use context_rule::ContextRule; -use include_dir::{Dir, include_dir}; -pub use json_pattern::{JsonPattern, JsonPatternWarning}; -pub use pattern::{BoxPattern, DictionaryConfidence, MatchSource, Pattern}; - -/// A registry of named [`Pattern`] definitions with O(log n) lookup. 
-/// -/// Use [`load_builtins`] to create a registry pre-populated with -/// the compile-time-embedded pattern files. -/// -/// [`load_builtins`]: Self::load_builtins -pub struct PatternRegistry { - inner: BTreeMap, -} - -impl std::fmt::Debug for PatternRegistry { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let names: Vec<&str> = self.inner.keys().map(|s| s.as_str()).collect(); - f.debug_struct("PatternRegistry") - .field("len", &self.inner.len()) - .field("names", &names) - .finish() - } -} - -impl PatternRegistry { - /// Create an empty registry. - pub fn new() -> Self { - Self { - inner: BTreeMap::new(), - } - } - - /// Insert a pattern, keyed by its [`Pattern::name`]. - pub fn insert(&mut self, pattern: BoxPattern) { - let name = pattern.name().to_owned(); - self.inner.insert(name, pattern); - } - - /// Look up a pattern by name. - #[must_use] - pub fn get(&self, name: &str) -> Option<&dyn Pattern> { - self.inner.get(name).map(|b| b.as_ref()) - } - - /// All patterns in deterministic (alphabetical) order. - #[must_use] - pub fn values(&self) -> Vec<&dyn Pattern> { - self.inner.values().map(|b| b.as_ref()).collect() - } - - /// Total number of registered patterns. - #[must_use] - pub fn len(&self) -> usize { - self.inner.len() - } - - /// Load all `.json` files from the embedded `assets/patterns/` - /// directory and return a populated registry. - /// - /// Files that fail to parse are logged as warnings and skipped. 
- #[tracing::instrument(name = "patterns.load_builtins", fields(count))] - pub fn load_builtins() -> Self { - static PATTERN_DIR: Dir = include_dir!("$CARGO_MANIFEST_DIR/assets/patterns"); - - let mut reg = Self::new(); - - for file in PATTERN_DIR.files() { - let path = file.path(); - - let Some("json") = path.extension().and_then(|e| e.to_str()) else { - tracing::warn!( - path = %path.display(), - "skipping non-JSON file in patterns directory", - ); - continue; - }; - - let (pattern, warnings) = match JsonPattern::from_bytes(file.contents()) { - Ok(pair) => pair, - Err(e) => { - tracing::warn!( - path = %path.display(), - error = %e, - "failed to load pattern, skipping", - ); - continue; - } - }; - - for w in &warnings { - match w { - JsonPatternWarning::UnknownCategory { pattern, slug } => { - tracing::warn!(%pattern, category = %slug, "unrecognised category falls through to Custom"); - } - JsonPatternWarning::UnknownValidator { pattern, validator } => { - tracing::warn!(%pattern, %validator, "unknown validator name, pattern will have no post-match validation"); - } - } - } - - tracing::trace!( - name = %pattern.name(), - category = %pattern.category(), - entity_kind = %pattern.entity_kind(), - match_source = ?pattern.match_source(), - "pattern loaded", - ); - reg.insert(Box::new(pattern)); - } - - tracing::Span::current().record("count", reg.len()); - tracing::debug!("built-in patterns loaded"); - reg - } -} - -impl Default for PatternRegistry { - fn default() -> Self { - Self::new() - } -} - -static BUILTIN_REGISTRY: LazyLock = LazyLock::new(PatternRegistry::load_builtins); - -/// Return a reference to the lazily-initialised built-in [`PatternRegistry`]. 
-pub fn builtin_registry() -> &'static PatternRegistry { - &BUILTIN_REGISTRY -} - -#[cfg(test)] -mod tests { - use super::pattern::RegexPattern; - use super::*; - - fn registry() -> &'static PatternRegistry { - builtin_registry() - } - - #[test] - fn builtins_load() { - assert!(registry().len() > 0); - } - - #[test] - fn pattern_names_are_sorted() { - let names: Vec<&str> = registry().values().iter().map(|p| p.name()).collect(); - let mut sorted = names.clone(); - sorted.sort(); - assert_eq!(names, sorted); - } - - #[test] - fn no_duplicate_pattern_names() { - let all = registry().values(); - let names: Vec<_> = all.iter().map(|p| p.name()).collect(); - let unique: std::collections::HashSet<_> = names.iter().collect(); - assert_eq!(names.len(), unique.len(), "duplicate pattern names found"); - } - - #[test] - fn all_patterns_have_valid_fields() { - for p in registry().values() { - assert!(!p.name().is_empty(), "pattern name is empty"); - match p.match_source() { - MatchSource::Regex(rp) => { - assert!(!rp.regex.is_empty(), "regex is empty for {}", p.name()); - assert!(rp.confidence > 0.0, "confidence is 0 for {}", p.name()); - assert!(rp.confidence <= 1.0, "confidence > 1 for {}", p.name()); - } - MatchSource::Dictionary(dp) => { - assert!(!dp.name.is_empty(), "dictionary is empty for {}", p.name()); - let c = dp.confidence.resolve(0); - assert!(c > 0.0, "confidence is 0 for {}", p.name()); - assert!(c <= 1.0, "confidence > 1 for {}", p.name()); - } - } - } - } - - #[test] - fn all_regex_patterns_compile() { - for p in registry().values() { - if let MatchSource::Regex(rp) = p.match_source() { - assert!( - regex::Regex::new(&rp.regex).is_ok(), - "pattern {} failed to compile: {}", - p.name(), - rp.regex, - ); - } - } - } - - #[test] - fn all_validators_resolve() { - let resolver = crate::validators::ValidatorResolver::builtins(); - for p in registry().values() { - if let MatchSource::Regex(RegexPattern { - validator: Some(name), - .. 
- }) = p.match_source() - { - assert!( - resolver.resolve(name).is_some(), - "pattern {} references unregistered validator {name}", - p.name(), - ); - } - } - } - - #[test] - fn registry_insert_and_get() { - let json = br#"{ - "name": "test", - "category": "pii", - "entity_type": "government_id", - "pattern": { "regex": "\\d+", "confidence": 0.9 } - }"#; - let (pattern, _warnings) = JsonPattern::from_bytes(json).unwrap(); - - let mut reg = PatternRegistry::new(); - reg.insert(Box::new(pattern)); - - assert_eq!(reg.len(), 1); - assert_eq!(reg.get("test").unwrap().name(), "test"); - } -} +mod pattern_error; +mod pattern_registry; + +pub use self::context_rule::ContextRule; +pub use self::json_pattern::{JsonPattern, JsonPatternWarning}; +pub use self::pattern::{BoxPattern, DictionaryConfidence, MatchSource, Pattern}; +pub(crate) use self::pattern_error::PatternLoadError; +pub use self::pattern_registry::{PatternRegistry, builtin_registry}; diff --git a/crates/nvisy-pattern/src/patterns/pattern.rs b/crates/nvisy-pattern/src/patterns/pattern.rs index 95e4b6f9..1d074d80 100644 --- a/crates/nvisy-pattern/src/patterns/pattern.rs +++ b/crates/nvisy-pattern/src/patterns/pattern.rs @@ -1,8 +1,4 @@ -//! Core [`Pattern`] trait, [`MatchSource`] enum, and [`BoxPattern`] alias. -//! -//! [`Pattern`]: crate::patterns::Pattern -//! [`MatchSource`]: crate::patterns::MatchSource -//! [`BoxPattern`]: crate::patterns::BoxPattern +//! Core [`Pattern`] trait, [`MatchSource`] enum, and [`BoxPattern`] type alias. use nvisy_ontology::entity::{EntityCategory, EntityKind}; use serde::Deserialize; @@ -14,7 +10,7 @@ use super::context_rule::ContextRule; pub struct RegexPattern { /// The regular expression string. pub regex: String, - /// Optional validator name (e.g. `"luhn"`, `"ssn"`, `"iban"`), + /// Optional validator name (e.g. `"luhn"`, `"ssn"`, `"iban"`): /// resolved at detection time via [`ValidatorResolver`]. 
/// /// [`ValidatorResolver`]: crate::validators::ValidatorResolver @@ -22,9 +18,9 @@ pub struct RegexPattern { pub validator: Option, /// Whether the regex is case-sensitive. /// - /// Defaults to `false`. When `false`, the regex is compiled with - /// inline `(?i)` or equivalent flag. - #[serde(default)] + /// Defaults to `true`. When `false`, the regex is compiled with + /// an inline `(?i)` prefix. + #[serde(default = "default_case_sensitive")] pub case_sensitive: bool, /// Confidence score (0.0–1.0) assigned to matches from this pattern. /// @@ -33,6 +29,19 @@ pub struct RegexPattern { pub confidence: f64, } +impl RegexPattern { + /// Return the regex string ready for compilation. + /// + /// Prepends `(?i)` when [`case_sensitive`](Self::case_sensitive) is `false`. + pub fn effective_regex(&self) -> String { + if self.case_sensitive { + self.regex.clone() + } else { + format!("(?i){}", self.regex) + } + } +} + /// Confidence for a dictionary pattern: either a single uniform score /// or per-column scores for CSV dictionaries. #[derive(Debug, Clone, PartialEq)] @@ -63,7 +72,7 @@ impl Default for DictionaryConfidence { } } -/// Serde helper — accepts either a single number or an array of numbers. +/// Serde helper: accepts either a single number or an array of numbers. mod confidence_serde { use serde::{Deserialize, Deserializer}; @@ -95,15 +104,15 @@ pub struct DictionaryPattern { pub name: String, /// Whether matching is case-sensitive. /// - /// Defaults to `false`. Controls the Aho-Corasick automaton's + /// Defaults to `false`. Controls the Aho-Corasick automaton's /// `ascii_case_insensitive` setting. #[serde(default)] pub case_sensitive: bool, /// Confidence score(s) for matches from this dictionary. /// - /// A single number applies uniformly to all entries. - /// An array assigns per-column confidence for CSV dictionaries - /// (e.g. `[0.85, 0.55]` gives column 0 entries 0.85 and column 1 + /// A single number applies uniformly to all entries. 
An array + /// assigns per-column confidence for CSV dictionaries (e.g. + /// `[0.85, 0.55]` gives column 0 entries 0.85 and column 1 /// entries 0.55). /// /// Defaults to `1.0` when not specified. @@ -126,6 +135,17 @@ pub enum MatchSource { Dictionary(DictionaryPattern), } +/// Default confidence score when `"confidence"` is omitted from JSON. +pub const DEFAULT_CONFIDENCE: f64 = 1.0; + +fn default_confidence() -> f64 { + DEFAULT_CONFIDENCE +} + +fn default_case_sensitive() -> bool { + true +} + /// A named detection pattern. /// /// Implementors describe a single entity type to detect, including how to @@ -137,19 +157,12 @@ pub enum MatchSource { /// from the JSON files under `assets/patterns/`. /// /// [`JsonPattern`]: super::JsonPattern -/// Default confidence score when `"confidence"` is omitted from JSON. -pub const DEFAULT_CONFIDENCE: f64 = 1.0; - -fn default_confidence() -> f64 { - DEFAULT_CONFIDENCE -} - pub trait Pattern: Send + Sync { /// Unique name identifying this pattern (e.g. `"ssn"`, `"credit-card"`). fn name(&self) -> &str; - /// High-level entity category (PII, Financial, Credentials, ...). - fn category(&self) -> &EntityCategory; + /// High-level entity category (PersonalIdentity, Financial, Credentials, ...). + fn category(&self) -> EntityCategory; /// Specific entity kind within the category (e.g. `GovernmentId`, `PaymentCard`). fn entity_kind(&self) -> EntityKind; diff --git a/crates/nvisy-pattern/src/patterns/pattern_error.rs b/crates/nvisy-pattern/src/patterns/pattern_error.rs new file mode 100644 index 00000000..8f29f1e5 --- /dev/null +++ b/crates/nvisy-pattern/src/patterns/pattern_error.rs @@ -0,0 +1,42 @@ +//! Error type for pattern filesystem loading. + +use nvisy_core::{Error, ErrorKind}; + +use super::json_pattern::JsonPatternError; + +/// Error returned when loading patterns from the filesystem. +#[derive(Debug, thiserror::Error)] +pub enum PatternLoadError { + /// The directory could not be read. 
+ #[error("failed to read pattern directory '{}': {source}", path.display())] + ReadDir { + path: std::path::PathBuf, + source: std::io::Error, + }, + /// A pattern file could not be read. + #[error("failed to read pattern file '{}': {source}", path.display())] + ReadFile { + path: std::path::PathBuf, + source: std::io::Error, + }, + /// A pattern file failed to parse. + #[error("failed to parse pattern '{}': {source}", path.display())] + Parse { + path: std::path::PathBuf, + source: JsonPatternError, + }, +} + +impl From for Error { + fn from(err: PatternLoadError) -> Self { + let kind = match &err { + PatternLoadError::ReadDir { .. } | PatternLoadError::ReadFile { .. } => { + ErrorKind::Internal + } + PatternLoadError::Parse { .. } => ErrorKind::Validation, + }; + Error::new(kind, err.to_string()) + .with_component("nvisy-pattern::patterns") + .with_source(err) + } +} diff --git a/crates/nvisy-pattern/src/patterns/pattern_registry.rs b/crates/nvisy-pattern/src/patterns/pattern_registry.rs new file mode 100644 index 00000000..6a66818b --- /dev/null +++ b/crates/nvisy-pattern/src/patterns/pattern_registry.rs @@ -0,0 +1,405 @@ +//! [`PatternRegistry`]: named pattern collection with O(log n) lookup. + +use std::collections::BTreeMap; +use std::path::Path; +use std::sync::LazyLock; + +use include_dir::{Dir, include_dir}; + +use super::{BoxPattern, JsonPattern, JsonPatternWarning, Pattern, PatternLoadError}; +use crate::validators::ValidatorResolver; + +const TARGET: &str = "nvisy_pattern::patterns"; + +/// A registry of named [`Pattern`] definitions with O(log n) lookup. +/// +/// Use [`load_builtins`] to populate with the compile-time-embedded +/// pattern files, or [`load_dir`] / [`load_file`] to load from the +/// filesystem at runtime. 
+/// +/// [`load_builtins`]: Self::load_builtins +/// [`load_dir`]: Self::load_dir +/// [`load_file`]: Self::load_file +#[derive(Default)] +pub struct PatternRegistry { + inner: BTreeMap, +} + +impl std::fmt::Debug for PatternRegistry { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let names: Vec<&str> = self.inner.keys().map(|s| s.as_str()).collect(); + f.debug_struct("PatternRegistry") + .field("len", &self.inner.len()) + .field("names", &names) + .finish() + } +} + +impl PatternRegistry { + /// Create an empty registry. + pub fn new() -> Self { + Self::default() + } + + /// Insert a pattern, keyed by its [`Pattern::name`]. + pub fn insert(&mut self, pattern: BoxPattern) { + let name = pattern.name().to_owned(); + self.inner.insert(name, pattern); + } + + /// Look up a pattern by name. + #[must_use] + pub fn get(&self, name: &str) -> Option<&dyn Pattern> { + self.inner.get(name).map(|b| b.as_ref()) + } + + /// Iterate over all registered patterns as `&dyn Pattern` in + /// deterministic (alphabetical) order. + pub fn iter(&self) -> impl Iterator { + self.inner.values().map(|b| b.as_ref()) + } + + /// Iterate over all registered pattern names. + pub fn names(&self) -> impl Iterator { + self.inner.keys().map(|s| s.as_str()) + } + + /// Total number of registered patterns. + #[must_use] + pub fn len(&self) -> usize { + self.inner.len() + } + + /// Whether the registry contains no patterns. + #[must_use] + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + /// Load all `.json` files from the embedded `assets/patterns/` + /// directory into this registry. + /// + /// Files that fail to parse are logged as warnings and skipped. 
+ #[tracing::instrument(target = TARGET, name = "patterns.load_builtins", skip(self), fields(count))] + pub fn load_builtins(&mut self) { + static PATTERN_DIR: Dir = include_dir!("$CARGO_MANIFEST_DIR/assets/patterns"); + + let validators = ValidatorResolver::builtins(); + + for file in PATTERN_DIR.files() { + let path = file.path(); + + let Some("json") = path.extension().and_then(|e| e.to_str()) else { + tracing::warn!( + target: TARGET, + path = %path.display(), + "skipping non-JSON file in patterns directory", + ); + continue; + }; + + let (pattern, warnings) = match JsonPattern::from_bytes(file.contents(), &validators) { + Ok(pair) => pair, + Err(e) => { + tracing::warn!( + target: TARGET, + path = %path.display(), + error = %e, + "failed to load pattern, skipping", + ); + continue; + } + }; + + Self::log_warnings(&warnings); + + tracing::trace!( + target: TARGET, + name = %pattern.name(), + category = %pattern.category(), + entity_kind = %pattern.entity_kind(), + match_source = ?pattern.match_source(), + "pattern loaded", + ); + self.insert(Box::new(pattern)); + } + + tracing::Span::current().record("count", self.len()); + tracing::debug!(target: TARGET, "built-in patterns loaded"); + } + + /// Load a single `.json` pattern file and insert it. + /// + /// The pattern name is derived from the JSON `"name"` field, not + /// the file name. Files with non-`.json` extensions are logged as + /// warnings and ignored (no error is returned). + /// + /// # Errors + /// + /// Returns [`nvisy_core::Error`] if the file cannot be read or + /// the JSON content cannot be parsed. 
+ #[tracing::instrument(target = TARGET, name = "patterns.load_file", skip_all, fields(path = %path.as_ref().display()))] + pub fn load_file(&mut self, path: impl AsRef) -> nvisy_core::Result<()> { + let path = path.as_ref(); + + let Some("json") = path.extension().and_then(|e| e.to_str()) else { + tracing::warn!( + target: TARGET, + path = %path.display(), + "skipping non-JSON pattern file", + ); + return Ok(()); + }; + + let bytes = std::fs::read(path).map_err(|source| PatternLoadError::ReadFile { + path: path.to_owned(), + source, + })?; + + let validators = ValidatorResolver::builtins(); + let (pattern, warnings) = + JsonPattern::from_bytes(&bytes, &validators).map_err(|source| { + PatternLoadError::Parse { + path: path.to_owned(), + source, + } + })?; + + Self::log_warnings(&warnings); + + tracing::trace!( + target: TARGET, + name = %pattern.name(), + category = %pattern.category(), + entity_kind = %pattern.entity_kind(), + match_source = ?pattern.match_source(), + "pattern loaded from filesystem", + ); + self.insert(Box::new(pattern)); + Ok(()) + } + + /// Load all `.json` files from a filesystem directory. + /// + /// Non-`.json` files are logged as warnings and skipped. Loaded + /// patterns are inserted into `self`, so this can be called after + /// [`load_builtins`](Self::load_builtins) to layer user-provided + /// patterns on top of the built-ins. + /// + /// # Errors + /// + /// Returns [`nvisy_core::Error`] if the directory cannot be read, + /// a file cannot be read, or a JSON file fails to parse. 
+ #[tracing::instrument(target = TARGET, name = "patterns.load_dir", skip_all, fields(path = %dir.as_ref().display(), count))] + pub fn load_dir(&mut self, dir: impl AsRef) -> nvisy_core::Result<()> { + let dir = dir.as_ref(); + + let entries = std::fs::read_dir(dir).map_err(|source| PatternLoadError::ReadDir { + path: dir.to_owned(), + source, + })?; + + let mut count = 0usize; + for entry in entries { + let entry = entry.map_err(|source| PatternLoadError::ReadDir { + path: dir.to_owned(), + source, + })?; + let path = entry.path(); + + if !path.is_file() { + continue; + } + + self.load_file(&path)?; + count += 1; + } + + tracing::Span::current().record("count", count); + tracing::debug!(target: TARGET, "filesystem patterns loaded"); + Ok(()) + } + + fn log_warnings(warnings: &[JsonPatternWarning]) { + for w in warnings { + match w { + JsonPatternWarning::UnknownValidator { pattern, validator } => { + tracing::warn!( + target: TARGET, + %pattern, + %validator, + "unknown validator name, pattern will have no post-match validation", + ); + } + } + } + } +} + +static BUILTIN_REGISTRY: LazyLock = LazyLock::new(|| { + let mut reg = PatternRegistry::new(); + reg.load_builtins(); + reg +}); + +/// Return a reference to the lazily-initialised built-in [`PatternRegistry`]. 
+pub fn builtin_registry() -> &'static PatternRegistry { + &BUILTIN_REGISTRY +} + +#[cfg(test)] +mod tests { + use std::collections::HashSet; + + use super::super::json_pattern::JsonPattern; + use super::super::pattern::{MatchSource, RegexPattern}; + use super::*; + use crate::validators::ValidatorResolver; + + fn registry() -> &'static PatternRegistry { + builtin_registry() + } + + #[test] + fn builtins_load() { + assert!(!registry().is_empty()); + } + + #[test] + fn pattern_names_are_sorted() { + let names: Vec<&str> = registry().names().collect(); + let mut sorted = names.clone(); + sorted.sort(); + assert_eq!(names, sorted); + } + + #[test] + fn no_duplicate_pattern_names() { + let names: Vec<_> = registry().names().collect(); + let unique: HashSet<_> = names.iter().collect(); + assert_eq!(names.len(), unique.len(), "duplicate pattern names found"); + } + + #[test] + fn all_patterns_have_valid_fields() { + for p in registry().iter() { + assert!(!p.name().is_empty(), "pattern name is empty"); + match p.match_source() { + MatchSource::Regex(rp) => { + assert!(!rp.regex.is_empty(), "regex is empty for {}", p.name()); + assert!(rp.confidence > 0.0, "confidence is 0 for {}", p.name()); + assert!(rp.confidence <= 1.0, "confidence > 1 for {}", p.name()); + } + MatchSource::Dictionary(dp) => { + assert!(!dp.name.is_empty(), "dictionary is empty for {}", p.name()); + let c = dp.confidence.resolve(0); + assert!(c > 0.0, "confidence is 0 for {}", p.name()); + assert!(c <= 1.0, "confidence > 1 for {}", p.name()); + } + } + } + } + + #[test] + fn all_regex_patterns_compile() { + for p in registry().iter() { + if let MatchSource::Regex(rp) = p.match_source() { + assert!( + regex::Regex::new(&rp.effective_regex()).is_ok(), + "pattern {} failed to compile: {}", + p.name(), + rp.regex, + ); + } + } + } + + #[test] + fn all_validators_resolve() { + let resolver = ValidatorResolver::builtins(); + for p in registry().iter() { + if let MatchSource::Regex(RegexPattern { + validator: 
Some(name), + .. + }) = p.match_source() + { + assert!( + resolver.resolve(name).is_some(), + "pattern {} references unregistered validator {name}", + p.name(), + ); + } + } + } + + #[test] + fn registry_insert_and_get() { + let validators = ValidatorResolver::builtins(); + let json = br#"{ + "name": "test", + "category": "personal_identity", + "entity_type": "government_id", + "pattern": { "regex": "\\d+", "confidence": 0.9 } + }"#; + let (pattern, _warnings) = JsonPattern::from_bytes(json, &validators).unwrap(); + + let mut reg = PatternRegistry::new(); + reg.insert(Box::new(pattern)); + + assert_eq!(reg.len(), 1); + assert_eq!(reg.get("test").unwrap().name(), "test"); + } + + #[test] + fn load_dir_reads_filesystem() { + let dir = tempfile::tempdir().unwrap(); + + std::fs::write( + dir.path().join("test_pattern.json"), + r#"{ + "name": "test_fs", + "category": "personal_identity", + "entity_type": "government_id", + "pattern": { "regex": "\\d{3}", "confidence": 0.8 } + }"#, + ) + .unwrap(); + // Should be skipped. 
+ std::fs::write(dir.path().join("readme.md"), "ignore me").unwrap(); + + let mut reg = PatternRegistry::new(); + reg.load_dir(dir.path()).unwrap(); + + assert_eq!(reg.len(), 1); + assert_eq!(reg.get("test_fs").unwrap().name(), "test_fs"); + } + + #[test] + fn load_dir_missing_directory() { + let mut reg = PatternRegistry::new(); + let result = reg.load_dir("/nonexistent/path"); + assert!(result.is_err()); + } + + #[test] + fn load_file_single_pattern() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("single.json"); + std::fs::write( + &path, + r#"{ + "name": "single_test", + "category": "contact_info", + "entity_type": "email_address", + "pattern": { "regex": ".+@.+", "confidence": 0.7 } + }"#, + ) + .unwrap(); + + let mut reg = PatternRegistry::new(); + reg.load_file(&path).unwrap(); + + assert_eq!(reg.len(), 1); + assert_eq!(reg.get("single_test").unwrap().name(), "single_test"); + } +} diff --git a/crates/nvisy-pattern/src/prelude.rs b/crates/nvisy-pattern/src/prelude.rs index 49247186..c15685ac 100644 --- a/crates/nvisy-pattern/src/prelude.rs +++ b/crates/nvisy-pattern/src/prelude.rs @@ -4,4 +4,6 @@ //! use nvisy_pattern::prelude::*; //! ``` -pub use crate::{ContextRule, DetectionSource, PatternEngine, PatternEngineBuilder, PatternMatch}; +pub use crate::{ + AllowList, DenyList, DenyRule, PatternEngine, PatternEngineBuilder, RawMatch, ScanContext, +}; diff --git a/crates/nvisy-pattern/src/validators/luhn.rs b/crates/nvisy-pattern/src/validators/luhn.rs index 8a1bd530..0d26d890 100644 --- a/crates/nvisy-pattern/src/validators/luhn.rs +++ b/crates/nvisy-pattern/src/validators/luhn.rs @@ -2,29 +2,47 @@ //! //! Implements the [Luhn algorithm](https://en.wikipedia.org/wiki/Luhn_algorithm) //! used to validate credit/debit card numbers and other identification -//! numbers. Non-digit characters (spaces, dashes) are stripped before -//! the check. +//! numbers. Only digits, spaces, and dashes are accepted as input: any +//! 
other character causes the check to fail. /// Return `true` if `num` passes the Luhn checksum. /// -/// All non-digit characters are ignored, so `"4539 1488 0343 6467"`, -/// `"4539-1488-0343-6467"`, and `"4539148803436467"` are equivalent. +/// Spaces and dashes are stripped before validation, so +/// `"4539 1488 0343 6467"`, `"4539-1488-0343-6467"`, and +/// `"4539148803436467"` are all equivalent. +/// +/// Returns `false` if the input is empty or contains characters other +/// than digits, spaces, and dashes. pub fn luhn_check(num: &str) -> bool { - let digits: String = num.chars().filter(|c| c.is_ascii_digit()).collect(); + if num.is_empty() { + return false; + } + + // Reject anything that isn't a digit, space, or dash. + if !num + .chars() + .all(|c| c.is_ascii_digit() || c == ' ' || c == '-') + { + return false; + } + + let digits: Vec = num.chars().filter_map(|c| c.to_digit(10)).collect(); + if digits.is_empty() { return false; } + let mut sum = 0u32; let mut alternate = false; - for ch in digits.chars().rev() { - let mut n = ch.to_digit(10).unwrap_or(0); + for &n in digits.iter().rev() { + let mut d = n; if alternate { - n *= 2; - if n > 9 { - n -= 9; + d *= 2; + if d > 9 { + d -= 9; } } - sum += n; + sum += d; alternate = !alternate; } sum.is_multiple_of(10) @@ -57,8 +75,20 @@ mod tests { assert!(!luhn_check("abcdef")); } + #[test] + fn mixed_alpha_digit_rejected() { + assert!(!luhn_check("45abc39")); + assert!(!luhn_check("4539 14X8 0343 6467")); + } + #[test] fn single_zero() { assert!(luhn_check("0")); } + + #[test] + fn only_separators_rejected() { + assert!(!luhn_check(" ")); + assert!(!luhn_check("---")); + } } diff --git a/crates/nvisy-pattern/src/validators/mod.rs b/crates/nvisy-pattern/src/validators/mod.rs index bb57cd13..4c1e4762 100644 --- a/crates/nvisy-pattern/src/validators/mod.rs +++ b/crates/nvisy-pattern/src/validators/mod.rs @@ -1,7 +1,7 @@ //! Post-match validators for detected entity values. //! //! 
Patterns can reference a validator by name (e.g. `"validator": "luhn"`) -//! to reduce false positives. At detection time the name is resolved to a +//! to reduce false positives. At detection time the name is resolved to a //! [`ValidatorFn`] via [`ValidatorResolver`]. mod iban; @@ -10,22 +10,19 @@ mod ssn; use std::collections::HashMap; -pub use iban::validate_iban; -pub use luhn::luhn_check; -pub use ssn::validate_ssn; +pub use self::iban::validate_iban; +pub use self::luhn::luhn_check; +pub use self::ssn::validate_ssn; -/// Signature for a validation function: takes the matched text and returns -/// `true` if the value is valid. +/// Validation function signature: takes matched text, returns `true` if +/// the value is valid. pub type ValidatorFn = fn(&str) -> bool; /// Maps validator names to [`ValidatorFn`]s. /// -/// Created with the built-in validators via [`builtins`] (or -/// [`Default`]), then optionally extended with [`register`] for -/// custom validators. -/// -/// [`builtins`]: Self::builtins -/// [`register`]: Self::register +/// Created with the built-in validators via [`builtins`](Self::builtins) +/// (or [`Default`]), then optionally extended with +/// [`register`](Self::register) for custom validators. 
#[derive(Debug, Clone)] pub struct ValidatorResolver { table: HashMap<&'static str, ValidatorFn>, diff --git a/crates/nvisy-python/Cargo.toml b/crates/nvisy-python/Cargo.toml index 6899fe59..f3db0c7e 100644 --- a/crates/nvisy-python/Cargo.toml +++ b/crates/nvisy-python/Cargo.toml @@ -2,8 +2,8 @@ [package] name = "nvisy-python" -description = "PyO3 bridge for AI NER/OCR detection via embedded Python" -keywords = ["nvisy", "python", "pyo3", "ner"] +description = "PyO3 bridge for Python-backed processing via embedded Python" +keywords = ["nvisy", "python", "pyo3", "exif"] categories = ["api-bindings"] readme = "README.md" @@ -35,6 +35,9 @@ hipstr = { workspace = true, features = [] } # Async runtime and parallelism tokio = { workspace = true, features = ["sync", "rt"] } +# Observability +tracing = { workspace = true, features = [] } + # Python interop pyo3 = { workspace = true, features = ["auto-initialize"] } pyo3-async-runtimes = { workspace = true, features = [] } diff --git a/crates/nvisy-python/src/bridge/error.rs b/crates/nvisy-python/src/bridge/error.rs index 3d0e6852..c6a6b62b 100644 --- a/crates/nvisy-python/src/bridge/error.rs +++ b/crates/nvisy-python/src/bridge/error.rs @@ -1,15 +1,16 @@ -//! Conversion utilities from Python errors to [`Error`]. +//! Conversion from Python errors to [`Error`]. use nvisy_core::Error; use pyo3::PyErr; use pyo3::types::PyTracebackMethods; -/// Convert a [`PyErr`] into an [`Error`], preserving the Python traceback when available. +/// Converts a [`PyErr`] into an [`Error`], preserving the Python +/// traceback when available. 
pub fn from_pyerr(err: PyErr) -> Error { pyo3::Python::with_gil(|py| { let traceback = err.traceback(py).map(|tb| tb.format().unwrap_or_default()); let msg = match traceback { - Some(tb) => format!("{}\n{}", err, tb), + Some(tb) => format!("{err}\n{tb}"), None => err.to_string(), }; Error::runtime(msg, "python", false) diff --git a/crates/nvisy-python/src/bridge/mod.rs b/crates/nvisy-python/src/bridge/mod.rs index 9e9a6142..1c381dba 100644 --- a/crates/nvisy-python/src/bridge/mod.rs +++ b/crates/nvisy-python/src/bridge/mod.rs @@ -1,57 +1,88 @@ //! Lightweight handle to a Python module loaded via PyO3. //! -//! Provides [`PythonBridge`] — a thin wrapper that remembers which Python -//! module to import — plus helpers for calling synchronous and asynchronous +//! Provides [`PythonBridge`]: a thin wrapper that remembers which Python +//! module to import, plus helpers for calling synchronous and asynchronous //! Python functions from Rust async code. mod error; -pub use error::from_pyerr; use hipstr::HipStr; use nvisy_core::Error; use pyo3::prelude::*; use pyo3::types::PyDict; use serde_json::Value; -/// Lightweight handle to a Python NER module. +pub use self::error::from_pyerr; + +const TARGET: &str = "nvisy_python::bridge"; + +/// Lightweight handle to a Python module. /// -/// The bridge does **not** hold the GIL or any Python objects; it simply -/// remembers which module to `import` when a detection function is called. +/// The bridge does **not** hold the GIL or any Python objects: it simply +/// remembers which module to `import` when a function is called. /// The default module name is `"nvisy_ai"`. #[derive(Clone)] pub struct PythonBridge { - /// Dotted Python module name to import (e.g., `"nvisy_ai"`). + /// Dotted Python module name to import (e.g. `"nvisy_ai"`). module_name: HipStr<'static>, } +impl std::fmt::Debug for PythonBridge { + /// Formats the bridge for debugging, showing only the module name. 
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PythonBridge") + .field("module_name", &self.module_name.as_str()) + .finish() + } +} + impl PythonBridge { - /// Create a new bridge that will load the given Python module. + /// Creates a new bridge that will load the given Python module. pub fn new(module_name: impl Into>) -> Self { Self { module_name: module_name.into(), } } - /// Initialize Python and verify the module can be imported. + /// Initializes Python and verifies the module can be imported. + /// + /// # Errors + /// + /// Returns an error if the Python interpreter cannot be started or + /// the module cannot be imported. + #[tracing::instrument(target = TARGET, name = "bridge.init", skip(self), fields(module = %self.module_name))] pub fn init(&self) -> Result<(), Error> { Python::with_gil(|py| { py.import(&*self.module_name).map_err(from_pyerr)?; + tracing::debug!(target: TARGET, "python module imported"); Ok(()) }) } - /// Get the module name. + /// Returns the dotted Python module name. + #[must_use] pub fn module_name(&self) -> &str { &self.module_name } - /// Call a **synchronous** Python method on the bridge module inside + /// Calls a **synchronous** Python method on the bridge module inside /// `spawn_blocking` + `Python::with_gil`. /// - /// `build_kwargs` receives a GIL token and must return a [`PyDict`] of - /// keyword arguments. The method is invoked as - /// `module.(**, kwargs)` and the return value is deserialized + /// `build_kwargs` receives a GIL token and must return a [`PyDict`] + /// of keyword arguments. The method is invoked as + /// `module.(**kwargs)` and the return value is deserialized /// into `Vec`. + /// + /// # Errors + /// + /// Returns an error if the Python call fails or the return value + /// cannot be deserialized. 
+ #[tracing::instrument( + target = TARGET, + name = "bridge.call_sync", + skip(self, build_kwargs), + fields(module = %self.module_name, method), + )] pub async fn call_sync(&self, method: &str, build_kwargs: F) -> Result, Error> where F: FnOnce(Python<'_>) -> Result, Error> + Send + 'static, @@ -59,6 +90,8 @@ impl PythonBridge { let module_name = self.module_name.clone(); let method = method.to_string(); + tracing::Span::current().record("method", &method); + tokio::task::spawn_blocking(move || { Python::with_gil(|py| { let module = py.import(&*module_name).map_err(from_pyerr)?; @@ -70,7 +103,7 @@ impl PythonBridge { pythonize::depythonize::>(&result).map_err(|e| { Error::runtime( - format!("Failed to deserialize {} result: {}", method, e), + format!("failed to deserialize {method} result: {e}"), "python", false, ) @@ -78,17 +111,28 @@ impl PythonBridge { }) }) .await - .map_err(|e| Error::runtime(format!("Task join error: {}", e), "python", false))? + .map_err(|e| Error::runtime(format!("blocking task panicked: {e}"), "python", false))? } - /// Call an **asynchronous** (coroutine) Python method on the bridge + /// Calls an **asynchronous** (coroutine) Python method on the bridge /// module. /// /// Acquires the GIL, invokes `module.(**kwargs)` to obtain a /// Python coroutine, converts it to a Rust [`Future`] via /// [`pyo3_async_runtimes::tokio::into_future`], and awaits it on the - /// Tokio runtime. The coroutine's return value is deserialized into + /// Tokio runtime. The coroutine's return value is deserialized into /// `Vec`. + /// + /// # Errors + /// + /// Returns an error if the Python call fails or the return value + /// cannot be deserialized. 
+ #[tracing::instrument( + target = TARGET, + name = "bridge.call_async", + skip(self, build_kwargs), + fields(module = %self.module_name, method), + )] pub async fn call_async(&self, method: &str, build_kwargs: F) -> Result, Error> where F: FnOnce(Python<'_>) -> Result, Error> + Send + 'static, @@ -96,6 +140,8 @@ impl PythonBridge { use std::future::Future; use std::pin::Pin; + tracing::Span::current().record("method", method); + let future: Pin> + Send>> = Python::with_gil(|py| -> Result<_, Error> { let module = py.import(&*self.module_name).map_err(from_pyerr)?; @@ -115,7 +161,7 @@ impl PythonBridge { Python::with_gil(|py| { pythonize::depythonize::>(py_result.bind(py)).map_err(|e| { Error::runtime( - format!("Failed to deserialize {} result: {}", method, e), + format!("failed to deserialize {method} result: {e}"), "python", false, ) @@ -125,6 +171,7 @@ impl PythonBridge { } impl Default for PythonBridge { + /// Creates a bridge with the default module name `"nvisy_ai"`. fn default() -> Self { Self::new("nvisy_ai") } diff --git a/crates/nvisy-python/src/exif/mod.rs b/crates/nvisy-python/src/exif/mod.rs new file mode 100644 index 00000000..7eac7d8b --- /dev/null +++ b/crates/nvisy-python/src/exif/mod.rs @@ -0,0 +1,14 @@ +//! EXIF metadata extraction via the Python backend. +//! +//! Provides [`ExifModule`]: a configured handle that calls +//! `nvisy_ai.extract_exif()` through the [`PythonBridge`] +//! to extract EXIF metadata from images. Returns raw JSON values: +//! metadata construction is handled by the caller. +//! +//! [`PythonBridge`]: crate::bridge::PythonBridge + +mod module; +mod params; + +pub use self::module::ExifModule; +pub use self::params::ExifParams; diff --git a/crates/nvisy-python/src/exif/module.rs b/crates/nvisy-python/src/exif/module.rs new file mode 100644 index 00000000..db833f2f --- /dev/null +++ b/crates/nvisy-python/src/exif/module.rs @@ -0,0 +1,134 @@ +//! [`ExifModule`]: EXIF extraction via the Python bridge. 
+ +use nvisy_core::Error; +use nvisy_core::content::ContentData; +use pyo3::prelude::*; +use pyo3::types::PyDict; +use serde_json::Value; + +use super::params::ExifParams; +use crate::bridge::{PythonBridge, from_pyerr}; + +const TARGET: &str = "nvisy_python::exif"; + +/// Configured handle for EXIF metadata extraction. +/// +/// Holds a [`PythonBridge`] and [`ExifParams`] so callers do not need +/// to pass them on every invocation. +#[derive(Debug, Clone)] +pub struct ExifModule { + /// Python bridge used to call into the `nvisy_ai` module. + bridge: PythonBridge, + /// Extraction parameters applied to every call. + params: ExifParams, +} + +impl ExifModule { + /// Creates a new module with the given bridge and parameters. + pub fn new(bridge: PythonBridge, params: ExifParams) -> Self { + Self { bridge, params } + } + + /// Returns a reference to the underlying bridge. + #[must_use] + pub fn bridge(&self) -> &PythonBridge { + &self.bridge + } + + /// Returns a reference to the current parameters. + #[must_use] + pub fn params(&self) -> &ExifParams { + &self.params + } + + /// Calls Python `extract_exif()` synchronously via `spawn_blocking`. + /// + /// Returns raw JSON dicts containing EXIF tag key-value pairs. + /// The MIME type is resolved from `content.content_type()`, + /// defaulting to `"application/octet-stream"` when unavailable. + /// + /// # Errors + /// + /// Returns an error if the Python call fails or the return value + /// cannot be deserialized. + #[tracing::instrument( + target = TARGET, + name = "exif.extract", + skip(self, content), + fields(data_len = content.size()), + )] + pub async fn extract(&self, content: ContentData) -> Result, Error> { + let request = ExifRequest::new(content, self.params); + + self.bridge + .call_sync("extract_exif", move |py| request.to_kwargs(py)) + .await + } + + /// Calls Python `extract_exif()` as a **coroutine** (async Python + /// function). + /// + /// Returns raw JSON dicts containing EXIF tag key-value pairs. 
+ /// The MIME type is resolved from `content.content_type()`, + /// defaulting to `"application/octet-stream"` when unavailable. + /// + /// # Errors + /// + /// Returns an error if the Python call fails or the return value + /// cannot be deserialized. + #[tracing::instrument( + target = TARGET, + name = "exif.extract_async", + skip(self, content), + fields(data_len = content.size()), + )] + pub async fn extract_async(&self, content: ContentData) -> Result, Error> { + let request = ExifRequest::new(content, self.params); + + self.bridge + .call_async("extract_exif", move |py| request.to_kwargs(py)) + .await + } +} + +/// Owned snapshot of a single EXIF extraction request. +/// +/// Wraps [`ContentData`] and [`ExifParams`] so they can be moved into +/// a `Send + 'static` closure for the bridge call. No extra allocations: +/// `ContentData` is internally arc-backed. +struct ExifRequest { + /// Content to extract EXIF metadata from. + content: ContentData, + /// Extraction parameters. + params: ExifParams, +} + +impl ExifRequest { + /// Creates a new request from content data and parameters. + fn new(content: ContentData, params: ExifParams) -> Self { + Self { content, params } + } + + /// Converts the request into a Python keyword arguments dict. 
+    fn to_kwargs<'py>(&self, py: Python<'py>) -> Result<Bound<'py, PyDict>, Error> {
+        let mime_type = self
+            .content
+            .content_type()
+            .unwrap_or("application/octet-stream");
+
+        let kwargs = PyDict::new(py);
+        kwargs
+            .set_item("image_bytes", self.content.as_bytes())
+            .map_err(from_pyerr)?;
+        kwargs
+            .set_item("mime_type", mime_type)
+            .map_err(from_pyerr)?;
+        kwargs
+            .set_item("include_gps", self.params.include_gps)
+            .map_err(from_pyerr)?;
+        kwargs
+            .set_item("include_thumbnail", self.params.include_thumbnail)
+            .map_err(from_pyerr)?;
+        Ok(kwargs)
+    }
+}
diff --git a/crates/nvisy-python/src/exif/params.rs b/crates/nvisy-python/src/exif/params.rs
new file mode 100644
index 00000000..0352b201
--- /dev/null
+++ b/crates/nvisy-python/src/exif/params.rs
@@ -0,0 +1,19 @@
+//! [`ExifParams`]: configuration for EXIF extraction calls.
+
+/// Parameters for EXIF extraction.
+#[derive(Debug, Clone, Copy)]
+pub struct ExifParams {
+    /// Whether to include GPS coordinates in the output.
+    pub include_gps: bool,
+    /// Whether to include thumbnail data in the output.
+    pub include_thumbnail: bool,
+}
+
+impl Default for ExifParams {
+    fn default() -> Self {
+        Self {
+            include_gps: true,
+            include_thumbnail: false,
+        }
+    }
+}
diff --git a/crates/nvisy-python/src/lib.rs b/crates/nvisy-python/src/lib.rs
index 96ee78ef..54c30782 100644
--- a/crates/nvisy-python/src/lib.rs
+++ b/crates/nvisy-python/src/lib.rs
@@ -3,9 +3,7 @@
 #![doc = include_str!("../README.md")]
 
 pub mod bridge;
-pub mod ner;
-pub mod ocr;
-pub mod transcribe;
+pub mod exif;
 
 #[doc(hidden)]
 pub mod prelude;
diff --git a/crates/nvisy-python/src/ner/mod.rs b/crates/nvisy-python/src/ner/mod.rs
deleted file mode 100644
index 71d568e7..00000000
--- a/crates/nvisy-python/src/ner/mod.rs
+++ /dev/null
@@ -1,139 +0,0 @@
-//! Named-entity recognition (NER) detection via a Python AI backend.
-//!
-//! Functions in this module call into the Python `nvisy_ai` module via
-//! [`PythonBridge`] and return raw JSON values.
Entity construction is -//! handled by the pipeline's `NerBackend` / `DetectNerAction` layer. - -use nvisy_core::Error; -use pyo3::prelude::*; -use pyo3::types::PyDict; -use serde_json::Value; - -use crate::bridge::{PythonBridge, from_pyerr}; - -/// Parameters for NER detection, independent of any pipeline types. -#[derive(Debug, Clone)] -pub struct NerParams { - /// Entity type labels to detect (e.g., `["PERSON", "SSN"]`). - pub entity_types: Vec, - /// Minimum confidence score to include a detection (0.0 -- 1.0). - pub confidence_threshold: f64, -} - -/// Call Python `detect_ner()` synchronously via `spawn_blocking`. -/// -/// Returns raw JSON dicts — no domain-type construction. -pub async fn detect_ner( - bridge: &PythonBridge, - text: &str, - params: &NerParams, -) -> Result, Error> { - let text = text.to_string(); - let params = params.clone(); - - bridge - .call_sync("detect_ner", move |py| { - let kwargs = PyDict::new(py); - kwargs.set_item("text", &text).map_err(from_pyerr)?; - kwargs - .set_item("entity_types", ¶ms.entity_types) - .map_err(from_pyerr)?; - kwargs - .set_item("confidence_threshold", params.confidence_threshold) - .map_err(from_pyerr)?; - Ok(kwargs) - }) - .await -} - -/// Call Python `detect_ner_image()` synchronously via `spawn_blocking`. -/// -/// Returns raw JSON dicts — no domain-type construction. 
-pub async fn detect_ner_image( - bridge: &PythonBridge, - image_data: &[u8], - mime_type: &str, - params: &NerParams, -) -> Result, Error> { - let image_data = image_data.to_vec(); - let mime_type = mime_type.to_string(); - let params = params.clone(); - - bridge - .call_sync("detect_ner_image", move |py| { - let kwargs = PyDict::new(py); - kwargs - .set_item("image_bytes", &image_data[..]) - .map_err(from_pyerr)?; - kwargs - .set_item("mime_type", &mime_type) - .map_err(from_pyerr)?; - kwargs - .set_item("entity_types", ¶ms.entity_types) - .map_err(from_pyerr)?; - kwargs - .set_item("confidence_threshold", params.confidence_threshold) - .map_err(from_pyerr)?; - Ok(kwargs) - }) - .await -} - -/// Call Python `detect_ner()` as a **coroutine** (async Python function). -/// -/// Returns raw JSON dicts — no domain-type construction. -pub async fn detect_ner_async( - bridge: &PythonBridge, - text: &str, - params: &NerParams, -) -> Result, Error> { - let text = text.to_string(); - let params = params.clone(); - - bridge - .call_async("detect_ner", move |py| { - let kwargs = PyDict::new(py); - kwargs.set_item("text", &text).map_err(from_pyerr)?; - kwargs - .set_item("entity_types", ¶ms.entity_types) - .map_err(from_pyerr)?; - kwargs - .set_item("confidence_threshold", params.confidence_threshold) - .map_err(from_pyerr)?; - Ok(kwargs) - }) - .await -} - -/// Call Python `detect_ner_image()` as a **coroutine** (async Python function). -/// -/// Returns raw JSON dicts — no domain-type construction. 
-pub async fn detect_ner_image_async( - bridge: &PythonBridge, - image_data: &[u8], - mime_type: &str, - params: &NerParams, -) -> Result, Error> { - let image_data = image_data.to_vec(); - let mime_type = mime_type.to_string(); - let params = params.clone(); - - bridge - .call_async("detect_ner_image", move |py| { - let kwargs = PyDict::new(py); - kwargs - .set_item("image_bytes", &image_data[..]) - .map_err(from_pyerr)?; - kwargs - .set_item("mime_type", &mime_type) - .map_err(from_pyerr)?; - kwargs - .set_item("entity_types", ¶ms.entity_types) - .map_err(from_pyerr)?; - kwargs - .set_item("confidence_threshold", params.confidence_threshold) - .map_err(from_pyerr)?; - Ok(kwargs) - }) - .await -} diff --git a/crates/nvisy-python/src/ocr/mod.rs b/crates/nvisy-python/src/ocr/mod.rs deleted file mode 100644 index 87c12b36..00000000 --- a/crates/nvisy-python/src/ocr/mod.rs +++ /dev/null @@ -1,96 +0,0 @@ -//! OCR text extraction via the Python backend. -//! -//! Calls `nvisy_ai.detect_ocr()` through the Python bridge to perform -//! optical character recognition on images, returning raw JSON values. -//! Entity construction is handled by the pipeline's `OcrBackend` / -//! `GenerateOcrAction` layer. - -use nvisy_core::Error; -use pyo3::prelude::*; -use pyo3::types::PyDict; -use serde_json::Value; - -use crate::bridge::{PythonBridge, from_pyerr}; - -/// Parameters for OCR detection, independent of any pipeline types. -#[derive(Debug, Clone)] -pub struct OcrParams { - /// Language hint (e.g. `"eng"` for English). - pub language: String, - /// OCR engine to use (`"tesseract"`, `"google-vision"`, `"aws-textract"`). - pub engine: String, - /// Minimum confidence threshold for OCR results. - pub confidence_threshold: f64, -} - -/// Call Python `detect_ocr()` synchronously via `spawn_blocking`. -/// -/// Returns raw JSON dicts — no domain-type construction. 
-pub async fn detect_ocr( - bridge: &PythonBridge, - image_data: &[u8], - mime_type: &str, - params: &OcrParams, -) -> Result, Error> { - let image_data = image_data.to_vec(); - let mime_type = mime_type.to_string(); - let params = params.clone(); - - bridge - .call_sync("detect_ocr", move |py| { - let kwargs = PyDict::new(py); - kwargs - .set_item("image_bytes", &image_data[..]) - .map_err(from_pyerr)?; - kwargs - .set_item("mime_type", &mime_type) - .map_err(from_pyerr)?; - kwargs - .set_item("language", ¶ms.language) - .map_err(from_pyerr)?; - kwargs - .set_item("engine", ¶ms.engine) - .map_err(from_pyerr)?; - kwargs - .set_item("confidence_threshold", params.confidence_threshold) - .map_err(from_pyerr)?; - Ok(kwargs) - }) - .await -} - -/// Call Python `detect_ocr()` as a **coroutine** (async Python function). -/// -/// Returns raw JSON dicts — no domain-type construction. -pub async fn detect_ocr_async( - bridge: &PythonBridge, - image_data: &[u8], - mime_type: &str, - params: &OcrParams, -) -> Result, Error> { - let image_data = image_data.to_vec(); - let mime_type = mime_type.to_string(); - let params = params.clone(); - - bridge - .call_async("detect_ocr", move |py| { - let kwargs = PyDict::new(py); - kwargs - .set_item("image_bytes", &image_data[..]) - .map_err(from_pyerr)?; - kwargs - .set_item("mime_type", &mime_type) - .map_err(from_pyerr)?; - kwargs - .set_item("language", ¶ms.language) - .map_err(from_pyerr)?; - kwargs - .set_item("engine", ¶ms.engine) - .map_err(from_pyerr)?; - kwargs - .set_item("confidence_threshold", params.confidence_threshold) - .map_err(from_pyerr)?; - Ok(kwargs) - }) - .await -} diff --git a/crates/nvisy-python/src/prelude.rs b/crates/nvisy-python/src/prelude.rs index e3b932b9..60c66244 100644 --- a/crates/nvisy-python/src/prelude.rs +++ b/crates/nvisy-python/src/prelude.rs @@ -1,4 +1,4 @@ //! Convenience re-exports. 
+ pub use crate::bridge::PythonBridge; -pub use crate::ner::NerParams; -pub use crate::ocr::OcrParams; +pub use crate::exif::{ExifModule, ExifParams}; diff --git a/crates/nvisy-python/src/transcribe/mod.rs b/crates/nvisy-python/src/transcribe/mod.rs deleted file mode 100644 index 54dc337c..00000000 --- a/crates/nvisy-python/src/transcribe/mod.rs +++ /dev/null @@ -1,100 +0,0 @@ -//! Speech-to-text transcription via the Python backend. -//! -//! Calls `nvisy_ai.transcribe()` through the Python bridge to perform -//! speech transcription on audio, returning raw JSON values. - -use nvisy_core::Error; -use pyo3::prelude::*; -use pyo3::types::PyDict; -use serde_json::Value; - -use crate::bridge::{PythonBridge, from_pyerr}; - -/// Parameters for transcription, independent of any pipeline types. -#[derive(Debug, Clone)] -pub struct TranscribeParams { - /// BCP-47 language tag for transcription. - pub language: String, - /// Whether to perform speaker diarization. - pub enable_speaker_diarization: bool, - /// Minimum confidence threshold for results. - pub confidence_threshold: f64, -} - -/// Call Python `transcribe()` synchronously via `spawn_blocking`. -/// -/// Returns raw JSON dicts — no domain-type construction. 
-pub async fn transcribe( - bridge: &PythonBridge, - audio_data: &[u8], - mime_type: &str, - params: &TranscribeParams, -) -> Result, Error> { - let audio_data = audio_data.to_vec(); - let mime_type = mime_type.to_string(); - let params = params.clone(); - - bridge - .call_sync("transcribe", move |py| { - let kwargs = PyDict::new(py); - kwargs - .set_item("audio_bytes", &audio_data[..]) - .map_err(from_pyerr)?; - kwargs - .set_item("mime_type", &mime_type) - .map_err(from_pyerr)?; - kwargs - .set_item("language", ¶ms.language) - .map_err(from_pyerr)?; - kwargs - .set_item( - "enable_speaker_diarization", - params.enable_speaker_diarization, - ) - .map_err(from_pyerr)?; - kwargs - .set_item("confidence_threshold", params.confidence_threshold) - .map_err(from_pyerr)?; - Ok(kwargs) - }) - .await -} - -/// Call Python `transcribe()` as a **coroutine** (async Python function). -/// -/// Returns raw JSON dicts — no domain-type construction. -pub async fn transcribe_async( - bridge: &PythonBridge, - audio_data: &[u8], - mime_type: &str, - params: &TranscribeParams, -) -> Result, Error> { - let audio_data = audio_data.to_vec(); - let mime_type = mime_type.to_string(); - let params = params.clone(); - - bridge - .call_async("transcribe", move |py| { - let kwargs = PyDict::new(py); - kwargs - .set_item("audio_bytes", &audio_data[..]) - .map_err(from_pyerr)?; - kwargs - .set_item("mime_type", &mime_type) - .map_err(from_pyerr)?; - kwargs - .set_item("language", ¶ms.language) - .map_err(from_pyerr)?; - kwargs - .set_item( - "enable_speaker_diarization", - params.enable_speaker_diarization, - ) - .map_err(from_pyerr)?; - kwargs - .set_item("confidence_threshold", params.confidence_threshold) - .map_err(from_pyerr)?; - Ok(kwargs) - }) - .await -} diff --git a/crates/nvisy-registry/Cargo.toml b/crates/nvisy-registry/Cargo.toml index bb383c18..399ca45d 100644 --- a/crates/nvisy-registry/Cargo.toml +++ b/crates/nvisy-registry/Cargo.toml @@ -33,6 +33,9 @@ fjall = { workspace = true, 
features = [] } # Async runtime and parallelism tokio = { workspace = true, features = ["sync", "rt"] } +# Observability +tracing = { workspace = true, features = [] } + # (De)serialization serde = { workspace = true, features = [] } serde_json = { workspace = true, features = [] } diff --git a/crates/nvisy-registry/src/handler/content.rs b/crates/nvisy-registry/src/handler/content.rs new file mode 100644 index 00000000..55e29dda --- /dev/null +++ b/crates/nvisy-registry/src/handler/content.rs @@ -0,0 +1,154 @@ +//! [`ContentHandle`]: async handle to stored content data and metadata. + +use std::fmt; + +use bytes::Bytes; +use fjall::Keyspace; +use nvisy_core::content::{ContentData, ContentMetadata, ContentSource}; +use nvisy_core::{Error, ErrorKind, Result}; +use uuid::Uuid; + +use crate::registry::composite_key; + +const COMPONENT: &str = "nvisy-registry::content"; + +/// Lightweight handle to a content entry stored in the registry. +/// +/// Holds references to the fjall keyspaces so it can read content data +/// and metadata on demand. Cloning is cheap: fjall handles are +/// internally `Arc`-wrapped. +#[derive(Clone)] +pub struct ContentHandle { + /// Actor identity that owns this content entry. + actor_id: Uuid, + /// Source identifier for the stored content. + content_source: ContentSource, + /// Keyspace storing raw content bytes. + content_ks: Keyspace, + /// Keyspace storing serialized content metadata. + content_meta_ks: Keyspace, +} + +impl fmt::Debug for ContentHandle { + /// Formats the handle for debugging, omitting keyspace internals. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ContentHandle") + .field("actor_id", &self.actor_id) + .field("content_source", &self.content_source) + .finish_non_exhaustive() + } +} + +impl ContentHandle { + /// Creates a new handle from pre-resolved keyspaces. 
+    ///
+    /// This is `pub(crate)` because only [`Registry`](crate::Registry)
+    /// should construct handles after verifying the entry exists.
+    pub(crate) fn new(
+        actor_id: Uuid,
+        content_source: ContentSource,
+        content_ks: Keyspace,
+        content_meta_ks: Keyspace,
+    ) -> Self {
+        Self {
+            actor_id,
+            content_source,
+            content_ks,
+            content_meta_ks,
+        }
+    }
+
+    /// Returns the content source identifier.
+    #[must_use]
+    pub fn content_source(&self) -> ContentSource {
+        self.content_source
+    }
+
+    /// Returns the actor ID that owns this content.
+    #[must_use]
+    pub fn actor_id(&self) -> Uuid {
+        self.actor_id
+    }
+
+    /// Reads the content bytes from the store.
+    ///
+    /// The read is dispatched to a blocking thread via
+    /// [`spawn_blocking`](tokio::task::spawn_blocking) to avoid
+    /// blocking the async runtime on fjall I/O.
+    #[tracing::instrument(
+        target = COMPONENT,
+        name = "content.read_data",
+        skip(self),
+        fields(actor_id = %self.actor_id, source_id = %self.content_source.as_uuid()),
+    )]
+    pub async fn content_data(&self) -> Result<ContentData> {
+        let key = composite_key(self.actor_id, self.content_source.as_uuid());
+        let source = self.content_source;
+        let ks = self.content_ks.clone();
+
+        tokio::task::spawn_blocking(move || -> Result<ContentData> {
+            let value = ks.get(key).map_err(|err| {
+                Error::new(ErrorKind::Internal, "failed to read content data")
+                    .with_component(COMPONENT)
+                    .with_source(err)
+            })?;
+
+            let guard = value.ok_or_else(|| {
+                Error::new(
+                    ErrorKind::NotFound,
+                    format!("content data not found: {}", source.as_uuid()),
+                )
+                .with_component(COMPONENT)
+            })?;
+
+            Ok(ContentData::new(source, Bytes::copy_from_slice(&guard)))
+        })
+        .await
+        .map_err(|err| {
+            Error::new(ErrorKind::Internal, "blocking task panicked")
+                .with_component(COMPONENT)
+                .with_source(err)
+        })?
+    }
+
+    /// Reads the content metadata from the store.
+    ///
+    /// Returns [`ContentMetadata::default()`] when the metadata key
+    /// exists but has no value (e.g.
+    /// content registered without metadata).
+    #[tracing::instrument(
+        target = COMPONENT,
+        name = "content.read_metadata",
+        skip(self),
+        fields(actor_id = %self.actor_id, source_id = %self.content_source.as_uuid()),
+    )]
+    pub async fn metadata(&self) -> Result<ContentMetadata> {
+        let key = composite_key(self.actor_id, self.content_source.as_uuid());
+        let ks = self.content_meta_ks.clone();
+
+        tokio::task::spawn_blocking(move || -> Result<ContentMetadata> {
+            let value = ks.get(key).map_err(|err| {
+                Error::new(ErrorKind::Internal, "failed to read content metadata")
+                    .with_component(COMPONENT)
+                    .with_source(err)
+            })?;
+
+            match value {
+                Some(guard) => serde_json::from_slice(&guard).map_err(|err| {
+                    Error::new(
+                        ErrorKind::Serialization,
+                        "failed to deserialize content metadata",
+                    )
+                    .with_component(COMPONENT)
+                    .with_source(err)
+                }),
+                None => Ok(ContentMetadata::default()),
+            }
+        })
+        .await
+        .map_err(|err| {
+            Error::new(ErrorKind::Internal, "blocking task panicked")
+                .with_component(COMPONENT)
+                .with_source(err)
+        })?
+    }
+}
diff --git a/crates/nvisy-registry/src/handler/context.rs b/crates/nvisy-registry/src/handler/context.rs
new file mode 100644
index 00000000..77cc3c91
--- /dev/null
+++ b/crates/nvisy-registry/src/handler/context.rs
@@ -0,0 +1,104 @@
+//! [`ContextHandle`]: async handle to a stored detection context.
+
+use std::fmt;
+
+use fjall::Keyspace;
+use nvisy_core::content::ContentSource;
+use nvisy_core::{Error, ErrorKind, Result};
+use nvisy_ontology::context::Context;
+use uuid::Uuid;
+
+use crate::registry::composite_key;
+
+const COMPONENT: &str = "nvisy-registry::context";
+
+/// Lightweight handle to a context entry stored in the registry.
+///
+/// Holds a reference to the contexts keyspace so it can deserialize the
+/// stored JSON on demand. Cloning is cheap: fjall handles are
+/// internally `Arc`-wrapped.
+#[derive(Clone)]
+pub struct ContextHandle {
+    /// Actor identity that owns this context entry.
+ actor_id: Uuid, + /// Content source this context is associated with. + source: ContentSource, + /// Keyspace storing serialized context JSON. + contexts_ks: Keyspace, +} + +impl fmt::Debug for ContextHandle { + /// Formats the handle for debugging, omitting keyspace internals. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ContextHandle") + .field("actor_id", &self.actor_id) + .field("source", &self.source) + .finish_non_exhaustive() + } +} + +impl ContextHandle { + /// Creates a new handle from a pre-resolved keyspace. + /// + /// This is `pub(crate)` because only [`Registry`](crate::Registry) + /// should construct handles after verifying the entry exists. + pub(crate) fn new(actor_id: Uuid, source: ContentSource, contexts_ks: Keyspace) -> Self { + Self { + actor_id, + source, + contexts_ks, + } + } + + /// Returns the content source identifier. + #[must_use] + pub fn source(&self) -> ContentSource { + self.source + } + + /// Returns the actor ID that owns this context. + #[must_use] + pub fn actor_id(&self) -> Uuid { + self.actor_id + } + + /// Reads and deserializes the context from the store. + /// + /// The read is dispatched to a blocking thread via + /// [`spawn_blocking`](tokio::task::spawn_blocking) to avoid + /// blocking the async runtime on fjall I/O. 
+    #[tracing::instrument(
+        target = COMPONENT,
+        name = "context.read",
+        skip(self),
+        fields(actor_id = %self.actor_id, source_id = %self.source.as_uuid()),
+    )]
+    pub async fn context(&self) -> Result<Context> {
+        let key = composite_key(self.actor_id, self.source.as_uuid());
+        let ks = self.contexts_ks.clone();
+
+        tokio::task::spawn_blocking(move || -> Result<Context> {
+            let value = ks.get(key).map_err(|err| {
+                Error::new(ErrorKind::Internal, "failed to read context")
+                    .with_component(COMPONENT)
+                    .with_source(err)
+            })?;
+
+            let guard = value.ok_or_else(|| {
+                Error::new(ErrorKind::NotFound, "context data not found").with_component(COMPONENT)
+            })?;
+
+            serde_json::from_slice(&guard).map_err(|err| {
+                Error::new(ErrorKind::Serialization, "failed to deserialize context")
+                    .with_component(COMPONENT)
+                    .with_source(err)
+            })
+        })
+        .await
+        .map_err(|err| {
+            Error::new(ErrorKind::Internal, "blocking task panicked")
+                .with_component(COMPONENT)
+                .with_source(err)
+        })?
+    }
+}
diff --git a/crates/nvisy-registry/src/handler/mod.rs b/crates/nvisy-registry/src/handler/mod.rs
new file mode 100644
index 00000000..b8e68fbe
--- /dev/null
+++ b/crates/nvisy-registry/src/handler/mod.rs
@@ -0,0 +1,7 @@
+//! Async handles for reading stored content and contexts.
+
+mod content;
+mod context;
+
+pub use self::content::ContentHandle;
+pub use self::context::ContextHandle;
diff --git a/crates/nvisy-registry/src/lib.rs b/crates/nvisy-registry/src/lib.rs
index f8fa9531..a45658ca 100644
--- a/crates/nvisy-registry/src/lib.rs
+++ b/crates/nvisy-registry/src/lib.rs
@@ -1,19 +1,12 @@
-//! Actor-scoped content and context storage backed by fjall.
-//!
-//! This crate provides [`Registry`], a unified store that manages both
-//! content files and detection contexts. Every resource is scoped by a
-//! `Uuid` actor identity, so listing and reading are inherently
-//! actor-isolated at the database level via composite keys.
-//!
-//! # Core Types
-//!
-//!
- [`Registry`]: Shared, clonable handle to the fjall database -//! - [`ContentHandle`]: Lightweight async handle to stored content -//! - [`ContextHandle`]: Lightweight async handle to a stored context +#![forbid(unsafe_code)] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![doc = include_str!("../README.md")] -mod store; +mod handler; +mod registry; #[doc(hidden)] pub mod prelude; -pub use store::{ContentHandle, ContextHandle, Registry}; +pub use self::handler::{ContentHandle, ContextHandle}; +pub use self::registry::Registry; diff --git a/crates/nvisy-registry/src/registry/mod.rs b/crates/nvisy-registry/src/registry/mod.rs new file mode 100644 index 00000000..31e1b438 --- /dev/null +++ b/crates/nvisy-registry/src/registry/mod.rs @@ -0,0 +1,867 @@ +//! [`Registry`]: actor-scoped content and context store backed by fjall. + +use std::path::{Path, PathBuf}; + +use fjall::{Database, Keyspace, KeyspaceCreateOptions, KvSeparationOptions}; +use nvisy_core::content::{Content, ContentSource}; +use nvisy_core::{Error, ErrorKind, Result}; +use nvisy_ontology::context::Context; +use uuid::Uuid; + +use crate::handler::{ContentHandle, ContextHandle}; + +const TARGET: &str = "nvisy_registry"; +const COMPONENT: &str = "nvisy-registry"; + +/// Builds a 32-byte composite key: `[actor_id: 16][resource_id: 16]`. +/// +/// Used by both [`ContentHandle`] and [`ContextHandle`] to scope every +/// read/write to a specific actor. +pub(crate) fn composite_key(actor_id: Uuid, resource_id: Uuid) -> [u8; 32] { + let mut key = [0u8; 32]; + key[..16].copy_from_slice(actor_id.as_bytes()); + key[16..].copy_from_slice(resource_id.as_bytes()); + key +} + +/// Actor-scoped content and context store backed by fjall. +/// +/// Stores content data, content metadata, and contexts in three keyspaces. +/// Every key is a 32-byte composite of `[actor_id][resource_id]`, so all +/// operations are inherently scoped to a single actor. 
+/// +/// All handles are internally `Arc`-wrapped, making `Registry` cheap to +/// clone and safe to share across threads. +#[derive(Clone)] +pub struct Registry { + /// Filesystem path where the fjall database is stored. + base_dir: PathBuf, + /// Underlying fjall database handle. + db: Database, + /// Keyspace for raw content bytes (blob-separated). + content_ks: Keyspace, + /// Keyspace for serialized content metadata. + content_meta_ks: Keyspace, + /// Keyspace for serialized detection contexts. + contexts_ks: Keyspace, +} + +impl std::fmt::Debug for Registry { + /// Formats the registry for debugging, showing only the base directory. + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Registry") + .field("base_dir", &self.base_dir) + .finish_non_exhaustive() + } +} + +impl Registry { + /// Opens (or creates) the fjall database at `path`. + /// + /// Three keyspaces are created: + /// - `"content"`: blob separation for efficient large-value storage + /// - `"content_meta"`: default configuration + /// - `"contexts"`: default configuration + /// + /// # Errors + /// + /// Returns an error if the database or keyspaces cannot be opened. 
+    #[tracing::instrument(target = TARGET, name = "registry.open", fields(path = %path.as_ref().display()))]
+    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
+        let base_dir = path.as_ref().to_path_buf();
+
+        let db = Database::builder(&base_dir).open().map_err(|err| {
+            Error::new(
+                ErrorKind::Internal,
+                format!("failed to open database: {}", base_dir.display()),
+            )
+            .with_component(COMPONENT)
+            .with_source(err)
+        })?;
+
+        let content_ks = db
+            .keyspace("content", || {
+                KeyspaceCreateOptions::default()
+                    .with_kv_separation(Some(KvSeparationOptions::default()))
+            })
+            .map_err(|err| {
+                Error::new(ErrorKind::Internal, "failed to open content keyspace")
+                    .with_component(COMPONENT)
+                    .with_source(err)
+            })?;
+
+        let content_meta_ks = db
+            .keyspace("content_meta", KeyspaceCreateOptions::default)
+            .map_err(|err| {
+                Error::new(ErrorKind::Internal, "failed to open content_meta keyspace")
+                    .with_component(COMPONENT)
+                    .with_source(err)
+            })?;
+
+        let contexts_ks = db
+            .keyspace("contexts", KeyspaceCreateOptions::default)
+            .map_err(|err| {
+                Error::new(ErrorKind::Internal, "failed to open contexts keyspace")
+                    .with_component(COMPONENT)
+                    .with_source(err)
+            })?;
+
+        tracing::debug!(target: TARGET, "registry opened");
+
+        Ok(Self {
+            base_dir,
+            db,
+            content_ks,
+            content_meta_ks,
+            contexts_ks,
+        })
+    }
+
+    // -- Content operations --------------------------------------------------
+
+    /// Registers content, writing its bytes and metadata to the store.
+    ///
+    /// Returns a [`ContentHandle`] for subsequent reads.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if serialization or the underlying write fails.
+    #[tracing::instrument(
+        target = TARGET,
+        name = "registry.register_content",
+        skip(self, content),
+        fields(actor_id = %actor_id),
+    )]
+    pub async fn register_content(
+        &self,
+        actor_id: Uuid,
+        content: Content,
+    ) -> Result<ContentHandle> {
+        let content_source = content.content_source();
+        let key = composite_key(actor_id, content_source.as_uuid());
+        let data = content.as_bytes().to_vec();
+
+        let (_, content_metadata) = content.into_parts();
+        let meta_bytes =
+            serde_json::to_vec(&content_metadata.unwrap_or_default()).map_err(|err| {
+                Error::new(
+                    ErrorKind::Serialization,
+                    "failed to serialize content metadata",
+                )
+                .with_component(COMPONENT)
+                .with_source(err)
+            })?;
+
+        let content_ks = self.content_ks.clone();
+        let meta_ks = self.content_meta_ks.clone();
+        let db = self.db.clone();
+
+        tokio::task::spawn_blocking(move || -> Result<()> {
+            content_ks.insert(key, &data).map_err(|err| {
+                Error::new(ErrorKind::Internal, "failed to write content data")
+                    .with_component(COMPONENT)
+                    .with_source(err)
+            })?;
+            meta_ks.insert(key, &meta_bytes).map_err(|err| {
+                Error::new(ErrorKind::Internal, "failed to write content metadata")
+                    .with_component(COMPONENT)
+                    .with_source(err)
+            })?;
+            db.persist(fjall::PersistMode::SyncAll).map_err(|err| {
+                Error::new(ErrorKind::Internal, "failed to persist database")
+                    .with_component(COMPONENT)
+                    .with_source(err)
+            })?;
+            Ok(())
+        })
+        .await
+        .map_err(|err| {
+            Error::new(ErrorKind::Internal, "blocking task panicked")
+                .with_component(COMPONENT)
+                .with_source(err)
+        })??;
+
+        tracing::trace!(
+            target: TARGET,
+            source_id = %content_source.as_uuid(),
+            "content registered",
+        );
+
+        Ok(ContentHandle::new(
+            actor_id,
+            content_source,
+            self.content_ks.clone(),
+            self.content_meta_ks.clone(),
+        ))
+    }
+
+    /// Looks up previously registered content by actor and content ID.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ErrorKind::NotFound`] if no entry exists for the given key.
+    #[tracing::instrument(
+        target = TARGET,
+        name = "registry.read_content",
+        skip(self),
+        fields(actor_id = %actor_id, content_id = %content_id),
+    )]
+    pub async fn read_content(&self, actor_id: Uuid, content_id: Uuid) -> Result<ContentHandle> {
+        let key = composite_key(actor_id, content_id);
+        let ks = self.content_ks.clone();
+
+        let exists = tokio::task::spawn_blocking(move || -> Result<bool> {
+            ks.contains_key(key).map_err(|err| {
+                Error::new(ErrorKind::Internal, "failed to check content key")
+                    .with_component(COMPONENT)
+                    .with_source(err)
+            })
+        })
+        .await
+        .map_err(|err| {
+            Error::new(ErrorKind::Internal, "blocking task panicked")
+                .with_component(COMPONENT)
+                .with_source(err)
+        })??;
+
+        if !exists {
+            return Err(Error::new(
+                ErrorKind::NotFound,
+                format!("content not found: actor_id={actor_id}, content_id={content_id}"),
+            )
+            .with_component(COMPONENT));
+        }
+
+        let source = ContentSource::from(content_id);
+        Ok(ContentHandle::new(
+            actor_id,
+            source,
+            self.content_ks.clone(),
+            self.content_meta_ks.clone(),
+        ))
+    }
+
+    /// Removes a single content entry (data + metadata) by actor and content ID.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ErrorKind::NotFound`] if no entry exists for the given key.
+ #[tracing::instrument( + target = TARGET, + name = "registry.unregister_content", + skip(self), + fields(actor_id = %actor_id, content_id = %content_id), + )] + pub async fn unregister_content(&self, actor_id: Uuid, content_id: Uuid) -> Result<()> { + let key = composite_key(actor_id, content_id); + let content_ks = self.content_ks.clone(); + let meta_ks = self.content_meta_ks.clone(); + let db = self.db.clone(); + + tokio::task::spawn_blocking(move || -> Result<()> { + let exists = content_ks.contains_key(key).map_err(|err| { + Error::new(ErrorKind::Internal, "failed to check content key") + .with_component(COMPONENT) + .with_source(err) + })?; + + if !exists { + return Err(Error::new( + ErrorKind::NotFound, + format!("content not found: actor_id={actor_id}, content_id={content_id}"), + ) + .with_component(COMPONENT)); + } + + content_ks.remove(key).map_err(|err| { + Error::new(ErrorKind::Internal, "failed to remove content data") + .with_component(COMPONENT) + .with_source(err) + })?; + meta_ks.remove(key).map_err(|err| { + Error::new(ErrorKind::Internal, "failed to remove content metadata") + .with_component(COMPONENT) + .with_source(err) + })?; + db.persist(fjall::PersistMode::SyncAll).map_err(|err| { + Error::new(ErrorKind::Internal, "failed to persist database") + .with_component(COMPONENT) + .with_source(err) + })?; + Ok(()) + }) + .await + .map_err(|err| { + Error::new(ErrorKind::Internal, "blocking task panicked") + .with_component(COMPONENT) + .with_source(err) + })? + } + + /// Removes all content entries (data + metadata) for an actor. + /// + /// Returns the number of entries removed. 
+    #[tracing::instrument(
+        target = TARGET,
+        name = "registry.unregister_all_content",
+        skip(self),
+        fields(actor_id = %actor_id, removed),
+    )]
+    pub async fn unregister_all_content(&self, actor_id: Uuid) -> Result<usize> {
+        let prefix = actor_id.as_bytes().to_vec();
+        let content_ks = self.content_ks.clone();
+        let meta_ks = self.content_meta_ks.clone();
+        let db = self.db.clone();
+
+        let count = tokio::task::spawn_blocking(move || -> Result<usize> {
+            let keys = collect_prefix_keys(&content_ks, &prefix)?;
+            let count = keys.len();
+
+            for key in &keys {
+                content_ks.remove(key).map_err(|err| {
+                    Error::new(ErrorKind::Internal, "failed to remove content data")
+                        .with_component(COMPONENT)
+                        .with_source(err)
+                })?;
+                meta_ks.remove(key).map_err(|err| {
+                    Error::new(ErrorKind::Internal, "failed to remove content metadata")
+                        .with_component(COMPONENT)
+                        .with_source(err)
+                })?;
+            }
+
+            if count > 0 {
+                db.persist(fjall::PersistMode::SyncAll).map_err(|err| {
+                    Error::new(ErrorKind::Internal, "failed to persist database")
+                        .with_component(COMPONENT)
+                        .with_source(err)
+                })?;
+            }
+
+            Ok(count)
+        })
+        .await
+        .map_err(|err| {
+            Error::new(ErrorKind::Internal, "blocking task panicked")
+                .with_component(COMPONENT)
+                .with_source(err)
+        })??;
+
+        tracing::Span::current().record("removed", count);
+        Ok(count)
+    }
+
+    /// Lists all content IDs for an actor, sorted in ascending order.
+    #[tracing::instrument(
+        target = TARGET,
+        name = "registry.list_content",
+        skip(self),
+        fields(actor_id = %actor_id),
+    )]
+    pub async fn list_content(&self, actor_id: Uuid) -> Result<Vec<Uuid>> {
+        let prefix = actor_id.as_bytes().to_vec();
+        let ks = self.content_ks.clone();
+
+        tokio::task::spawn_blocking(move || extract_resource_ids(&ks, &prefix))
+            .await
+            .map_err(|err| {
+                Error::new(ErrorKind::Internal, "blocking task panicked")
+                    .with_component(COMPONENT)
+                    .with_source(err)
+            })?
+ } + + // -- Context operations -------------------------------------------------- + + /// Registers a context, serializing it as JSON. + /// + /// Returns a [`ContextHandle`] for subsequent reads. + /// + /// # Errors + /// + /// Returns an error if serialization or the underlying write fails. + #[tracing::instrument( + target = TARGET, + name = "registry.register_context", + skip(self, context), + fields(actor_id = %actor_id), + )] + pub async fn register_context( + &self, + actor_id: Uuid, + context: Context, + ) -> Result { + let source = context.source; + let key = composite_key(actor_id, source.as_uuid()); + + let json_bytes = serde_json::to_vec(&context).map_err(|err| { + Error::new(ErrorKind::Serialization, "failed to serialize context") + .with_component(COMPONENT) + .with_source(err) + })?; + + let ks = self.contexts_ks.clone(); + let db = self.db.clone(); + + tokio::task::spawn_blocking(move || -> Result<()> { + ks.insert(key, &json_bytes).map_err(|err| { + Error::new(ErrorKind::Internal, "failed to write context") + .with_component(COMPONENT) + .with_source(err) + })?; + db.persist(fjall::PersistMode::SyncAll).map_err(|err| { + Error::new(ErrorKind::Internal, "failed to persist database") + .with_component(COMPONENT) + .with_source(err) + })?; + Ok(()) + }) + .await + .map_err(|err| { + Error::new(ErrorKind::Internal, "blocking task panicked") + .with_component(COMPONENT) + .with_source(err) + })??; + + tracing::trace!( + target: TARGET, + source_id = %source.as_uuid(), + "context registered", + ); + + Ok(ContextHandle::new( + actor_id, + source, + self.contexts_ks.clone(), + )) + } + + /// Looks up a previously registered context by actor and context ID. + /// + /// # Errors + /// + /// Returns [`ErrorKind::NotFound`] if no entry exists for the given key. 
+ #[tracing::instrument( + target = TARGET, + name = "registry.read_context", + skip(self), + fields(actor_id = %actor_id, context_id = %context_id), + )] + pub async fn read_context(&self, actor_id: Uuid, context_id: Uuid) -> Result { + let key = composite_key(actor_id, context_id); + let ks = self.contexts_ks.clone(); + + let exists = tokio::task::spawn_blocking(move || -> Result { + ks.contains_key(key).map_err(|err| { + Error::new(ErrorKind::Internal, "failed to check context key") + .with_component(COMPONENT) + .with_source(err) + }) + }) + .await + .map_err(|err| { + Error::new(ErrorKind::Internal, "blocking task panicked") + .with_component(COMPONENT) + .with_source(err) + })??; + + if !exists { + return Err(Error::new( + ErrorKind::NotFound, + format!("context not found: actor_id={actor_id}, context_id={context_id}"), + ) + .with_component(COMPONENT)); + } + + let source = ContentSource::from(context_id); + Ok(ContextHandle::new( + actor_id, + source, + self.contexts_ks.clone(), + )) + } + + /// Removes a single context entry by actor and context ID. + /// + /// # Errors + /// + /// Returns [`ErrorKind::NotFound`] if no entry exists for the given key. 
+ #[tracing::instrument( + target = TARGET, + name = "registry.unregister_context", + skip(self), + fields(actor_id = %actor_id, context_id = %context_id), + )] + pub async fn unregister_context(&self, actor_id: Uuid, context_id: Uuid) -> Result<()> { + let key = composite_key(actor_id, context_id); + let ks = self.contexts_ks.clone(); + let db = self.db.clone(); + + tokio::task::spawn_blocking(move || -> Result<()> { + let exists = ks.contains_key(key).map_err(|err| { + Error::new(ErrorKind::Internal, "failed to check context key") + .with_component(COMPONENT) + .with_source(err) + })?; + + if !exists { + return Err(Error::new( + ErrorKind::NotFound, + format!("context not found: actor_id={actor_id}, context_id={context_id}"), + ) + .with_component(COMPONENT)); + } + + ks.remove(key).map_err(|err| { + Error::new(ErrorKind::Internal, "failed to remove context") + .with_component(COMPONENT) + .with_source(err) + })?; + db.persist(fjall::PersistMode::SyncAll).map_err(|err| { + Error::new(ErrorKind::Internal, "failed to persist database") + .with_component(COMPONENT) + .with_source(err) + })?; + Ok(()) + }) + .await + .map_err(|err| { + Error::new(ErrorKind::Internal, "blocking task panicked") + .with_component(COMPONENT) + .with_source(err) + })? + } + + /// Removes all context entries for an actor. + /// + /// Returns the number of entries removed. 
+ #[tracing::instrument( + target = TARGET, + name = "registry.unregister_all_contexts", + skip(self), + fields(actor_id = %actor_id, removed), + )] + pub async fn unregister_all_contexts(&self, actor_id: Uuid) -> Result { + let prefix = actor_id.as_bytes().to_vec(); + let ks = self.contexts_ks.clone(); + let db = self.db.clone(); + + let count = tokio::task::spawn_blocking(move || -> Result { + let keys = collect_prefix_keys(&ks, &prefix)?; + let count = keys.len(); + + for key in &keys { + ks.remove(key).map_err(|err| { + Error::new(ErrorKind::Internal, "failed to remove context") + .with_component(COMPONENT) + .with_source(err) + })?; + } + + if count > 0 { + db.persist(fjall::PersistMode::SyncAll).map_err(|err| { + Error::new(ErrorKind::Internal, "failed to persist database") + .with_component(COMPONENT) + .with_source(err) + })?; + } + + Ok(count) + }) + .await + .map_err(|err| { + Error::new(ErrorKind::Internal, "blocking task panicked") + .with_component(COMPONENT) + .with_source(err) + })??; + + tracing::Span::current().record("removed", count); + Ok(count) + } + + /// Lists all context IDs for an actor, sorted in ascending order. + #[tracing::instrument( + target = TARGET, + name = "registry.list_contexts", + skip(self), + fields(actor_id = %actor_id), + )] + pub async fn list_contexts(&self, actor_id: Uuid) -> Result> { + let prefix = actor_id.as_bytes().to_vec(); + let ks = self.contexts_ks.clone(); + + tokio::task::spawn_blocking(move || extract_resource_ids(&ks, &prefix)) + .await + .map_err(|err| { + Error::new(ErrorKind::Internal, "blocking task panicked") + .with_component(COMPONENT) + .with_source(err) + })? + } + + /// Returns the base directory path where the database is stored. + #[must_use] + pub fn base_dir(&self) -> &Path { + &self.base_dir + } +} + +/// Collects all raw keys from a keyspace that share the given prefix. 
+fn collect_prefix_keys(ks: &Keyspace, prefix: &[u8]) -> Result>> { + ks.prefix(prefix) + .map(|guard| { + let key = guard.key().map_err(|err| { + Error::new(ErrorKind::Internal, "failed to iterate keyspace") + .with_component(COMPONENT) + .with_source(err) + })?; + Ok(key.to_vec()) + }) + .collect() +} + +/// Extracts sorted resource UUIDs from the trailing 16 bytes of each +/// 32-byte composite key that shares the given prefix. +fn extract_resource_ids(ks: &Keyspace, prefix: &[u8]) -> Result> { + let mut ids = Vec::new(); + for guard in ks.prefix(prefix) { + let key = guard.key().map_err(|err| { + Error::new(ErrorKind::Internal, "failed to iterate keyspace") + .with_component(COMPONENT) + .with_source(err) + })?; + if key.len() == 32 + && let Ok(bytes) = <[u8; 16]>::try_from(&key[16..]) + { + ids.push(Uuid::from_bytes(bytes)); + } + } + ids.sort(); + Ok(ids) +} + +#[cfg(test)] +mod tests { + use nvisy_core::content::{Content, ContentData}; + use nvisy_ontology::context::Context; + + use super::*; + + /// Opens a temporary registry backed by a fresh [`tempfile::TempDir`]. 
+ fn open_temp_registry() -> (tempfile::TempDir, Registry) { + let temp = tempfile::TempDir::new().unwrap(); + let registry = Registry::open(temp.path().join("data")).unwrap(); + (temp, registry) + } + + #[tokio::test] + async fn register_and_read_content() { + let (_temp, registry) = open_temp_registry(); + let actor_id = Uuid::now_v7(); + let content = Content::new(ContentData::from("Hello, world!")); + + let handle = registry.register_content(actor_id, content).await.unwrap(); + let data = handle.content_data().await.unwrap(); + assert_eq!(data.as_str().unwrap(), "Hello, world!"); + } + + #[tokio::test] + async fn content_scoped_by_actor() { + let (_temp, registry) = open_temp_registry(); + let actor_a = Uuid::now_v7(); + let actor_b = Uuid::now_v7(); + + let content = Content::new(ContentData::from("actor A only")); + let handle = registry.register_content(actor_a, content).await.unwrap(); + let content_id = handle.content_source().as_uuid(); + + // Actor B cannot see actor A's content. + let err = registry + .read_content(actor_b, content_id) + .await + .unwrap_err(); + assert_eq!(err.kind, ErrorKind::NotFound); + + // Actor A can. 
+ registry.read_content(actor_a, content_id).await.unwrap(); + } + + #[tokio::test] + async fn list_content_per_actor() { + let (_temp, registry) = open_temp_registry(); + let actor_a = Uuid::now_v7(); + let actor_b = Uuid::now_v7(); + + registry + .register_content(actor_a, Content::new(ContentData::from("a1"))) + .await + .unwrap(); + registry + .register_content(actor_a, Content::new(ContentData::from("a2"))) + .await + .unwrap(); + registry + .register_content(actor_b, Content::new(ContentData::from("b1"))) + .await + .unwrap(); + + assert_eq!(registry.list_content(actor_a).await.unwrap().len(), 2); + assert_eq!(registry.list_content(actor_b).await.unwrap().len(), 1); + } + + #[tokio::test] + async fn unregister_content() { + let (_temp, registry) = open_temp_registry(); + let actor_id = Uuid::now_v7(); + let content = Content::new(ContentData::from("delete me")); + let content_id = content.content_source().as_uuid(); + registry.register_content(actor_id, content).await.unwrap(); + + registry + .unregister_content(actor_id, content_id) + .await + .unwrap(); + + let err = registry + .read_content(actor_id, content_id) + .await + .unwrap_err(); + assert_eq!(err.kind, ErrorKind::NotFound); + } + + #[tokio::test] + async fn unregister_all_content() { + let (_temp, registry) = open_temp_registry(); + let actor_id = Uuid::now_v7(); + + registry + .register_content(actor_id, Content::new(ContentData::from("first"))) + .await + .unwrap(); + registry + .register_content(actor_id, Content::new(ContentData::from("second"))) + .await + .unwrap(); + + let deleted = registry.unregister_all_content(actor_id).await.unwrap(); + assert_eq!(deleted, 2); + assert!(registry.list_content(actor_id).await.unwrap().is_empty()); + } + + #[tokio::test] + async fn register_and_read_context() { + let (_temp, registry) = open_temp_registry(); + let actor_id = Uuid::now_v7(); + let ctx = Context::new("test-context", vec![]); + + let handle = registry + .register_context(actor_id, 
ctx.clone()) + .await + .unwrap(); + let read_ctx = handle.context().await.unwrap(); + assert_eq!(read_ctx.name, "test-context"); + } + + #[tokio::test] + async fn context_scoped_by_actor() { + let (_temp, registry) = open_temp_registry(); + let actor_a = Uuid::now_v7(); + let actor_b = Uuid::now_v7(); + + let ctx = Context::new("private", vec![]); + let handle = registry.register_context(actor_a, ctx).await.unwrap(); + let context_id = handle.source().as_uuid(); + + let err = registry + .read_context(actor_b, context_id) + .await + .unwrap_err(); + assert_eq!(err.kind, ErrorKind::NotFound); + + registry.read_context(actor_a, context_id).await.unwrap(); + } + + #[tokio::test] + async fn list_contexts_per_actor() { + let (_temp, registry) = open_temp_registry(); + let actor_id = Uuid::now_v7(); + + registry + .register_context(actor_id, Context::new("ctx-1", vec![])) + .await + .unwrap(); + registry + .register_context(actor_id, Context::new("ctx-2", vec![])) + .await + .unwrap(); + + assert_eq!(registry.list_contexts(actor_id).await.unwrap().len(), 2); + } + + #[tokio::test] + async fn unregister_context() { + let (_temp, registry) = open_temp_registry(); + let actor_id = Uuid::now_v7(); + let ctx = Context::new("remove-me", vec![]); + let context_id = ctx.source.as_uuid(); + + registry.register_context(actor_id, ctx).await.unwrap(); + registry + .unregister_context(actor_id, context_id) + .await + .unwrap(); + + let err = registry + .read_context(actor_id, context_id) + .await + .unwrap_err(); + assert_eq!(err.kind, ErrorKind::NotFound); + } + + #[tokio::test] + async fn unregister_all_contexts() { + let (_temp, registry) = open_temp_registry(); + let actor_id = Uuid::now_v7(); + + registry + .register_context(actor_id, Context::new("c1", vec![])) + .await + .unwrap(); + registry + .register_context(actor_id, Context::new("c2", vec![])) + .await + .unwrap(); + + let deleted = registry.unregister_all_contexts(actor_id).await.unwrap(); + assert_eq!(deleted, 2); + 
assert!(registry.list_contexts(actor_id).await.unwrap().is_empty()); + } + + #[tokio::test] + async fn data_persists_across_reopen() { + let temp = tempfile::TempDir::new().unwrap(); + let path = temp.path().join("data"); + let actor_id = Uuid::now_v7(); + + let content = Content::new(ContentData::from("persistent")); + let content_id = content.content_source().as_uuid(); + + { + let registry = Registry::open(&path).unwrap(); + registry.register_content(actor_id, content).await.unwrap(); + } + + let registry = Registry::open(&path).unwrap(); + let handle = registry.read_content(actor_id, content_id).await.unwrap(); + let data = handle.content_data().await.unwrap(); + assert_eq!(data.as_str().unwrap(), "persistent"); + } + + #[tokio::test] + async fn base_dir() { + let temp = tempfile::TempDir::new().unwrap(); + let base = temp.path().join("data"); + let registry = Registry::open(&base).unwrap(); + assert_eq!(registry.base_dir(), base); + } +} diff --git a/crates/nvisy-registry/src/store/content.rs b/crates/nvisy-registry/src/store/content.rs deleted file mode 100644 index 9d501496..00000000 --- a/crates/nvisy-registry/src/store/content.rs +++ /dev/null @@ -1,111 +0,0 @@ -use std::fmt; - -use bytes::Bytes; -use fjall::Keyspace; -use nvisy_core::content::{ContentData, ContentMetadata, ContentSource}; -use nvisy_core::{Error, ErrorKind, Result}; -use uuid::Uuid; - -/// Lightweight handle to a content entry stored in the registry. -/// -/// Holds references to the fjall keyspaces so it can read content data -/// and metadata on demand. Cloning is cheap because fjall handles are -/// internally `Arc`-wrapped. 
-#[derive(Clone)] -pub struct ContentHandle { - actor: Uuid, - content_source: ContentSource, - content: Keyspace, - content_meta: Keyspace, -} - -impl fmt::Debug for ContentHandle { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("ContentHandle") - .field("actor", &self.actor) - .field("content_source", &self.content_source) - .finish_non_exhaustive() - } -} - -impl ContentHandle { - pub(crate) fn new( - actor: Uuid, - content_source: ContentSource, - content: Keyspace, - content_meta: Keyspace, - ) -> Self { - Self { - actor, - content_source, - content, - content_meta, - } - } - - /// Returns the content source identifier. - pub fn content_source(&self) -> ContentSource { - self.content_source - } - - /// Returns the actor that owns this content. - pub fn actor(&self) -> Uuid { - self.actor - } - - /// Reads the content bytes from the store. - pub async fn content_data(&self) -> Result { - let key = self.composite_key(); - let source = self.content_source; - let content_ks = self.content.clone(); - - tokio::task::spawn_blocking(move || -> Result { - let value = content_ks.get(key).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to read content data").with_source(err) - })?; - - let guard = value.ok_or_else(|| { - Error::new( - ErrorKind::NotFound, - format!("Content data not found (id: {})", source.as_uuid()), - ) - })?; - - Ok(ContentData::new(source, Bytes::copy_from_slice(&guard))) - }) - .await - .map_err(|err| Error::new(ErrorKind::Internal, "Blocking task panicked").with_source(err))? - } - - /// Reads the content metadata from the store. 
- pub async fn metadata(&self) -> Result { - let key = self.composite_key(); - let meta_ks = self.content_meta.clone(); - - tokio::task::spawn_blocking(move || -> Result { - let value = meta_ks.get(key).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to read content metadata").with_source(err) - })?; - - match value { - Some(guard) => serde_json::from_slice(&guard).map_err(|err| { - Error::new( - ErrorKind::Serialization, - "Failed to deserialize content metadata", - ) - .with_source(err) - }), - None => Ok(ContentMetadata::default()), - } - }) - .await - .map_err(|err| Error::new(ErrorKind::Internal, "Blocking task panicked").with_source(err))? - } - - fn composite_key(&self) -> [u8; 32] { - let mut key = [0u8; 32]; - key[..16].copy_from_slice(self.actor.as_bytes()); - key[16..].copy_from_slice(self.content_source.as_uuid().as_bytes()); - key - } -} diff --git a/crates/nvisy-registry/src/store/context.rs b/crates/nvisy-registry/src/store/context.rs deleted file mode 100644 index 6e690ffd..00000000 --- a/crates/nvisy-registry/src/store/context.rs +++ /dev/null @@ -1,77 +0,0 @@ -use std::fmt; - -use fjall::Keyspace; -use nvisy_core::content::ContentSource; -use nvisy_core::{Error, ErrorKind, Result}; -use nvisy_ontology::context::Context; -use uuid::Uuid; - -/// Lightweight handle to a context entry stored in the registry. -/// -/// Holds a reference to the contexts keyspace so it can deserialize the -/// stored JSON on demand. Cloning is cheap because fjall handles are -/// internally `Arc`-wrapped. 
-#[derive(Clone)] -pub struct ContextHandle { - actor: Uuid, - source: ContentSource, - contexts: Keyspace, -} - -impl fmt::Debug for ContextHandle { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("ContextHandle") - .field("actor", &self.actor) - .field("source", &self.source) - .finish_non_exhaustive() - } -} - -impl ContextHandle { - pub(crate) fn new(actor: Uuid, source: ContentSource, contexts: Keyspace) -> Self { - Self { - actor, - source, - contexts, - } - } - - /// Returns the content source identifier. - pub fn source(&self) -> ContentSource { - self.source - } - - /// Returns the actor that owns this context. - pub fn actor(&self) -> Uuid { - self.actor - } - - /// Reads and deserializes the context from the store. - pub async fn context(&self) -> Result { - let key = self.composite_key(); - let ctx_ks = self.contexts.clone(); - - tokio::task::spawn_blocking(move || -> Result { - let value = ctx_ks.get(key).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to read context").with_source(err) - })?; - - let guard = - value.ok_or_else(|| Error::new(ErrorKind::NotFound, "Context data not found"))?; - - serde_json::from_slice(&guard).map_err(|err| { - Error::new(ErrorKind::Serialization, "Failed to deserialize context") - .with_source(err) - }) - }) - .await - .map_err(|err| Error::new(ErrorKind::Internal, "Blocking task panicked").with_source(err))? 
- } - - fn composite_key(&self) -> [u8; 32] { - let mut key = [0u8; 32]; - key[..16].copy_from_slice(self.actor.as_bytes()); - key[16..].copy_from_slice(self.source.as_uuid().as_bytes()); - key - } -} diff --git a/crates/nvisy-registry/src/store/mod.rs b/crates/nvisy-registry/src/store/mod.rs deleted file mode 100644 index 83c342ef..00000000 --- a/crates/nvisy-registry/src/store/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -mod content; -mod context; -mod registry; - -pub use content::ContentHandle; -pub use context::ContextHandle; -pub use registry::Registry; diff --git a/crates/nvisy-registry/src/store/registry.rs b/crates/nvisy-registry/src/store/registry.rs deleted file mode 100644 index 53a710bb..00000000 --- a/crates/nvisy-registry/src/store/registry.rs +++ /dev/null @@ -1,653 +0,0 @@ -use std::path::{Path, PathBuf}; - -use fjall::{Database, Keyspace, KeyspaceCreateOptions, KvSeparationOptions}; -use nvisy_core::content::{Content, ContentSource}; -use nvisy_core::{Error, ErrorKind, Result}; -use nvisy_ontology::context::Context; -use uuid::Uuid; - -use super::content::ContentHandle; -use super::context::ContextHandle; - -/// Builds a 32-byte composite key: `[actor: 16][resource_id: 16]`. -fn make_key(actor: Uuid, id: Uuid) -> [u8; 32] { - let mut key = [0u8; 32]; - key[..16].copy_from_slice(actor.as_bytes()); - key[16..].copy_from_slice(id.as_bytes()); - key -} - -/// Actor-scoped content and context store backed by fjall. -/// -/// Stores content data, content metadata, and contexts in three keyspaces. -/// Every key is a 32-byte composite of `[actor_id][resource_id]`, so all -/// operations are inherently scoped to a single actor. -/// -/// All handles are internally `Arc`-wrapped, making `Registry` cheap to -/// clone and safe to share across threads. 
-#[derive(Clone)] -pub struct Registry { - base_dir: PathBuf, - db: Database, - content: Keyspace, - content_meta: Keyspace, - contexts: Keyspace, -} - -impl std::fmt::Debug for Registry { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Registry") - .field("base_dir", &self.base_dir) - .finish_non_exhaustive() - } -} - -impl Registry { - /// Opens (or creates) the fjall database at `path`. - /// - /// Three keyspaces are created: - /// - `"content"` with blob separation for efficient large-value storage - /// - `"content_meta"` with default configuration - /// - `"contexts"` with default configuration - /// - /// # Errors - /// - /// Returns an error if the database or keyspaces cannot be opened. - pub fn open(path: impl Into) -> Result { - let base_dir = path.into(); - - let db = Database::builder(&base_dir).open().map_err(|err| { - Error::new( - ErrorKind::Internal, - format!( - "Failed to open registry database (path: {})", - base_dir.display() - ), - ) - .with_source(err) - })?; - - let content = db - .keyspace("content", || { - KeyspaceCreateOptions::default() - .with_kv_separation(Some(KvSeparationOptions::default())) - }) - .map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to open content keyspace").with_source(err) - })?; - - let content_meta = db - .keyspace("content_meta", KeyspaceCreateOptions::default) - .map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to open content_meta keyspace") - .with_source(err) - })?; - - let contexts = db - .keyspace("contexts", KeyspaceCreateOptions::default) - .map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to open contexts keyspace").with_source(err) - })?; - - Ok(Self { - base_dir, - db, - content, - content_meta, - contexts, - }) - } - - /// Registers content, writing its bytes and metadata to the store. - /// - /// Returns a [`ContentHandle`] for subsequent reads. 
- pub async fn register_content(&self, actor: Uuid, content: Content) -> Result { - let content_source = content.content_source(); - let key = make_key(actor, content_source.as_uuid()); - let data = content.as_bytes().to_vec(); - - let (_, content_metadata) = content.into_parts(); - let meta_bytes = - serde_json::to_vec(&content_metadata.unwrap_or_default()).map_err(|err| { - Error::new( - ErrorKind::Serialization, - "Failed to serialize content metadata", - ) - .with_source(err) - })?; - - let content_ks = self.content.clone(); - let meta_ks = self.content_meta.clone(); - let db = self.db.clone(); - - tokio::task::spawn_blocking(move || -> Result<()> { - content_ks.insert(key, &data).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to write content data").with_source(err) - })?; - meta_ks.insert(key, &meta_bytes).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to write content metadata").with_source(err) - })?; - db.persist(fjall::PersistMode::SyncAll).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to persist database").with_source(err) - })?; - Ok(()) - }) - .await - .map_err(|err| { - Error::new(ErrorKind::Internal, "Blocking task panicked").with_source(err) - })??; - - Ok(ContentHandle::new( - actor, - content_source, - self.content.clone(), - self.content_meta.clone(), - )) - } - - /// Looks up previously registered content by actor and content ID. - /// - /// Returns [`ErrorKind::NotFound`] if no entry exists for the given key. 
- pub async fn read_content(&self, actor: Uuid, id: Uuid) -> Result { - let key = make_key(actor, id); - let content_ks = self.content.clone(); - - let exists = tokio::task::spawn_blocking(move || -> Result { - content_ks.contains_key(key).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to check content key").with_source(err) - }) - }) - .await - .map_err(|err| { - Error::new(ErrorKind::Internal, "Blocking task panicked").with_source(err) - })??; - - if !exists { - return Err(Error::new( - ErrorKind::NotFound, - format!("Content not found (actor: {actor}, id: {id})"), - )); - } - - let source = ContentSource::from(id); - Ok(ContentHandle::new( - actor, - source, - self.content.clone(), - self.content_meta.clone(), - )) - } - - /// Removes a single content entry by actor and content ID. - /// - /// Returns [`ErrorKind::NotFound`] if no entry exists for the given key. - pub async fn unregister_content(&self, actor: Uuid, id: Uuid) -> Result<()> { - let key = make_key(actor, id); - let content_ks = self.content.clone(); - let meta_ks = self.content_meta.clone(); - let db = self.db.clone(); - - tokio::task::spawn_blocking(move || -> Result<()> { - let exists = content_ks.contains_key(key).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to check content key").with_source(err) - })?; - - if !exists { - return Err(Error::new( - ErrorKind::NotFound, - format!("Content not found (actor: {actor}, id: {id})"), - )); - } - - content_ks.remove(key).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to remove content data").with_source(err) - })?; - meta_ks.remove(key).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to remove content metadata") - .with_source(err) - })?; - db.persist(fjall::PersistMode::SyncAll).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to persist database").with_source(err) - })?; - Ok(()) - }) - .await - .map_err(|err| Error::new(ErrorKind::Internal, "Blocking task panicked").with_source(err))? 
- } - - /// Removes all content entries for an actor. - /// - /// Returns the number of entries removed. - pub async fn unregister_all_content(&self, actor: Uuid) -> Result { - let prefix = actor.as_bytes().to_vec(); - let content_ks = self.content.clone(); - let meta_ks = self.content_meta.clone(); - let db = self.db.clone(); - - tokio::task::spawn_blocking(move || -> Result { - let keys: Vec> = content_ks - .prefix(&prefix) - .map(|guard| { - let key = guard.key().map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to iterate content keyspace") - .with_source(err) - })?; - Ok(key.to_vec()) - }) - .collect::>>()?; - - let count = keys.len(); - - for key in &keys { - content_ks.remove(key).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to remove content data") - .with_source(err) - })?; - meta_ks.remove(key).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to remove content metadata") - .with_source(err) - })?; - } - - if count > 0 { - db.persist(fjall::PersistMode::SyncAll).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to persist database").with_source(err) - })?; - } - - Ok(count) - }) - .await - .map_err(|err| Error::new(ErrorKind::Internal, "Blocking task panicked").with_source(err))? - } - - /// Lists all content IDs for an actor. - pub async fn list_content(&self, actor: Uuid) -> Result> { - let prefix = actor.as_bytes().to_vec(); - let content_ks = self.content.clone(); - - tokio::task::spawn_blocking(move || -> Result> { - let mut ids = Vec::new(); - for guard in content_ks.prefix(&prefix) { - let key = guard.key().map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to iterate content keyspace") - .with_source(err) - })?; - if key.len() == 32 - && let Ok(bytes) = <[u8; 16]>::try_from(&key[16..]) - { - ids.push(Uuid::from_bytes(bytes)); - } - } - ids.sort(); - Ok(ids) - }) - .await - .map_err(|err| Error::new(ErrorKind::Internal, "Blocking task panicked").with_source(err))? 
- } - - /// Registers a context, serializing it as JSON. - /// - /// Returns a [`ContextHandle`] for subsequent reads. - pub async fn register_context(&self, actor: Uuid, context: Context) -> Result { - let source = context.source; - let key = make_key(actor, source.as_uuid()); - - let json_bytes = serde_json::to_vec(&context).map_err(|err| { - Error::new(ErrorKind::Serialization, "Failed to serialize context").with_source(err) - })?; - - let ctx_ks = self.contexts.clone(); - let db = self.db.clone(); - - tokio::task::spawn_blocking(move || -> Result<()> { - ctx_ks.insert(key, &json_bytes).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to write context").with_source(err) - })?; - db.persist(fjall::PersistMode::SyncAll).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to persist database").with_source(err) - })?; - Ok(()) - }) - .await - .map_err(|err| { - Error::new(ErrorKind::Internal, "Blocking task panicked").with_source(err) - })??; - - Ok(ContextHandle::new(actor, source, self.contexts.clone())) - } - - /// Looks up a previously registered context by actor and context ID. - /// - /// Returns [`ErrorKind::NotFound`] if no entry exists for the given key. - pub async fn read_context(&self, actor: Uuid, id: Uuid) -> Result { - let key = make_key(actor, id); - let ctx_ks = self.contexts.clone(); - - let exists = tokio::task::spawn_blocking(move || -> Result { - ctx_ks.contains_key(key).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to check context key").with_source(err) - }) - }) - .await - .map_err(|err| { - Error::new(ErrorKind::Internal, "Blocking task panicked").with_source(err) - })??; - - if !exists { - return Err(Error::new( - ErrorKind::NotFound, - format!("Context not found (actor: {actor}, id: {id})"), - )); - } - - let source = ContentSource::from(id); - Ok(ContextHandle::new(actor, source, self.contexts.clone())) - } - - /// Removes a single context entry by actor and context ID. 
- /// - /// Returns [`ErrorKind::NotFound`] if no entry exists for the given key. - pub async fn unregister_context(&self, actor: Uuid, id: Uuid) -> Result<()> { - let key = make_key(actor, id); - let ctx_ks = self.contexts.clone(); - let db = self.db.clone(); - - tokio::task::spawn_blocking(move || -> Result<()> { - let exists = ctx_ks.contains_key(key).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to check context key").with_source(err) - })?; - - if !exists { - return Err(Error::new( - ErrorKind::NotFound, - format!("Context not found (actor: {actor}, id: {id})"), - )); - } - - ctx_ks.remove(key).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to remove context").with_source(err) - })?; - db.persist(fjall::PersistMode::SyncAll).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to persist database").with_source(err) - })?; - Ok(()) - }) - .await - .map_err(|err| Error::new(ErrorKind::Internal, "Blocking task panicked").with_source(err))? - } - - /// Removes all context entries for an actor. - /// - /// Returns the number of entries removed. 
- pub async fn unregister_all_contexts(&self, actor: Uuid) -> Result { - let prefix = actor.as_bytes().to_vec(); - let ctx_ks = self.contexts.clone(); - let db = self.db.clone(); - - tokio::task::spawn_blocking(move || -> Result { - let keys: Vec> = ctx_ks - .prefix(&prefix) - .map(|guard| { - let key = guard.key().map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to iterate contexts keyspace") - .with_source(err) - })?; - Ok(key.to_vec()) - }) - .collect::>>()?; - - let count = keys.len(); - - for key in &keys { - ctx_ks.remove(key).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to remove context").with_source(err) - })?; - } - - if count > 0 { - db.persist(fjall::PersistMode::SyncAll).map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to persist database").with_source(err) - })?; - } - - Ok(count) - }) - .await - .map_err(|err| Error::new(ErrorKind::Internal, "Blocking task panicked").with_source(err))? - } - - /// Lists all context IDs for an actor. - pub async fn list_contexts(&self, actor: Uuid) -> Result> { - let prefix = actor.as_bytes().to_vec(); - let ctx_ks = self.contexts.clone(); - - tokio::task::spawn_blocking(move || -> Result> { - let mut ids = Vec::new(); - for guard in ctx_ks.prefix(&prefix) { - let key = guard.key().map_err(|err| { - Error::new(ErrorKind::Internal, "Failed to iterate contexts keyspace") - .with_source(err) - })?; - if key.len() == 32 - && let Ok(bytes) = <[u8; 16]>::try_from(&key[16..]) - { - ids.push(Uuid::from_bytes(bytes)); - } - } - ids.sort(); - Ok(ids) - }) - .await - .map_err(|err| Error::new(ErrorKind::Internal, "Blocking task panicked").with_source(err))? - } - - /// Returns the base directory path (the database location). 
- pub fn base_dir(&self) -> &Path { - &self.base_dir - } -} - -#[cfg(test)] -mod tests { - use nvisy_core::content::{Content, ContentData}; - use nvisy_ontology::context::Context; - - use super::*; - - fn open_temp_registry() -> (tempfile::TempDir, Registry) { - let temp = tempfile::TempDir::new().unwrap(); - let registry = Registry::open(temp.path().join("data")).unwrap(); - (temp, registry) - } - - #[tokio::test] - async fn register_and_read_content() { - let (_temp, registry) = open_temp_registry(); - let actor = Uuid::now_v7(); - let content = Content::new(ContentData::from("Hello, world!")); - - let handle = registry.register_content(actor, content).await.unwrap(); - let data = handle.content_data().await.unwrap(); - assert_eq!(data.as_str().unwrap(), "Hello, world!"); - } - - #[tokio::test] - async fn content_scoped_by_actor() { - let (_temp, registry) = open_temp_registry(); - let actor_a = Uuid::now_v7(); - let actor_b = Uuid::now_v7(); - - let content = Content::new(ContentData::from("actor A only")); - let handle = registry.register_content(actor_a, content).await.unwrap(); - let id = handle.content_source().as_uuid(); - - // Actor B cannot see actor A's content - let err = registry.read_content(actor_b, id).await.unwrap_err(); - assert_eq!(err.kind, ErrorKind::NotFound); - - // Actor A can - registry.read_content(actor_a, id).await.unwrap(); - } - - #[tokio::test] - async fn list_content_per_actor() { - let (_temp, registry) = open_temp_registry(); - let actor_a = Uuid::now_v7(); - let actor_b = Uuid::now_v7(); - - registry - .register_content(actor_a, Content::new(ContentData::from("a1"))) - .await - .unwrap(); - registry - .register_content(actor_a, Content::new(ContentData::from("a2"))) - .await - .unwrap(); - registry - .register_content(actor_b, Content::new(ContentData::from("b1"))) - .await - .unwrap(); - - assert_eq!(registry.list_content(actor_a).await.unwrap().len(), 2); - assert_eq!(registry.list_content(actor_b).await.unwrap().len(), 1); - } 
- - #[tokio::test] - async fn unregister_content() { - let (_temp, registry) = open_temp_registry(); - let actor = Uuid::now_v7(); - let content = Content::new(ContentData::from("delete me")); - let id = content.content_source().as_uuid(); - registry.register_content(actor, content).await.unwrap(); - - registry.unregister_content(actor, id).await.unwrap(); - - let err = registry.read_content(actor, id).await.unwrap_err(); - assert_eq!(err.kind, ErrorKind::NotFound); - } - - #[tokio::test] - async fn unregister_all_content() { - let (_temp, registry) = open_temp_registry(); - let actor = Uuid::now_v7(); - - registry - .register_content(actor, Content::new(ContentData::from("first"))) - .await - .unwrap(); - registry - .register_content(actor, Content::new(ContentData::from("second"))) - .await - .unwrap(); - - let deleted = registry.unregister_all_content(actor).await.unwrap(); - assert_eq!(deleted, 2); - assert!(registry.list_content(actor).await.unwrap().is_empty()); - } - - #[tokio::test] - async fn register_and_read_context() { - let (_temp, registry) = open_temp_registry(); - let actor = Uuid::now_v7(); - let ctx = Context::new("test-context", vec![]); - - let handle = registry.register_context(actor, ctx.clone()).await.unwrap(); - let read_ctx = handle.context().await.unwrap(); - assert_eq!(read_ctx.name, "test-context"); - } - - #[tokio::test] - async fn context_scoped_by_actor() { - let (_temp, registry) = open_temp_registry(); - let actor_a = Uuid::now_v7(); - let actor_b = Uuid::now_v7(); - - let ctx = Context::new("private", vec![]); - let handle = registry.register_context(actor_a, ctx).await.unwrap(); - let id = handle.source().as_uuid(); - - let err = registry.read_context(actor_b, id).await.unwrap_err(); - assert_eq!(err.kind, ErrorKind::NotFound); - - registry.read_context(actor_a, id).await.unwrap(); - } - - #[tokio::test] - async fn list_contexts_per_actor() { - let (_temp, registry) = open_temp_registry(); - let actor = Uuid::now_v7(); - - 
registry - .register_context(actor, Context::new("ctx-1", vec![])) - .await - .unwrap(); - registry - .register_context(actor, Context::new("ctx-2", vec![])) - .await - .unwrap(); - - assert_eq!(registry.list_contexts(actor).await.unwrap().len(), 2); - } - - #[tokio::test] - async fn unregister_context() { - let (_temp, registry) = open_temp_registry(); - let actor = Uuid::now_v7(); - let ctx = Context::new("remove-me", vec![]); - let id = ctx.source.as_uuid(); - - registry.register_context(actor, ctx).await.unwrap(); - registry.unregister_context(actor, id).await.unwrap(); - - let err = registry.read_context(actor, id).await.unwrap_err(); - assert_eq!(err.kind, ErrorKind::NotFound); - } - - #[tokio::test] - async fn unregister_all_contexts() { - let (_temp, registry) = open_temp_registry(); - let actor = Uuid::now_v7(); - - registry - .register_context(actor, Context::new("c1", vec![])) - .await - .unwrap(); - registry - .register_context(actor, Context::new("c2", vec![])) - .await - .unwrap(); - - let deleted = registry.unregister_all_contexts(actor).await.unwrap(); - assert_eq!(deleted, 2); - assert!(registry.list_contexts(actor).await.unwrap().is_empty()); - } - - #[tokio::test] - async fn data_persists_across_reopen() { - let temp = tempfile::TempDir::new().unwrap(); - let path = temp.path().join("data"); - let actor = Uuid::now_v7(); - - let content = Content::new(ContentData::from("persistent")); - let id = content.content_source().as_uuid(); - - { - let registry = Registry::open(&path).unwrap(); - registry.register_content(actor, content).await.unwrap(); - } - - let registry = Registry::open(&path).unwrap(); - let handle = registry.read_content(actor, id).await.unwrap(); - let data = handle.content_data().await.unwrap(); - assert_eq!(data.as_str().unwrap(), "persistent"); - } - - #[tokio::test] - async fn base_dir() { - let temp = tempfile::TempDir::new().unwrap(); - let base = temp.path().join("data"); - let registry = Registry::open(&base).unwrap(); - 
assert_eq!(registry.base_dir(), base); - } -} diff --git a/crates/nvisy-rig/src/agent/base/mod.rs b/crates/nvisy-rig/src/agent/base/mod.rs index 00c1d811..97fba788 100644 --- a/crates/nvisy-rig/src/agent/base/mod.rs +++ b/crates/nvisy-rig/src/agent/base/mod.rs @@ -7,11 +7,11 @@ mod detection; mod provider; mod response; -pub use agent::AgentConfig; -pub(crate) use agent::{Agents, BaseAgent}; -pub(crate) use builder::BaseAgentBuilder; -pub use context::ContextWindow; -pub(crate) use detection::ALL_TYPES_HINT; -pub use detection::{DetectionConfig, DetectionRequest, DetectionResponse}; -pub use provider::AgentProvider; -pub(crate) use response::ResponseParser; +pub use self::agent::AgentConfig; +pub(crate) use self::agent::{Agents, BaseAgent}; +pub(crate) use self::builder::BaseAgentBuilder; +pub use self::context::ContextWindow; +pub(crate) use self::detection::ALL_TYPES_HINT; +pub use self::detection::{DetectionConfig, DetectionRequest, DetectionResponse}; +pub use self::provider::AgentProvider; +pub(crate) use self::response::ResponseParser; diff --git a/crates/nvisy-rig/src/agent/base/response.rs b/crates/nvisy-rig/src/agent/base/response.rs index a37ff3b7..ddb12a22 100644 --- a/crates/nvisy-rig/src/agent/base/response.rs +++ b/crates/nvisy-rig/src/agent/base/response.rs @@ -114,7 +114,7 @@ mod tests { #[test] fn parse_json_raw_array() { - let text = r#"[{"category":"pii","entity_type":"email_address","value":"a@b.com","confidence":0.9,"start_offset":0,"end_offset":7}]"#; + let text = r#"[{"category":"contact_info","entity_type":"email_address","value":"a@b.com","confidence":0.9,"start_offset":0,"end_offset":7}]"#; let result = ResponseParser::from_text(text) .parse_json::>() .unwrap(); @@ -123,7 +123,7 @@ mod tests { #[test] fn parse_json_fenced() { - let text = "```json\n[{\"category\":\"pii\",\"entity_type\":\"email_address\",\"value\":\"a@b.com\",\"confidence\":0.9}]\n```"; + let text = 
"```json\n[{\"category\":\"contact_info\",\"entity_type\":\"email_address\",\"value\":\"a@b.com\",\"confidence\":0.9}]\n```"; let result = ResponseParser::from_text(text) .parse_json::>() .unwrap(); diff --git a/crates/nvisy-rig/src/agent/cv/mod.rs b/crates/nvisy-rig/src/agent/cv/mod.rs index 6bb0199e..30cb2ec2 100644 --- a/crates/nvisy-rig/src/agent/cv/mod.rs +++ b/crates/nvisy-rig/src/agent/cv/mod.rs @@ -12,12 +12,12 @@ mod tool; use async_trait::async_trait; use base64::Engine; use base64::engine::general_purpose::STANDARD; -pub use output::{CvEntities, CvEntity}; -use prompt::{CV_SYSTEM_PROMPT, CvPromptBuilder}; use serde::Serialize; -use tool::CvRigTool; use uuid::Uuid; +pub use self::output::{CvEntities, CvEntity}; +use self::prompt::{CV_SYSTEM_PROMPT, CvPromptBuilder}; +use self::tool::CvRigTool; use super::{AgentConfig, AgentProvider, BaseAgent, DetectionConfig}; use crate::backend::UsageTracker; use crate::error::Error; diff --git a/crates/nvisy-rig/src/agent/cv/prompt.rs b/crates/nvisy-rig/src/agent/cv/prompt.rs index 513c31f1..eade555e 100644 --- a/crates/nvisy-rig/src/agent/cv/prompt.rs +++ b/crates/nvisy-rig/src/agent/cv/prompt.rs @@ -52,15 +52,14 @@ You have access to a computer vision tool that detects faces, license plates, an \n\ Your workflow:\n\ 1. Use the cv_detect_objects tool to detect objects in the provided image.\n\ -2. Analyze the detections and classify each into an entity category (pii, phi, etc.) \ - and specific entity type.\n\ +2. Analyze the detections and classify each into an entity category and specific entity type.\n\ 3. 
Return a JSON array of detected entities, each with keys: \ category, entity_type, label, confidence, bbox ([x, y, width, height] in pixels).\n\ \n\ Common entity mappings:\n\ -- face → category: pii, entity_type: biometric_data\n\ -- license_plate → category: pii, entity_type: vehicle_id\n\ -- signature → category: pii, entity_type: biometric_data\n\ -- handwriting → category: pii, entity_type: person_name (if it contains a name)\n\ +- face → category: biometric, entity_type: face\n\ +- license_plate → category: personal_identity, entity_type: vehicle_registration\n\ +- signature → category: biometric, entity_type: signature\n\ +- handwriting → category: personal_identity, entity_type: person_name (if it contains a name)\n\ \n\ If no objects are detected, return an empty array []."; diff --git a/crates/nvisy-rig/src/agent/generate/mod.rs b/crates/nvisy-rig/src/agent/generate/mod.rs index 1c22a3aa..60636871 100644 --- a/crates/nvisy-rig/src/agent/generate/mod.rs +++ b/crates/nvisy-rig/src/agent/generate/mod.rs @@ -8,10 +8,10 @@ mod output; mod prompt; use nvisy_ontology::entity::EntityKind; -pub use output::{GenOutput, GeneratedEntity}; -use prompt::{GEN_SYSTEM_PROMPT, GenPromptBuilder}; use uuid::Uuid; +pub use self::output::{GenOutput, GeneratedEntity}; +use self::prompt::{GEN_SYSTEM_PROMPT, GenPromptBuilder}; use super::{AgentConfig, AgentProvider, BaseAgent}; use crate::backend::UsageTracker; use crate::error::Error; diff --git a/crates/nvisy-rig/src/agent/mod.rs b/crates/nvisy-rig/src/agent/mod.rs index b90da594..ad04d73c 100644 --- a/crates/nvisy-rig/src/agent/mod.rs +++ b/crates/nvisy-rig/src/agent/mod.rs @@ -11,11 +11,15 @@ mod generate; mod ner; mod ocr; -pub(crate) use base::{ALL_TYPES_HINT, BaseAgent}; -pub use base::{ +pub(crate) use self::base::{ALL_TYPES_HINT, BaseAgent}; +pub use self::base::{ AgentConfig, AgentProvider, ContextWindow, DetectionConfig, DetectionRequest, DetectionResponse, }; -pub use cv::{CvAgent, CvDetection, CvEntities, CvEntity, 
CvProvider}; -pub use generate::{GenAgent, GenOutput, GenRequest, GeneratedEntity}; -pub use ner::{KnownNerEntity, NerAgent, NerContext, NerEntities, NerEntity, ResolvedOffsets}; -pub use ocr::{OcrAgent, ProposedEntity, VerificationOutput, VerificationStatus, VerifiedEntity}; +pub use self::cv::{CvAgent, CvDetection, CvEntities, CvEntity, CvProvider}; +pub use self::generate::{GenAgent, GenOutput, GenRequest, GeneratedEntity}; +pub use self::ner::{ + KnownNerEntity, NerAgent, NerContext, NerEntities, NerEntity, ResolvedOffsets, +}; +pub use self::ocr::{ + OcrAgent, ProposedEntity, VerificationOutput, VerificationStatus, VerifiedEntity, +}; diff --git a/crates/nvisy-rig/src/agent/ner/mod.rs b/crates/nvisy-rig/src/agent/ner/mod.rs index a1627b63..45dab21c 100644 --- a/crates/nvisy-rig/src/agent/ner/mod.rs +++ b/crates/nvisy-rig/src/agent/ner/mod.rs @@ -8,12 +8,12 @@ mod context; mod output; mod prompt; -pub use context::NerContext; use nvisy_http::HttpClient; -pub use output::{KnownNerEntity, NerEntities, NerEntity, ResolvedOffsets}; -use prompt::{NER_SYSTEM_PROMPT, NerPromptBuilder}; use uuid::Uuid; +pub use self::context::NerContext; +pub use self::output::{KnownNerEntity, NerEntities, NerEntity, ResolvedOffsets}; +use self::prompt::{NER_SYSTEM_PROMPT, NerPromptBuilder}; use super::{AgentConfig, AgentProvider, BaseAgent, DetectionConfig}; use crate::backend::UsageTracker; use crate::error::Error; diff --git a/crates/nvisy-rig/src/agent/ocr/input.rs b/crates/nvisy-rig/src/agent/ocr/input.rs index 89b2c19d..9308ce1f 100644 --- a/crates/nvisy-rig/src/agent/ocr/input.rs +++ b/crates/nvisy-rig/src/agent/ocr/input.rs @@ -31,7 +31,7 @@ impl ProposedEntity { }; Self { id, - category: entity.category.clone(), + category: entity.category, entity_type: entity.entity_kind, value: entity.value.clone(), confidence: entity.confidence, diff --git a/crates/nvisy-rig/src/agent/ocr/mod.rs b/crates/nvisy-rig/src/agent/ocr/mod.rs index 9078c309..6f46a8bf 100644 --- 
a/crates/nvisy-rig/src/agent/ocr/mod.rs +++ b/crates/nvisy-rig/src/agent/ocr/mod.rs @@ -11,12 +11,12 @@ mod prompt; use base64::Engine; use base64::engine::general_purpose::STANDARD; -pub use input::ProposedEntity; use nvisy_ontology::entity::Entity; -pub use output::{VerificationOutput, VerificationStatus, VerifiedEntity}; -use prompt::{OCR_SYSTEM_PROMPT, OcrPromptBuilder}; use uuid::Uuid; +pub use self::input::ProposedEntity; +pub use self::output::{VerificationOutput, VerificationStatus, VerifiedEntity}; +use self::prompt::{OCR_SYSTEM_PROMPT, OcrPromptBuilder}; use super::{AgentConfig, AgentProvider, BaseAgent}; use crate::backend::UsageTracker; use crate::error::Error; diff --git a/crates/nvisy-rig/src/agent/ocr/output.rs b/crates/nvisy-rig/src/agent/ocr/output.rs index 3e754e9d..c70d27e8 100644 --- a/crates/nvisy-rig/src/agent/ocr/output.rs +++ b/crates/nvisy-rig/src/agent/ocr/output.rs @@ -3,7 +3,10 @@ use std::collections::HashMap; use nvisy_core::math::BoundingBox; -use nvisy_ontology::entity::{DetectionMethod, Entity, EntityCategory, EntityKind, ImageLocation}; +use nvisy_ontology::entity::{ + Entity, EntityCategory, EntityKind, ExtractionMethod, ImageLocation, RecognitionMethod, + RefinementMethod, +}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -58,9 +61,13 @@ impl VerifiedEntity { self.category.unwrap_or(entity.category), self.entity_type.unwrap_or(entity.entity_kind), self.value.as_deref().unwrap_or(&entity.value), - DetectionMethod::Ocr, + RecognitionMethod::Ner, self.confidence, ); + corrected.extraction_methods = vec![ExtractionMethod::OpticalCharacterRecognition]; + corrected + .refinement_methods + .push(RefinementMethod::ModelVerification); corrected.source = entity.source; if let Some(bbox) = self.bbox { diff --git a/crates/nvisy-rig/src/agent/ocr/prompt.rs b/crates/nvisy-rig/src/agent/ocr/prompt.rs index 75d3b2fe..b61581cb 100644 --- a/crates/nvisy-rig/src/agent/ocr/prompt.rs +++ b/crates/nvisy-rig/src/agent/ocr/prompt.rs 
@@ -78,7 +78,7 @@ mod tests { let entities = vec![ ProposedEntity { id: 0, - category: EntityCategory::Pii, + category: EntityCategory::PersonalIdentity, entity_type: EntityKind::PersonName, value: "John Doe".into(), confidence: 0.95, @@ -100,7 +100,7 @@ mod tests { ]; let prompt = OcrPromptBuilder::new(&entities).build("AAAA"); - assert!(prompt.contains("[0] category=pii")); + assert!(prompt.contains("[0] category=personal_identity")); assert!(prompt.contains("person_name")); assert!(prompt.contains("John Doe")); assert!(prompt.contains("bbox=[10.0, 20.0, 100.0, 30.0]")); diff --git a/crates/nvisy-rig/src/audio/mod.rs b/crates/nvisy-rig/src/audio/mod.rs index d76f5cf4..c18f088d 100644 --- a/crates/nvisy-rig/src/audio/mod.rs +++ b/crates/nvisy-rig/src/audio/mod.rs @@ -3,5 +3,5 @@ pub mod stt; pub mod tts; -pub use stt::SttProvider; -pub use tts::TtsProvider; +pub use self::stt::SttProvider; +pub use self::tts::TtsProvider; diff --git a/crates/nvisy-rig/src/audio/stt/mod.rs b/crates/nvisy-rig/src/audio/stt/mod.rs index 1995864b..4c7d3f46 100644 --- a/crates/nvisy-rig/src/audio/stt/mod.rs +++ b/crates/nvisy-rig/src/audio/stt/mod.rs @@ -7,12 +7,12 @@ mod provider; use nvisy_http::HttpClient; -pub(crate) use provider::SttModels; -pub use provider::SttProvider; #[cfg(feature = "openai-whisper")] use rig::transcription::TranscriptionModel; use uuid::Uuid; +pub(crate) use self::provider::SttModels; +pub use self::provider::SttProvider; use crate::error::Error; const TARGET: &str = "nvisy_rig::stt"; diff --git a/crates/nvisy-rig/src/audio/tts/mod.rs b/crates/nvisy-rig/src/audio/tts/mod.rs index d3e58782..da84a800 100644 --- a/crates/nvisy-rig/src/audio/tts/mod.rs +++ b/crates/nvisy-rig/src/audio/tts/mod.rs @@ -3,12 +3,12 @@ mod provider; use nvisy_http::HttpClient; -pub(crate) use provider::TtsModels; -pub use provider::TtsProvider; #[cfg(feature = "openai-tts")] use rig::audio_generation::AudioGenerationModel as _; use uuid::Uuid; +pub(crate) use 
self::provider::TtsModels; +pub use self::provider::TtsProvider; use crate::error::Error; const TARGET: &str = "nvisy_rig::tts"; diff --git a/crates/nvisy-rig/src/backend/mod.rs b/crates/nvisy-rig/src/backend/mod.rs index d65dee0d..525d27ff 100644 --- a/crates/nvisy-rig/src/backend/mod.rs +++ b/crates/nvisy-rig/src/backend/mod.rs @@ -2,5 +2,5 @@ mod metrics; mod provider; -pub use metrics::{UsageStats, UsageTracker}; -pub use provider::{AuthenticatedProvider, UnauthenticatedProvider}; +pub use self::metrics::{UsageStats, UsageTracker}; +pub use self::provider::{AuthenticatedProvider, UnauthenticatedProvider}; diff --git a/crates/nvisy-rig/src/backend/provider/mod.rs b/crates/nvisy-rig/src/backend/provider/mod.rs index 3d892313..beaff0ed 100644 --- a/crates/nvisy-rig/src/backend/provider/mod.rs +++ b/crates/nvisy-rig/src/backend/provider/mod.rs @@ -6,5 +6,5 @@ mod authenticated; mod unauthenticated; -pub use authenticated::AuthenticatedProvider; -pub use unauthenticated::UnauthenticatedProvider; +pub use self::authenticated::AuthenticatedProvider; +pub use self::unauthenticated::UnauthenticatedProvider; diff --git a/crates/nvisy-server/src/extract/mod.rs b/crates/nvisy-server/src/extract/mod.rs index 7f8faeae..bc696e5a 100644 --- a/crates/nvisy-server/src/extract/mod.rs +++ b/crates/nvisy-server/src/extract/mod.rs @@ -4,6 +4,6 @@ mod json; mod path; mod version; -pub use json::Json; -pub use path::Path; -pub use version::Version; +pub use self::json::Json; +pub use self::path::Path; +pub use self::version::Version; diff --git a/crates/nvisy-server/src/handler/error/from_core.rs b/crates/nvisy-server/src/handler/error/from_core.rs index 68644d30..1bacfef1 100644 --- a/crates/nvisy-server/src/handler/error/from_core.rs +++ b/crates/nvisy-server/src/handler/error/from_core.rs @@ -19,7 +19,7 @@ impl From for Error<'static> { }; let mut error = Self::new(kind).with_message(err.message); - if let Some(component) = err.source_component { + if let Some(component) = 
err.component { error = error.with_context(component); } error diff --git a/crates/nvisy-server/src/handler/error/mod.rs b/crates/nvisy-server/src/handler/error/mod.rs index 7d733be7..0687adaf 100644 --- a/crates/nvisy-server/src/handler/error/mod.rs +++ b/crates/nvisy-server/src/handler/error/mod.rs @@ -8,5 +8,5 @@ mod from_core; mod http_error; mod http_kind; -pub use http_error::{Error, Result}; -pub use http_kind::ErrorKind; +pub use self::http_error::{Error, Result}; +pub use self::http_kind::ErrorKind; diff --git a/crates/nvisy-server/src/handler/mod.rs b/crates/nvisy-server/src/handler/mod.rs index 8cb5cce0..2a7cc769 100644 --- a/crates/nvisy-server/src/handler/mod.rs +++ b/crates/nvisy-server/src/handler/mod.rs @@ -21,8 +21,8 @@ mod request; mod response; use aide::axum::ApiRouter; -pub use error::{Error, ErrorKind, Result}; +pub use self::error::{Error, ErrorKind, Result}; use crate::service::ServiceState; /// Build the handler route tree. diff --git a/crates/nvisy-server/src/handler/request/mod.rs b/crates/nvisy-server/src/handler/request/mod.rs index 9a60105e..d01612b2 100644 --- a/crates/nvisy-server/src/handler/request/mod.rs +++ b/crates/nvisy-server/src/handler/request/mod.rs @@ -9,7 +9,7 @@ mod files; mod path; mod process; -pub use contexts::NewContext; -pub use files::NewFile; -pub use path::{ActorQuery, ContentPath, ContextPath}; -pub use process::NewProcess; +pub use self::contexts::NewContext; +pub use self::files::NewFile; +pub use self::path::{ActorQuery, ContentPath, ContextPath}; +pub use self::process::NewProcess; diff --git a/crates/nvisy-server/src/handler/response/mod.rs b/crates/nvisy-server/src/handler/response/mod.rs index b2f7fa9c..6c57c58b 100644 --- a/crates/nvisy-server/src/handler/response/mod.rs +++ b/crates/nvisy-server/src/handler/response/mod.rs @@ -11,8 +11,8 @@ mod error; mod files; mod process; -pub use check::{Analytics, Health, ServiceStatus}; -pub use contexts::{Context, ContextId, ContextList}; -pub use 
error::ErrorResponse; -pub use files::{File, FileId, FileList}; -pub use process::ProcessResult; +pub use self::check::{Analytics, Health, ServiceStatus}; +pub use self::contexts::{Context, ContextId, ContextList}; +pub use self::error::ErrorResponse; +pub use self::files::{File, FileId, FileList}; +pub use self::process::ProcessResult; diff --git a/crates/nvisy-server/src/lib.rs b/crates/nvisy-server/src/lib.rs index 8c92dde0..8d6a4241 100644 --- a/crates/nvisy-server/src/lib.rs +++ b/crates/nvisy-server/src/lib.rs @@ -7,6 +7,6 @@ pub mod handler; pub mod middleware; pub mod service; -pub use handler::error::{Error, ErrorKind, Result}; -pub use handler::routes; -pub use service::ServiceState; +pub use self::handler::error::{Error, ErrorKind, Result}; +pub use self::handler::routes; +pub use self::service::ServiceState; diff --git a/crates/nvisy-server/src/middleware/mod.rs b/crates/nvisy-server/src/middleware/mod.rs index c4f1e18c..eb25d59c 100644 --- a/crates/nvisy-server/src/middleware/mod.rs +++ b/crates/nvisy-server/src/middleware/mod.rs @@ -47,10 +47,10 @@ mod recovery; mod security; mod specification; -pub use constants::{ +pub use self::constants::{ DEFAULT_MAX_BODY_SIZE, DEFAULT_MAX_FILE_BODY_SIZE, DEFAULT_REQUEST_TIMEOUT_SECS, }; -pub use observability::RouterObservabilityExt; -pub use recovery::{RecoveryConfig, RouterRecoveryExt}; -pub use security::{RouterSecurityExt, SecurityConfig}; -pub use specification::{OpenApiConfig, RouterOpenApiExt}; +pub use self::observability::RouterObservabilityExt; +pub use self::recovery::{RecoveryConfig, RouterRecoveryExt}; +pub use self::security::{RouterSecurityExt, SecurityConfig}; +pub use self::specification::{OpenApiConfig, RouterOpenApiExt};