From 8215d89b2401c65a7a4168c33f4fa58f1fad4001 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Fri, 13 Mar 2026 21:50:59 +0100 Subject: [PATCH 1/6] refactor(engine): redesign pipeline graph with typed action nodes and phase ordering Replace the generic Source/Action/Target node model with a strongly-typed GraphNodeKind enum where each variant carries its own configuration struct. Split action configs into dedicated files (context, extraction, recognition, refinement, lifecycle). Add phase-based pipeline ordering validation, action- level config validation, and structural checks (self-loops, duplicate edges). Move Compiler into DefaultEngineInner for reuse across runs. Rename error.rs `source` parameter to `component` to align with the internal field name. Co-Authored-By: Claude Opus 4.6 --- crates/nvisy-core/src/error.rs | 22 +- .../nvisy-engine/src/compiler/graph/action.rs | 37 --- .../src/compiler/graph/context.rs | 69 ++++++ .../src/compiler/graph/extraction.rs | 41 ++++ .../src/compiler/graph/lifecycle.rs | 50 ++++ crates/nvisy-engine/src/compiler/graph/mod.rs | 219 +++++++++++++----- .../src/compiler/graph/recognition.rs | 77 ++++++ .../src/compiler/graph/refinement.rs | 47 ++++ .../nvisy-engine/src/compiler/graph/source.rs | 13 -- .../nvisy-engine/src/compiler/graph/target.rs | 13 -- crates/nvisy-engine/src/compiler/mod.rs | 113 ++++----- crates/nvisy-engine/src/lib.rs | 4 +- crates/nvisy-engine/src/pipeline/default.rs | 22 +- crates/nvisy-engine/src/pipeline/executor.rs | 145 +++--------- 14 files changed, 563 insertions(+), 309 deletions(-) delete mode 100644 crates/nvisy-engine/src/compiler/graph/action.rs create mode 100644 crates/nvisy-engine/src/compiler/graph/context.rs create mode 100644 crates/nvisy-engine/src/compiler/graph/extraction.rs create mode 100644 crates/nvisy-engine/src/compiler/graph/lifecycle.rs create mode 100644 crates/nvisy-engine/src/compiler/graph/recognition.rs create mode 100644 
crates/nvisy-engine/src/compiler/graph/refinement.rs delete mode 100644 crates/nvisy-engine/src/compiler/graph/source.rs delete mode 100644 crates/nvisy-engine/src/compiler/graph/target.rs diff --git a/crates/nvisy-core/src/error.rs b/crates/nvisy-core/src/error.rs index 1a50448a..8722032a 100644 --- a/crates/nvisy-core/src/error.rs +++ b/crates/nvisy-core/src/error.rs @@ -82,19 +82,19 @@ impl Error { self } - /// Shorthand for a validation error with a source component. - pub fn validation(message: impl Into, source: impl Into) -> Self { - Self::new(ErrorKind::Validation, message).with_component(source) + /// Shorthand for a validation error with a component name. + pub fn validation(message: impl Into, component: impl Into) -> Self { + Self::new(ErrorKind::Validation, message).with_component(component) } - /// Shorthand for a connection error with a source component and retryable flag. + /// Shorthand for a connection error with a component name and retryable flag. pub fn connection( message: impl Into, - source: impl Into, + component: impl Into, retryable: bool, ) -> Self { Self::new(ErrorKind::Connection, message) - .with_component(source) + .with_component(component) .with_retryable(retryable) } @@ -113,10 +113,14 @@ impl Error { Self::new(ErrorKind::Policy, message) } - /// Shorthand for a runtime error with a source component and retryable flag. - pub fn runtime(message: impl Into, source: impl Into, retryable: bool) -> Self { + /// Shorthand for a runtime error with a component name and retryable flag. 
+ pub fn runtime( + message: impl Into, + component: impl Into, + retryable: bool, + ) -> Self { Self::new(ErrorKind::Runtime, message) - .with_component(source) + .with_component(component) .with_retryable(retryable) } diff --git a/crates/nvisy-engine/src/compiler/graph/action.rs b/crates/nvisy-engine/src/compiler/graph/action.rs deleted file mode 100644 index 52f9995c..00000000 --- a/crates/nvisy-engine/src/compiler/graph/action.rs +++ /dev/null @@ -1,37 +0,0 @@ -//! Action node definition with strongly-typed action variants. - -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; - -/// The set of strongly-typed actions a pipeline node can perform. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub enum ActionKind { - /// Run OCR before connected nodes. - Ocr, - /// Transcribe audio content to text. - Transcribe, - /// Run entity detection (NER, pattern, CV). - Detect, - /// Evaluate policies against detected entities. - Evaluate, - /// Apply redaction instructions to the content. - Redact, - /// Translate content or context between languages. - Translate, - /// Classify content and route to different outputs. - Classify, - /// Generate a summary and inject into context. - Summarize, - /// Emit a per-file audit record. - Audit, - /// Deliver to a target connection. - Export, -} - -/// A transformation or detection step. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -pub struct ActionNode { - /// The action this node performs. - pub action: ActionKind, -} diff --git a/crates/nvisy-engine/src/compiler/graph/context.rs b/crates/nvisy-engine/src/compiler/graph/context.rs new file mode 100644 index 00000000..d0761e12 --- /dev/null +++ b/crates/nvisy-engine/src/compiler/graph/context.rs @@ -0,0 +1,69 @@ +//! Context action configurations: load, save, and generate. 
+ +use nvisy_core::Error; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Configuration for the [`LoadContext`](super::GraphNodeKind::LoadContext) action. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] +pub struct LoadContextAction { + /// Context identifiers to load. + pub context_ids: Vec, +} + +impl LoadContextAction { + /// Validates that at least one context ID is specified. + pub fn validate(&self) -> Result<(), Error> { + if self.context_ids.is_empty() { + return Err(Error::validation( + "load_context requires at least one context id", + "compiler", + )); + } + Ok(()) + } +} + +/// Configuration for the [`SaveContext`](super::GraphNodeKind::SaveContext) action. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] +pub struct SaveContextAction { + /// Context identifiers to persist. + pub context_ids: Vec, +} + +impl SaveContextAction { + /// Validates that at least one context ID is specified. + pub fn validate(&self) -> Result<(), Error> { + if self.context_ids.is_empty() { + return Err(Error::validation( + "save_context requires at least one context id", + "compiler", + )); + } + Ok(()) + } +} + +/// Configuration for the [`GenerateContext`](super::GraphNodeKind::GenerateContext) action. +#[derive( + Debug, + Clone, + Default, + PartialEq, + Eq, + Serialize, + Deserialize, + JsonSchema +)] +pub struct GenerateContextAction { + /// Include a span-level summary in the generated context. + #[serde(default)] + pub summarization: bool, + /// Include translated spans in the generated context. + #[serde(default)] + pub translation: bool, + /// Include an audit record in the generated context. 
+ #[serde(default)] + pub audit: bool, +} diff --git a/crates/nvisy-engine/src/compiler/graph/extraction.rs b/crates/nvisy-engine/src/compiler/graph/extraction.rs new file mode 100644 index 00000000..d8a2e0fb --- /dev/null +++ b/crates/nvisy-engine/src/compiler/graph/extraction.rs @@ -0,0 +1,41 @@ +//! Extraction action configurations: visual and audial. + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Configuration for the [`VisualExtraction`](super::GraphNodeKind::VisualExtraction) action. +#[derive( + Debug, + Clone, + Default, + PartialEq, + Eq, + Serialize, + Deserialize, + JsonSchema +)] +pub struct VisualExtractionAction { + /// Run a secondary LLM verification pass on OCR results. + #[serde(default)] + pub verification: bool, + /// Run computer vision entity detection on images. + #[serde(default)] + pub entity_detection: bool, +} + +/// Configuration for the [`AudialExtraction`](super::GraphNodeKind::AudialExtraction) action. +#[derive( + Debug, + Clone, + Default, + PartialEq, + Eq, + Serialize, + Deserialize, + JsonSchema +)] +pub struct AudialExtractionAction { + /// Segment the audio by speaker identity. + #[serde(default)] + pub diarization: bool, +} diff --git a/crates/nvisy-engine/src/compiler/graph/lifecycle.rs b/crates/nvisy-engine/src/compiler/graph/lifecycle.rs new file mode 100644 index 00000000..8be24b75 --- /dev/null +++ b/crates/nvisy-engine/src/compiler/graph/lifecycle.rs @@ -0,0 +1,50 @@ +//! Lifecycle action configurations: import and export. + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Configuration for the [`Import`](super::GraphNodeKind::Import) action. +#[derive( + Debug, + Clone, + Default, + PartialEq, + Eq, + Serialize, + Deserialize, + JsonSchema +)] +pub struct ImportAction { + /// Decompress the content before processing. + #[serde(default)] + pub decompression: bool, + /// Decrypt the content before processing. 
+ #[serde(default)] + pub decryption: bool, + /// Convert the content to a processable format. + #[serde(default)] + pub conversion: bool, +} + +/// Configuration for the [`Export`](super::GraphNodeKind::Export) action. +#[derive( + Debug, + Clone, + Default, + PartialEq, + Eq, + Serialize, + Deserialize, + JsonSchema +)] +pub struct ExportAction { + /// Compress the content before publishing. + #[serde(default)] + pub compression: bool, + /// Encrypt the content before publishing. + #[serde(default)] + pub encryption: bool, + /// Convert the content to the target format. + #[serde(default)] + pub conversion: bool, +} diff --git a/crates/nvisy-engine/src/compiler/graph/mod.rs b/crates/nvisy-engine/src/compiler/graph/mod.rs index 221de0b1..98cf2642 100644 --- a/crates/nvisy-engine/src/compiler/graph/mod.rs +++ b/crates/nvisy-engine/src/compiler/graph/mod.rs @@ -1,15 +1,17 @@ //! Graph data model for pipeline definitions. //! //! A pipeline is represented as a set of [`GraphNode`]s connected by -//! [`GraphEdge`]s, collected into a [`Graph`]. Nodes are flattened into -//! a struct carrying shared fields (`id`, `retry`, `timeout`) alongside -//! a `kind` discriminator that determines the node's role. +//! [`GraphEdge`]s, collected into a [`Graph`]. Each node carries shared +//! fields (`id`, `retry`, `timeout`) alongside a [`GraphNodeKind`] that +//! determines what the node does. 
-mod action; -mod source; -mod target; +mod context; +mod extraction; +mod lifecycle; +mod recognition; +mod refinement; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use nvisy_core::Error; use schemars::JsonSchema; @@ -17,60 +19,130 @@ use serde::{Deserialize, Serialize}; use uuid::Uuid; use validator::Validate; -pub use self::action::{ActionKind, ActionNode}; -pub use self::source::SourceNode; -pub use self::target::TargetNode; +pub use self::context::{GenerateContextAction, LoadContextAction, SaveContextAction}; +pub use self::extraction::{AudialExtractionAction, VisualExtractionAction}; +pub use self::lifecycle::{ExportAction, ImportAction}; +pub use self::recognition::{NamedEntityRecognitionAction, PatternRecognitionAction}; +pub use self::refinement::{FusionAction, RedactionAction}; use super::policy::{RetryPolicy, TimeoutPolicy}; +/// The set of strongly-typed actions a pipeline node can perform. +/// +/// Each variant maps to one or more [`Operation`](crate::operation::Operation) +/// implementations. Variants carry a dedicated configuration struct. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] +#[serde(tag = "action", rename_all = "snake_case")] +pub enum GraphNodeKind { + /// Loads reference-data contexts required by downstream actions. + LoadContext(LoadContextAction), + /// Persists contexts produced during the pipeline run. + SaveContext(SaveContextAction), + /// Generates a new context from detection results and content data. + GenerateContext(GenerateContextAction), + + /// Extracts text and entities from images and scanned documents. + VisualExtraction(VisualExtractionAction), + /// Extracts text from speech audio. + AudialExtraction(AudialExtractionAction), + + /// Detects named entities via language model inference. + NamedEntityRecognition(NamedEntityRecognitionAction), + /// Detects entities via regex, checksum, dictionary, and heuristic rules. 
+ PatternRecognition(PatternRecognitionAction), + + /// Merges and scores entities from multiple detection sources. + Fusion(FusionAction), + /// Applies redaction instructions to produce output content. + Redaction(RedactionAction), + + /// Imports content into the pipeline for processing. + Import(ImportAction), + /// Exports processed content to a target destination. + Export(ExportAction), +} + +impl GraphNodeKind { + /// Returns the pipeline phase for this node kind. + /// + /// Phases enforce execution ordering: edges must flow from equal or + /// lower phase to equal or higher phase. + /// + /// | Phase | Actions | + /// |-------|--------------------------------------------| + /// | 0 | Import, LoadContext | + /// | 1 | VisualExtraction, AudialExtraction | + /// | 2 | NamedEntityRecognition, PatternRecognition | + /// | 3 | Fusion | + /// | 4 | Redaction, GenerateContext | + /// | 5 | Export, SaveContext | + #[must_use] + pub fn phase(&self) -> u8 { + match self { + Self::Import(_) | Self::LoadContext(_) => 0, + Self::VisualExtraction(_) | Self::AudialExtraction(_) => 1, + Self::NamedEntityRecognition(_) | Self::PatternRecognition(_) => 2, + Self::Fusion(_) => 3, + Self::Redaction(_) | Self::GenerateContext(_) => 4, + Self::Export(_) | Self::SaveContext(_) => 5, + } + } + + /// Validates action-specific configuration. + pub fn validate(&self) -> Result<(), Error> { + match self { + Self::LoadContext(action) => action.validate(), + Self::SaveContext(action) => action.validate(), + Self::NamedEntityRecognition(action) => action.validate(), + _ => Ok(()), + } + } +} + /// A node in the pipeline graph. /// -/// Shared fields (`id`, `retry`, `timeout`) live directly on the struct -/// while the role-specific payload is carried in [`GraphNodeKind`] via -/// `#[serde(flatten)]`. +/// Common fields (`id`, `retry`, `timeout`) live on the struct directly. +/// The action-specific payload is carried in [`GraphNodeKind`]. 
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct GraphNode { /// Unique identifier for this node within the graph. pub id: Uuid, - /// Optional retry policy. + /// Optional retry policy for this node. #[serde(skip_serializing_if = "Option::is_none")] pub retry: Option, - /// Optional timeout policy. + /// Optional timeout policy for this node. #[serde(skip_serializing_if = "Option::is_none")] pub timeout: Option, - /// Role-specific payload (source, action, or target). + /// Action-specific payload. #[serde(flatten)] pub kind: GraphNodeKind, } impl GraphNode { - /// Returns the retry policy, if one is configured. + /// Creates a new node with the given ID and action kind. + pub fn new(id: Uuid, kind: GraphNodeKind) -> Self { + Self { + id, + retry: None, + timeout: None, + kind, + } + } + + /// Returns the retry policy, if configured. + #[must_use] pub fn retry(&self) -> Option<&RetryPolicy> { self.retry.as_ref() } - /// Returns the timeout policy, if one is configured. + /// Returns the timeout policy, if configured. + #[must_use] pub fn timeout(&self) -> Option<&TimeoutPolicy> { self.timeout.as_ref() } } -/// Discriminator for the three node roles in a pipeline. -/// -/// Serialized with a `"type"` tag so JSON definitions specify -/// `"source"`, `"action"`, or `"target"`. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum GraphNodeKind { - /// A data source that reads from an external provider via a named stream. - Source(SourceNode), - /// A transformation or detection step applied to data flowing through the pipeline. - Action(ActionNode), - /// A data sink that writes to an external provider via a named stream. - Target(TargetNode), -} - /// A directed edge connecting two nodes. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] pub struct GraphEdge { /// ID of the upstream node. 
pub source: Uuid, @@ -78,9 +150,7 @@ pub struct GraphEdge { pub target: Uuid, } -/// A complete pipeline graph definition containing nodes and edges. -/// -/// The graph must be a valid DAG (directed acyclic graph) with unique node IDs. +/// A complete pipeline graph: nodes and directed edges forming a DAG. #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct Graph { /// All nodes in the pipeline. @@ -90,24 +160,31 @@ pub struct Graph { } impl Graph { - /// Validate structural invariants. + /// Creates a new graph from nodes and edges. + pub fn new(nodes: Vec, edges: Vec) -> Self { + Self { nodes, edges } + } + + /// Validates structural invariants. /// - /// - The graph must contain at least one node. - /// - All node IDs must be unique. - /// - All edge endpoints must reference existing node IDs. + /// Checks that the graph contains at least one node, all node IDs are + /// unique, node-level policies and action configs are valid, edges have + /// no self-loops or duplicates, all edge endpoints reference existing + /// node IDs, and edges respect pipeline phase ordering. 
+ #[must_use = "validation errors are silently ignored if the result is unused"] pub fn validate(&self) -> Result<(), Error> { if self.nodes.is_empty() { return Err(Error::validation( - "Graph must have at least one node", + "graph must have at least one node", "compiler", )); } - let mut seen = HashSet::new(); + let mut node_map = HashMap::with_capacity(self.nodes.len()); for node in &self.nodes { - if !seen.insert(node.id) { + if node_map.insert(node.id, node).is_some() { return Err(Error::validation( - format!("Duplicate node ID: {}", node.id), + format!("duplicate node id: {}", node.id), "compiler", )); } @@ -115,28 +192,58 @@ impl Graph { for node in &self.nodes { if let Some(retry) = &node.retry { - retry.validate().map_err(|e| { - Error::validation(format!("Node {}: {}", node.id, e), "compiler") - })?; + retry + .validate() + .map_err(|e| Error::validation(format!("node {}: {e}", node.id), "compiler"))?; } if let Some(timeout) = &node.timeout { - timeout.validate().map_err(|e| { - Error::validation(format!("Node {}: {}", node.id, e), "compiler") - })?; + timeout + .validate() + .map_err(|e| Error::validation(format!("node {}: {e}", node.id), "compiler"))?; } + node.kind.validate().map_err(|e| { + Error::validation(format!("node {}: {}", node.id, e.message), "compiler") + })?; } - let node_ids: HashSet = seen; + let mut seen_edges = HashSet::with_capacity(self.edges.len()); for edge in &self.edges { - if !node_ids.contains(&edge.source) { + if edge.source == edge.target { return Err(Error::validation( - format!("Edge references unknown source node: {}", edge.source), + format!("self-loop on node {}", edge.source), "compiler", )); } - if !node_ids.contains(&edge.target) { + + if !seen_edges.insert((edge.source, edge.target)) { + return Err(Error::validation( + format!("duplicate edge from {} to {}", edge.source, edge.target,), + "compiler", + )); + } + + let source = node_map.get(&edge.source).ok_or_else(|| { + Error::validation( + format!("edge references 
unknown source node: {}", edge.source), + "compiler", + ) + })?; + let target = node_map.get(&edge.target).ok_or_else(|| { + Error::validation( + format!("edge references unknown target node: {}", edge.target), + "compiler", + ) + })?; + + let source_phase = source.kind.phase(); + let target_phase = target.kind.phase(); + if source_phase > target_phase { return Err(Error::validation( - format!("Edge references unknown target node: {}", edge.target), + format!( + "edge from node {} (phase {source_phase}) to node {} \ + (phase {target_phase}) violates pipeline ordering", + edge.source, edge.target, + ), "compiler", )); } diff --git a/crates/nvisy-engine/src/compiler/graph/recognition.rs b/crates/nvisy-engine/src/compiler/graph/recognition.rs new file mode 100644 index 00000000..37c85d7f --- /dev/null +++ b/crates/nvisy-engine/src/compiler/graph/recognition.rs @@ -0,0 +1,77 @@ +//! Recognition action configurations: NER and pattern-based. + +use nvisy_core::Error; +use nvisy_ontology::entity::EntityKind; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Default minimum confidence threshold for NER detections. +const DEFAULT_CONFIDENCE_THRESHOLD: f64 = 0.5; + +/// Configuration for the [`NamedEntityRecognition`](super::GraphNodeKind::NamedEntityRecognition) action. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] +pub struct NamedEntityRecognitionAction { + /// Entity kinds to detect. An empty list means all known kinds. + #[serde(default)] + pub entity_kinds: Vec, + /// Minimum confidence threshold for detections (0.0 to 1.0). + #[serde(default = "default_confidence_threshold")] + pub confidence_threshold: f64, +} + +impl Default for NamedEntityRecognitionAction { + fn default() -> Self { + Self { + entity_kinds: Vec::new(), + confidence_threshold: DEFAULT_CONFIDENCE_THRESHOLD, + } + } +} + +impl NamedEntityRecognitionAction { + /// Validates that the confidence threshold is within `0.0..=1.0`. 
+ pub fn validate(&self) -> Result<(), Error> { + if !(0.0..=1.0).contains(&self.confidence_threshold) { + return Err(Error::validation( + format!( + "confidence_threshold must be between 0.0 and 1.0, got {}", + self.confidence_threshold, + ), + "compiler", + )); + } + Ok(()) + } +} + +/// Configuration for the [`PatternRecognition`](super::GraphNodeKind::PatternRecognition) action. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] +pub struct PatternRecognitionAction { + /// Enable format heuristics, entropy, and structural cues. + #[serde(default)] + pub heuristic: bool, + /// Enable co-occurrence analysis for contextual confidence adjustment. + #[serde(default = "default_true")] + pub contextual_analysis: bool, + /// Run a second pass with stricter thresholds. + #[serde(default = "default_true")] + pub second_pass: bool, +} + +impl Default for PatternRecognitionAction { + fn default() -> Self { + Self { + heuristic: false, + contextual_analysis: true, + second_pass: true, + } + } +} + +fn default_true() -> bool { + true +} + +fn default_confidence_threshold() -> f64 { + DEFAULT_CONFIDENCE_THRESHOLD +} diff --git a/crates/nvisy-engine/src/compiler/graph/refinement.rs b/crates/nvisy-engine/src/compiler/graph/refinement.rs new file mode 100644 index 00000000..d6261338 --- /dev/null +++ b/crates/nvisy-engine/src/compiler/graph/refinement.rs @@ -0,0 +1,47 @@ +//! Refinement action configurations: fusion and redaction. + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Configuration for the [`Fusion`](super::GraphNodeKind::Fusion) action. +#[derive( + Debug, + Clone, + Default, + PartialEq, + Eq, + Serialize, + Deserialize, + JsonSchema +)] +pub struct FusionAction { + /// Remove overlapping duplicate entities before fusion. + #[serde(default)] + pub entity_deduplication: bool, + /// Adjust raw model scores to align with empirical precision targets. 
+ #[serde(default)] + pub confidence_calibration: bool, + /// Use surrounding document context to upgrade or downgrade confidence. + #[serde(default)] + pub contextual_adjustment: bool, +} + +/// Configuration for the [`Redaction`](super::GraphNodeKind::Redaction) action. +#[derive( + Debug, + Clone, + Default, + PartialEq, + Eq, + Serialize, + Deserialize, + JsonSchema +)] +pub struct RedactionAction { + /// Run a validation pass on the redacted output. + #[serde(default)] + pub validation: bool, + /// Strip or redact document metadata (EXIF, PDF properties). + #[serde(default)] + pub process_metadata: bool, +} diff --git a/crates/nvisy-engine/src/compiler/graph/source.rs b/crates/nvisy-engine/src/compiler/graph/source.rs deleted file mode 100644 index 810c8e09..00000000 --- a/crates/nvisy-engine/src/compiler/graph/source.rs +++ /dev/null @@ -1,13 +0,0 @@ -//! Source node definition. - -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; - -/// A data source that reads from an external provider. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -pub struct SourceNode { - /// Provider name used to resolve the connection (e.g. `"s3"`). - pub provider: String, - /// Stream name on the provider (e.g. `"read"`). - pub stream: String, -} diff --git a/crates/nvisy-engine/src/compiler/graph/target.rs b/crates/nvisy-engine/src/compiler/graph/target.rs deleted file mode 100644 index 982f87c2..00000000 --- a/crates/nvisy-engine/src/compiler/graph/target.rs +++ /dev/null @@ -1,13 +0,0 @@ -//! Target node definition. - -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; - -/// A data sink that writes to an external provider. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -pub struct TargetNode { - /// Provider name used to resolve the connection (e.g. `"s3"`). - pub provider: String, - /// Stream name on the provider (e.g. `"write"`). 
- pub stream: String, -} diff --git a/crates/nvisy-engine/src/compiler/mod.rs b/crates/nvisy-engine/src/compiler/mod.rs index 6ac7baa2..446fd89d 100644 --- a/crates/nvisy-engine/src/compiler/mod.rs +++ b/crates/nvisy-engine/src/compiler/mod.rs @@ -10,14 +10,12 @@ mod policy; use std::collections::HashMap; +use derive_builder::Builder; use nvisy_core::Error; use petgraph::algo::{is_cyclic_directed, toposort}; -use petgraph::graph::{DiGraph, NodeIndex}; -use uuid::Uuid; +use petgraph::graph::DiGraph; -pub use self::graph::{ - ActionKind, ActionNode, Graph, GraphEdge, GraphNode, GraphNodeKind, SourceNode, TargetNode, -}; +pub use self::graph::{Graph, GraphEdge, GraphNode, GraphNodeKind}; pub(crate) use self::plan::{ExecutionPlan, ResolvedNode}; pub use self::policy::{BackoffStrategy, RetryPolicy, TimeoutBehavior, TimeoutPolicy}; @@ -25,41 +23,49 @@ pub use self::policy::{BackoffStrategy, RetryPolicy, TimeoutBehavior, TimeoutPol /// /// Nodes that don't carry their own retry or timeout policy will inherit /// the compiler-level defaults (if set) at compile time. -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Builder)] +#[builder( + name = "CompilerBuilder", + pattern = "owned", + setter(into, strip_option, prefix = "with"), + build_fn(private, name = "build_inner") +)] pub(crate) struct Compiler { /// Default retry policy applied to nodes without one. + #[builder(default)] pub retry: Option, /// Default timeout policy applied to nodes without one. + #[builder(default)] pub timeout: Option, } +impl CompilerBuilder { + /// Build the compiler. + pub fn build(self) -> Result { + self.build_inner() + .map_err(|e| Error::validation(e.to_string(), "compiler")) + } +} + impl Compiler { - /// Create a compiler with no default policies. + /// Creates a compiler with no default policies. pub fn new() -> Self { Self::default() } - /// Set the default retry policy. 
- pub fn with_retry(mut self, policy: RetryPolicy) -> Self { - self.retry = Some(policy); - self - } - - /// Set the default timeout policy. - pub fn with_timeout(mut self, policy: TimeoutPolicy) -> Self { - self.timeout = Some(policy); - self + /// Returns a builder for configuring compiler defaults. + pub fn builder() -> CompilerBuilder { + CompilerBuilder::default() } - /// Compile a [`Graph`] into an [`ExecutionPlan`]. + /// Compiles a [`Graph`] into an [`ExecutionPlan`]. /// /// Validates the graph, applies compiler-level default policies to nodes - /// that don't specify their own, builds a `petgraph` representation, + /// that don't specify their own, builds a petgraph representation, /// checks for cycles, and produces a topologically-sorted plan. pub fn compile(&self, graph: &Graph) -> Result { let mut graph = graph.clone(); - // Apply compiler-level defaults to nodes missing their own policies. for node in &mut graph.nodes { if node.retry.is_none() { node.retry.clone_from(&self.retry); @@ -71,9 +77,38 @@ impl Compiler { graph.validate()?; - // Build petgraph - let mut pg: DiGraph = DiGraph::new(); - let mut index_map: HashMap = HashMap::new(); + let pg = Self::build_petgraph(&graph)?; + + let topo = toposort(&pg, None) + .map_err(|_| Error::validation("graph contains a cycle", "compiler"))?; + + let resolved = topo + .iter() + .map(|&idx| { + let upstream_ids = pg + .neighbors_directed(idx, petgraph::Direction::Incoming) + .map(|n| pg[n].id) + .collect(); + let downstream_ids = pg + .neighbors_directed(idx, petgraph::Direction::Outgoing) + .map(|n| pg[n].id) + .collect(); + ResolvedNode { + node: pg[idx].clone(), + upstream_ids, + downstream_ids, + } + }) + .collect(); + + Ok(ExecutionPlan { nodes: resolved }) + } + + /// Builds a petgraph `DiGraph` from a validated [`Graph`] and checks + /// for cycles. 
+ fn build_petgraph(graph: &Graph) -> Result, Error> { + let mut pg = DiGraph::with_capacity(graph.nodes.len(), graph.edges.len()); + let mut index_map = HashMap::with_capacity(graph.nodes.len()); for node in &graph.nodes { let idx = pg.add_node(node.clone()); @@ -83,39 +118,13 @@ impl Compiler { for edge in &graph.edges { let from = index_map[&edge.source]; let to = index_map[&edge.target]; - pg.add_edge(from, to, ()); + pg.add_edge(from, to, edge.clone()); } - // Cycle detection if is_cyclic_directed(&pg) { - return Err(Error::validation("Graph contains a cycle", "compiler")); + return Err(Error::validation("graph contains a cycle", "compiler")); } - // Topological sort - let topo = toposort(&pg, None) - .map_err(|_| Error::validation("Graph contains a cycle", "compiler"))?; - - // Build resolved nodes with adjacency info in topological order. - let mut resolved = Vec::new(); - - for idx in &topo { - let upstream_ids: Vec = pg - .neighbors_directed(*idx, petgraph::Direction::Incoming) - .map(|n| pg[n].id) - .collect(); - - let downstream_ids: Vec = pg - .neighbors_directed(*idx, petgraph::Direction::Outgoing) - .map(|n| pg[n].id) - .collect(); - - resolved.push(ResolvedNode { - node: pg[*idx].clone(), - upstream_ids, - downstream_ids, - }); - } - - Ok(ExecutionPlan { nodes: resolved }) + Ok(pg) } } diff --git a/crates/nvisy-engine/src/lib.rs b/crates/nvisy-engine/src/lib.rs index 08727136..b67fca4e 100644 --- a/crates/nvisy-engine/src/lib.rs +++ b/crates/nvisy-engine/src/lib.rs @@ -8,11 +8,9 @@ pub mod pipeline; pub mod provenance; // Re-export graph data model for pipeline definitions. -pub use self::compiler::{ - ActionKind, ActionNode, Graph, GraphEdge, GraphNode, GraphNodeKind, SourceNode, TargetNode, -}; // Re-export retry and timeout policies for pipeline nodes. 
pub use self::compiler::{BackoffStrategy, RetryPolicy, TimeoutBehavior, TimeoutPolicy}; +pub use self::compiler::{Graph, GraphEdge, GraphNode, GraphNodeKind}; pub use self::pipeline::{ DefaultEngine, EngineSection, LlmSection, OcrSection, RuntimeConfig, SttSection, TtsSection, }; diff --git a/crates/nvisy-engine/src/pipeline/default.rs b/crates/nvisy-engine/src/pipeline/default.rs index 7d5aac66..3a4d0440 100644 --- a/crates/nvisy-engine/src/pipeline/default.rs +++ b/crates/nvisy-engine/src/pipeline/default.rs @@ -31,10 +31,8 @@ const CHANNEL_BUFFER_SIZE: usize = 256; /// Inner state shared behind an [`Arc`]. #[derive(Clone, Default)] struct DefaultEngineInner { - /// Default retry policy for graph nodes without one. - retry: Option, - /// Default timeout policy for graph nodes without one. - timeout: Option, + /// Compiler with default retry and timeout policies. + compiler: Compiler, /// Shared HTTP client for downstream providers. http_client: HttpClient, } @@ -50,8 +48,7 @@ pub struct DefaultEngine { impl std::fmt::Debug for DefaultEngine { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("DefaultEngine") - .field("retry", &self.inner.retry) - .field("timeout", &self.inner.timeout) + .field("compiler", &self.inner.compiler) .field("http_client", &self.inner.http_client) .finish() } @@ -65,13 +62,13 @@ impl DefaultEngine { /// Set the default retry policy. pub fn with_retry(mut self, policy: RetryPolicy) -> Self { - Arc::make_mut(&mut self.inner).retry = Some(policy); + Arc::make_mut(&mut self.inner).compiler.retry = Some(policy); self } /// Set the default timeout policy. 
pub fn with_timeout(mut self, policy: TimeoutPolicy) -> Self { - Arc::make_mut(&mut self.inner).timeout = Some(policy); + Arc::make_mut(&mut self.inner).compiler.timeout = Some(policy); self } @@ -211,14 +208,7 @@ impl Engine for DefaultEngine { // // Compile the graph into a topologically-sorted execution plan and // run Source/Action/Target nodes concurrently. - let mut compiler = Compiler::new(); - if let Some(ref retry) = self.inner.retry { - compiler = compiler.with_retry(retry.clone()); - } - if let Some(ref timeout) = self.inner.timeout { - compiler = compiler.with_timeout(timeout.clone()); - } - let plan = compiler.compile(&input.graph)?; + let plan = self.inner.compiler.compile(&input.graph)?; let run_output = Self::run_graph(&plan).await?; Ok(EngineOutput { diff --git a/crates/nvisy-engine/src/pipeline/executor.rs b/crates/nvisy-engine/src/pipeline/executor.rs index 748bfe5f..9e445126 100644 --- a/crates/nvisy-engine/src/pipeline/executor.rs +++ b/crates/nvisy-engine/src/pipeline/executor.rs @@ -1,12 +1,9 @@ //! Node-level execution dispatchers. //! -//! [`execute_node`] dispatches to variant-specific handlers: -//! -//! | Variant | Behaviour | -//! |----------|--------------------------------------------------------| -//! | `Source` | Reads data from an external provider and sends it downstream. | -//! | `Action` | Receives upstream data, applies a transformation, and forwards results. | -//! | `Target` | Receives upstream data and writes it to an external provider. | +//! [`execute_node`] dispatches each graph node to the appropriate handler +//! based on its [`GraphNodeKind`]. A per-node timeout is applied when +//! configured, with [`TimeoutBehavior`] controlling whether a timeout +//! is treated as an error or silently yields zero items. 
use nvisy_core::content::ContentData; use nvisy_core::{Error, ErrorKind}; @@ -15,8 +12,8 @@ use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; use uuid::Uuid; -use super::policy::{CompiledRetryPolicy, CompiledTimeoutPolicy}; -use crate::compiler::{ActionKind, GraphNode, GraphNodeKind, RetryPolicy, TimeoutBehavior}; +use super::policy::CompiledTimeoutPolicy; +use crate::compiler::{GraphNode, GraphNodeKind, TimeoutBehavior}; /// Outcome of executing a single node in the pipeline. #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] @@ -40,34 +37,18 @@ pub struct RunOutput { pub success: bool, } -/// Execute a single node, dispatching to the correct handler based on the -/// [`GraphNodeKind`] variant. +/// Executes a single graph node by dispatching on its [`GraphNodeKind`]. /// /// A per-node timeout is applied when configured. The [`TimeoutBehavior`] /// determines whether a timeout is treated as an error (`Fail`) or silently -/// yields zero items (`Skip`). Retry policies are applied within the -/// individual source/target handlers where the retryable I/O actually -/// occurs (channel consumption is not retryable). +/// yields zero items (`Skip`). pub(crate) async fn execute_node( node: &GraphNode, senders: Vec>, mut receivers: Vec>, ) -> Result { - let run = async { - match &node.kind { - GraphNodeKind::Source(src) => { - execute_source(&src.provider, &src.stream, node.retry(), &senders).await - } - GraphNodeKind::Action(act) => { - execute_action(&act.action, &senders, &mut receivers).await - } - GraphNodeKind::Target(tgt) => { - execute_target(&tgt.provider, &tgt.stream, node.retry(), &mut receivers).await - } - } - }; + let run = async { execute_action(&node.kind, &senders, &mut receivers).await }; - // Apply per-node timeout when configured. 
match node.timeout() { Some(policy) => { let compiled = CompiledTimeoutPolicy::from(policy); @@ -81,65 +62,38 @@ pub(crate) async fn execute_node( } } -/// Execute a `Source` node: read data from an external provider and send -/// items downstream. +/// Dispatches an action node: receives upstream data, logs the action kind, +/// and forwards items downstream. /// -/// Actual provider integration (S3, database, etc.) is not yet implemented — -/// source nodes currently produce no data. -async fn execute_source( - provider: &str, - stream: &str, - retry: Option<&RetryPolicy>, - senders: &[mpsc::Sender], -) -> Result { - let read_from_provider = || async { - tracing::debug!(provider, stream, "source node: reading from provider"); - - // TODO: Dispatch to provider-specific readers (S3, database, etc.) - // For now, source nodes produce no data. The Engine wrapper injects - // initial content into the graph via the first channel directly. - Ok::(0) - }; - - let count = match retry { - Some(policy) => { - CompiledRetryPolicy::from(policy) - .with_retry(read_from_provider) - .await? - } - None => read_from_provider().await?, - }; - - // Send items downstream once we have them. - // (Currently a no-op since providers are not yet wired.) - let _ = senders; - - Ok(count) -} - -/// Execute an `Action` node: receive upstream data, apply a transformation, -/// and forward the result downstream. -/// -/// Concrete action dispatch (detect, classify, redact) is orchestrated by -/// [`DefaultEngine::run`] which drives detection -> evaluation -> redaction -/// as sequential phases. The channel-level passthrough here handles any -/// action nodes that appear in the DAG but whose logic is managed externally. +/// Concrete action implementations will replace these passthrough stubs +/// as the orchestrator is built out. 
async fn execute_action( - action: &ActionKind, + action: &GraphNodeKind, senders: &[mpsc::Sender], receivers: &mut [mpsc::Receiver], ) -> Result { match action { - ActionKind::Ocr => tracing::trace!("action node: ocr (passthrough)"), - ActionKind::Transcribe => tracing::trace!("action node: transcribe (passthrough)"), - ActionKind::Detect => tracing::trace!("action node: detect (passthrough)"), - ActionKind::Evaluate => tracing::trace!("action node: evaluate (passthrough)"), - ActionKind::Redact => tracing::trace!("action node: redact (passthrough)"), - ActionKind::Translate => tracing::trace!("action node: translate (passthrough)"), - ActionKind::Classify => tracing::trace!("action node: classify (passthrough)"), - ActionKind::Summarize => tracing::trace!("action node: summarize (passthrough)"), - ActionKind::Audit => tracing::trace!("action node: audit (passthrough)"), - ActionKind::Export => tracing::trace!("action node: export (passthrough)"), + GraphNodeKind::LoadContext(_) => tracing::trace!("action node: load_context (passthrough)"), + GraphNodeKind::SaveContext(_) => tracing::trace!("action node: save_context (passthrough)"), + GraphNodeKind::GenerateContext(_) => { + tracing::trace!("action node: generate_context (passthrough)") + } + GraphNodeKind::VisualExtraction(_) => { + tracing::trace!("action node: visual_extraction (passthrough)") + } + GraphNodeKind::AudialExtraction(_) => { + tracing::trace!("action node: audial_extraction (passthrough)") + } + GraphNodeKind::NamedEntityRecognition(_) => { + tracing::trace!("action node: ner (passthrough)") + } + GraphNodeKind::PatternRecognition(_) => { + tracing::trace!("action node: pattern_recognition (passthrough)") + } + GraphNodeKind::Fusion(_) => tracing::trace!("action node: fusion (passthrough)"), + GraphNodeKind::Redaction(_) => tracing::trace!("action node: redaction (passthrough)"), + GraphNodeKind::Import(_) => tracing::trace!("action node: import (passthrough)"), + GraphNodeKind::Export(_) => 
tracing::trace!("action node: export (passthrough)"), } // Forward items from all upstream receivers to all downstream senders. @@ -155,32 +109,3 @@ async fn execute_action( Ok(count) } - -/// Execute a `Target` node: consume upstream data and write to an external -/// provider. -/// -/// Actual provider integration is not yet implemented — target nodes -/// currently consume and count items. -async fn execute_target( - provider: &str, - stream: &str, - retry: Option<&RetryPolicy>, - receivers: &mut [mpsc::Receiver], -) -> Result { - tracing::debug!(provider, stream, "target node: writing to provider"); - - // Consume all upstream items. - let mut count = 0u64; - for rx in receivers.iter_mut() { - while let Some(_item) = rx.recv().await { - count += 1; - - // TODO: Dispatch to provider-specific writers (S3, database, etc.) - // with retry support. For now we just count items consumed. - } - } - - let _ = retry; // Will be used when provider writes are implemented. - - Ok(count) -} From 7a820d4de00e93a56c47eef94494b06fcf3a9483 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Sat, 14 Mar 2026 17:24:58 +0100 Subject: [PATCH 2/6] refactor(engine): rework runs, add Runs trait, drop Action suffix from graph types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rewrite runs/mod.rs to pure data types (RunSnapshot, NodeSnapshot, RunStatus, NodeStatus, RunSummary, RunFilter); delete RunManager - Add Runs trait (get_run, list_runs, cancel_run) and implement on DefaultEngine with internal RunEntry + RwLock state - Wire run lifecycle (Pending→Running→Succeeded/PartialFailure/Failed) into Engine::run() with CancellationToken passed to run_graph - Add cooperative cancellation via tokio::select! in execute_node - Make confidence_threshold optional on NamedEntityRecognition (None disables filtering) - Remove Action suffix from all graph config structs (LoadContextAction → LoadContext, etc.) 
- Remove RunOutput from EngineOutput (query via Runs::get_run instead); make NodeOutput/RunOutput pub(super) - Move compiler/ → graph/, pipeline/runs.rs → runs/mod.rs, config.rs → config/mod.rs, add pipeline/plan/ module Co-Authored-By: Claude Opus 4.6 --- crates/nvisy-engine/src/compiler/mod.rs | 130 --------- crates/nvisy-engine/src/compiler/plan/mod.rs | 30 --- .../nvisy-engine/src/compiler/policy/mod.rs | 7 - .../src/{compiler => }/graph/context.rs | 22 +- .../src/{compiler => }/graph/extraction.rs | 28 +- .../src/{compiler => }/graph/lifecycle.rs | 28 +- .../src/{compiler => }/graph/mod.rs | 35 +-- crates/nvisy-engine/src/graph/policy/mod.rs | 10 + .../src/{compiler => graph}/policy/retry.rs | 2 +- .../src/{compiler => graph}/policy/timeout.rs | 4 +- .../src/{compiler => }/graph/recognition.rs | 51 ++-- .../src/{compiler => }/graph/refinement.rs | 28 +- crates/nvisy-engine/src/lib.rs | 16 +- .../src/pipeline/{config.rs => config/mod.rs} | 2 +- crates/nvisy-engine/src/pipeline/default.rs | 246 ++++++++++++++---- crates/nvisy-engine/src/pipeline/executor.rs | 62 +++-- crates/nvisy-engine/src/pipeline/mod.rs | 35 ++- crates/nvisy-engine/src/pipeline/plan/edge.rs | 29 +++ crates/nvisy-engine/src/pipeline/plan/mod.rs | 246 ++++++++++++++++++ crates/nvisy-engine/src/pipeline/plan/node.rs | 25 ++ .../nvisy-engine/src/pipeline/plan/phase.rs | 10 + .../nvisy-engine/src/pipeline/policy/mod.rs | 10 +- .../nvisy-engine/src/pipeline/policy/retry.rs | 4 +- .../src/pipeline/policy/timeout.rs | 4 +- crates/nvisy-engine/src/pipeline/runs.rs | 214 --------------- crates/nvisy-engine/src/pipeline/runs/mod.rs | 106 ++++++++ crates/nvisy-server/src/handler/process.rs | 3 +- crates/nvisy-server/src/service/mod.rs | 3 +- 28 files changed, 769 insertions(+), 621 deletions(-) delete mode 100644 crates/nvisy-engine/src/compiler/mod.rs delete mode 100644 crates/nvisy-engine/src/compiler/plan/mod.rs delete mode 100644 crates/nvisy-engine/src/compiler/policy/mod.rs rename 
crates/nvisy-engine/src/{compiler => }/graph/context.rs (86%) rename crates/nvisy-engine/src/{compiler => }/graph/extraction.rs (67%) rename crates/nvisy-engine/src/{compiler => }/graph/lifecycle.rs (74%) rename crates/nvisy-engine/src/{compiler => }/graph/mod.rs (90%) create mode 100644 crates/nvisy-engine/src/graph/policy/mod.rs rename crates/nvisy-engine/src/{compiler => graph}/policy/retry.rs (96%) rename crates/nvisy-engine/src/{compiler => graph}/policy/timeout.rs (89%) rename crates/nvisy-engine/src/{compiler => }/graph/recognition.rs (54%) rename crates/nvisy-engine/src/{compiler => }/graph/refinement.rs (75%) rename crates/nvisy-engine/src/pipeline/{config.rs => config/mod.rs} (99%) create mode 100644 crates/nvisy-engine/src/pipeline/plan/edge.rs create mode 100644 crates/nvisy-engine/src/pipeline/plan/mod.rs create mode 100644 crates/nvisy-engine/src/pipeline/plan/node.rs create mode 100644 crates/nvisy-engine/src/pipeline/plan/phase.rs delete mode 100644 crates/nvisy-engine/src/pipeline/runs.rs create mode 100644 crates/nvisy-engine/src/pipeline/runs/mod.rs diff --git a/crates/nvisy-engine/src/compiler/mod.rs b/crates/nvisy-engine/src/compiler/mod.rs deleted file mode 100644 index 446fd89d..00000000 --- a/crates/nvisy-engine/src/compiler/mod.rs +++ /dev/null @@ -1,130 +0,0 @@ -//! Pipeline compilation: graph construction, validation, and execution planning. -//! -//! The [`Compiler`] is the entry-point for turning a [`Graph`] into an -//! [`ExecutionPlan`]. It carries optional default retry and timeout policies -//! that are applied to nodes which don't specify their own. 
- -mod graph; -mod plan; -mod policy; - -use std::collections::HashMap; - -use derive_builder::Builder; -use nvisy_core::Error; -use petgraph::algo::{is_cyclic_directed, toposort}; -use petgraph::graph::DiGraph; - -pub use self::graph::{Graph, GraphEdge, GraphNode, GraphNodeKind}; -pub(crate) use self::plan::{ExecutionPlan, ResolvedNode}; -pub use self::policy::{BackoffStrategy, RetryPolicy, TimeoutBehavior, TimeoutPolicy}; - -/// Pipeline compiler with optional default policies. -/// -/// Nodes that don't carry their own retry or timeout policy will inherit -/// the compiler-level defaults (if set) at compile time. -#[derive(Debug, Clone, Default, Builder)] -#[builder( - name = "CompilerBuilder", - pattern = "owned", - setter(into, strip_option, prefix = "with"), - build_fn(private, name = "build_inner") -)] -pub(crate) struct Compiler { - /// Default retry policy applied to nodes without one. - #[builder(default)] - pub retry: Option, - /// Default timeout policy applied to nodes without one. - #[builder(default)] - pub timeout: Option, -} - -impl CompilerBuilder { - /// Build the compiler. - pub fn build(self) -> Result { - self.build_inner() - .map_err(|e| Error::validation(e.to_string(), "compiler")) - } -} - -impl Compiler { - /// Creates a compiler with no default policies. - pub fn new() -> Self { - Self::default() - } - - /// Returns a builder for configuring compiler defaults. - pub fn builder() -> CompilerBuilder { - CompilerBuilder::default() - } - - /// Compiles a [`Graph`] into an [`ExecutionPlan`]. - /// - /// Validates the graph, applies compiler-level default policies to nodes - /// that don't specify their own, builds a petgraph representation, - /// checks for cycles, and produces a topologically-sorted plan. 
- pub fn compile(&self, graph: &Graph) -> Result { - let mut graph = graph.clone(); - - for node in &mut graph.nodes { - if node.retry.is_none() { - node.retry.clone_from(&self.retry); - } - if node.timeout.is_none() { - node.timeout.clone_from(&self.timeout); - } - } - - graph.validate()?; - - let pg = Self::build_petgraph(&graph)?; - - let topo = toposort(&pg, None) - .map_err(|_| Error::validation("graph contains a cycle", "compiler"))?; - - let resolved = topo - .iter() - .map(|&idx| { - let upstream_ids = pg - .neighbors_directed(idx, petgraph::Direction::Incoming) - .map(|n| pg[n].id) - .collect(); - let downstream_ids = pg - .neighbors_directed(idx, petgraph::Direction::Outgoing) - .map(|n| pg[n].id) - .collect(); - ResolvedNode { - node: pg[idx].clone(), - upstream_ids, - downstream_ids, - } - }) - .collect(); - - Ok(ExecutionPlan { nodes: resolved }) - } - - /// Builds a petgraph `DiGraph` from a validated [`Graph`] and checks - /// for cycles. - fn build_petgraph(graph: &Graph) -> Result, Error> { - let mut pg = DiGraph::with_capacity(graph.nodes.len(), graph.edges.len()); - let mut index_map = HashMap::with_capacity(graph.nodes.len()); - - for node in &graph.nodes { - let idx = pg.add_node(node.clone()); - index_map.insert(node.id, idx); - } - - for edge in &graph.edges { - let from = index_map[&edge.source]; - let to = index_map[&edge.target]; - pg.add_edge(from, to, edge.clone()); - } - - if is_cyclic_directed(&pg) { - return Err(Error::validation("graph contains a cycle", "compiler")); - } - - Ok(pg) - } -} diff --git a/crates/nvisy-engine/src/compiler/plan/mod.rs b/crates/nvisy-engine/src/compiler/plan/mod.rs deleted file mode 100644 index a2bd2682..00000000 --- a/crates/nvisy-engine/src/compiler/plan/mod.rs +++ /dev/null @@ -1,30 +0,0 @@ -//! Compiled execution plan types. -//! -//! An [`ExecutionPlan`] contains topologically-sorted [`ResolvedNode`]s -//! so the executor can wire channels and schedule tasks. 
- -use uuid::Uuid; - -use crate::compiler::graph::GraphNode; - -/// A graph node enriched with adjacency information. -/// -/// Order is implicit in the position within [`ExecutionPlan::nodes`]. -#[derive(Debug, Clone)] -pub struct ResolvedNode { - /// The original graph node definition. - pub node: GraphNode, - /// IDs of nodes that feed data into this node. - pub upstream_ids: Vec, - /// IDs of nodes that receive data from this node. - pub downstream_ids: Vec, -} - -/// A compiled execution plan ready for the executor. -/// -/// Contains all nodes in topological order along with their adjacency -/// information so the executor can wire channels and schedule tasks. -pub struct ExecutionPlan { - /// Resolved nodes sorted in topological order. - pub nodes: Vec, -} diff --git a/crates/nvisy-engine/src/compiler/policy/mod.rs b/crates/nvisy-engine/src/compiler/policy/mod.rs deleted file mode 100644 index 769e82f5..00000000 --- a/crates/nvisy-engine/src/compiler/policy/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -//! Retry and timeout policies for pipeline nodes. - -mod retry; -mod timeout; - -pub use self::retry::{BackoffStrategy, RetryPolicy}; -pub use self::timeout::{TimeoutBehavior, TimeoutPolicy}; diff --git a/crates/nvisy-engine/src/compiler/graph/context.rs b/crates/nvisy-engine/src/graph/context.rs similarity index 86% rename from crates/nvisy-engine/src/compiler/graph/context.rs rename to crates/nvisy-engine/src/graph/context.rs index d0761e12..3c0aed64 100644 --- a/crates/nvisy-engine/src/compiler/graph/context.rs +++ b/crates/nvisy-engine/src/graph/context.rs @@ -7,12 +7,12 @@ use uuid::Uuid; /// Configuration for the [`LoadContext`](super::GraphNodeKind::LoadContext) action. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] -pub struct LoadContextAction { +pub struct LoadContext { /// Context identifiers to load. pub context_ids: Vec, } -impl LoadContextAction { +impl LoadContext { /// Validates that at least one context ID is specified. 
pub fn validate(&self) -> Result<(), Error> { if self.context_ids.is_empty() { @@ -27,12 +27,12 @@ impl LoadContextAction { /// Configuration for the [`SaveContext`](super::GraphNodeKind::SaveContext) action. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] -pub struct SaveContextAction { +pub struct SaveContext { /// Context identifiers to persist. pub context_ids: Vec, } -impl SaveContextAction { +impl SaveContext { /// Validates that at least one context ID is specified. pub fn validate(&self) -> Result<(), Error> { if self.context_ids.is_empty() { @@ -46,17 +46,9 @@ impl SaveContextAction { } /// Configuration for the [`GenerateContext`](super::GraphNodeKind::GenerateContext) action. -#[derive( - Debug, - Clone, - Default, - PartialEq, - Eq, - Serialize, - Deserialize, - JsonSchema -)] -pub struct GenerateContextAction { +#[derive(Debug, Clone, Default, PartialEq, Eq)] +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct GenerateContext { /// Include a span-level summary in the generated context. #[serde(default)] pub summarization: bool, diff --git a/crates/nvisy-engine/src/compiler/graph/extraction.rs b/crates/nvisy-engine/src/graph/extraction.rs similarity index 67% rename from crates/nvisy-engine/src/compiler/graph/extraction.rs rename to crates/nvisy-engine/src/graph/extraction.rs index d8a2e0fb..805e0d95 100644 --- a/crates/nvisy-engine/src/compiler/graph/extraction.rs +++ b/crates/nvisy-engine/src/graph/extraction.rs @@ -4,17 +4,9 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; /// Configuration for the [`VisualExtraction`](super::GraphNodeKind::VisualExtraction) action. -#[derive( - Debug, - Clone, - Default, - PartialEq, - Eq, - Serialize, - Deserialize, - JsonSchema -)] -pub struct VisualExtractionAction { +#[derive(Debug, Clone, Default, PartialEq, Eq)] +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct VisualExtraction { /// Run a secondary LLM verification pass on OCR results. 
#[serde(default)] pub verification: bool, @@ -24,17 +16,9 @@ pub struct VisualExtractionAction { } /// Configuration for the [`AudialExtraction`](super::GraphNodeKind::AudialExtraction) action. -#[derive( - Debug, - Clone, - Default, - PartialEq, - Eq, - Serialize, - Deserialize, - JsonSchema -)] -pub struct AudialExtractionAction { +#[derive(Debug, Clone, Default, PartialEq, Eq)] +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct AudialExtraction { /// Segment the audio by speaker identity. #[serde(default)] pub diarization: bool, diff --git a/crates/nvisy-engine/src/compiler/graph/lifecycle.rs b/crates/nvisy-engine/src/graph/lifecycle.rs similarity index 74% rename from crates/nvisy-engine/src/compiler/graph/lifecycle.rs rename to crates/nvisy-engine/src/graph/lifecycle.rs index 8be24b75..aee8f5b6 100644 --- a/crates/nvisy-engine/src/compiler/graph/lifecycle.rs +++ b/crates/nvisy-engine/src/graph/lifecycle.rs @@ -4,17 +4,9 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; /// Configuration for the [`Import`](super::GraphNodeKind::Import) action. -#[derive( - Debug, - Clone, - Default, - PartialEq, - Eq, - Serialize, - Deserialize, - JsonSchema -)] -pub struct ImportAction { +#[derive(Debug, Clone, Default, PartialEq, Eq)] +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct Import { /// Decompress the content before processing. #[serde(default)] pub decompression: bool, @@ -27,17 +19,9 @@ pub struct ImportAction { } /// Configuration for the [`Export`](super::GraphNodeKind::Export) action. -#[derive( - Debug, - Clone, - Default, - PartialEq, - Eq, - Serialize, - Deserialize, - JsonSchema -)] -pub struct ExportAction { +#[derive(Debug, Clone, Default, PartialEq, Eq)] +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct Export { /// Compress the content before publishing. 
#[serde(default)] pub compression: bool, diff --git a/crates/nvisy-engine/src/compiler/graph/mod.rs b/crates/nvisy-engine/src/graph/mod.rs similarity index 90% rename from crates/nvisy-engine/src/compiler/graph/mod.rs rename to crates/nvisy-engine/src/graph/mod.rs index 98cf2642..4be8ceaf 100644 --- a/crates/nvisy-engine/src/compiler/graph/mod.rs +++ b/crates/nvisy-engine/src/graph/mod.rs @@ -8,6 +8,7 @@ mod context; mod extraction; mod lifecycle; +pub mod policy; mod recognition; mod refinement; @@ -19,12 +20,12 @@ use serde::{Deserialize, Serialize}; use uuid::Uuid; use validator::Validate; -pub use self::context::{GenerateContextAction, LoadContextAction, SaveContextAction}; -pub use self::extraction::{AudialExtractionAction, VisualExtractionAction}; -pub use self::lifecycle::{ExportAction, ImportAction}; -pub use self::recognition::{NamedEntityRecognitionAction, PatternRecognitionAction}; -pub use self::refinement::{FusionAction, RedactionAction}; -use super::policy::{RetryPolicy, TimeoutPolicy}; +pub use self::context::{GenerateContext, LoadContext, SaveContext}; +pub use self::extraction::{AudialExtraction, VisualExtraction}; +pub use self::lifecycle::{Export, Import}; +use self::policy::{RetryPolicy, TimeoutPolicy}; +pub use self::recognition::{NamedEntityRecognition, PatternRecognition}; +pub use self::refinement::{Fusion, Redaction}; /// The set of strongly-typed actions a pipeline node can perform. /// @@ -34,31 +35,31 @@ use super::policy::{RetryPolicy, TimeoutPolicy}; #[serde(tag = "action", rename_all = "snake_case")] pub enum GraphNodeKind { /// Loads reference-data contexts required by downstream actions. - LoadContext(LoadContextAction), + LoadContext(LoadContext), /// Persists contexts produced during the pipeline run. - SaveContext(SaveContextAction), + SaveContext(SaveContext), /// Generates a new context from detection results and content data. 
- GenerateContext(GenerateContextAction), + GenerateContext(GenerateContext), /// Extracts text and entities from images and scanned documents. - VisualExtraction(VisualExtractionAction), + VisualExtraction(VisualExtraction), /// Extracts text from speech audio. - AudialExtraction(AudialExtractionAction), + AudialExtraction(AudialExtraction), /// Detects named entities via language model inference. - NamedEntityRecognition(NamedEntityRecognitionAction), + NamedEntityRecognition(NamedEntityRecognition), /// Detects entities via regex, checksum, dictionary, and heuristic rules. - PatternRecognition(PatternRecognitionAction), + PatternRecognition(PatternRecognition), /// Merges and scores entities from multiple detection sources. - Fusion(FusionAction), + Fusion(Fusion), /// Applies redaction instructions to produce output content. - Redaction(RedactionAction), + Redaction(Redaction), /// Imports content into the pipeline for processing. - Import(ImportAction), + Import(Import), /// Exports processed content to a target destination. - Export(ExportAction), + Export(Export), } impl GraphNodeKind { diff --git a/crates/nvisy-engine/src/graph/policy/mod.rs b/crates/nvisy-engine/src/graph/policy/mod.rs new file mode 100644 index 00000000..ac1569d5 --- /dev/null +++ b/crates/nvisy-engine/src/graph/policy/mod.rs @@ -0,0 +1,10 @@ +//! User-facing retry and timeout policy configuration types. +//! +//! These types are fields on [`GraphNode`](super::GraphNode) and are +//! serializable via serde. They carry no async or tokio dependencies. 
+ +mod retry; +mod timeout; + +pub use self::retry::{BackoffStrategy, RetryPolicy}; +pub use self::timeout::{TimeoutBehavior, TimeoutPolicy}; diff --git a/crates/nvisy-engine/src/compiler/policy/retry.rs b/crates/nvisy-engine/src/graph/policy/retry.rs similarity index 96% rename from crates/nvisy-engine/src/compiler/policy/retry.rs rename to crates/nvisy-engine/src/graph/policy/retry.rs index 53d8f986..fe34f6c4 100644 --- a/crates/nvisy-engine/src/compiler/policy/retry.rs +++ b/crates/nvisy-engine/src/graph/policy/retry.rs @@ -1,4 +1,4 @@ -//! Retry policy types and backoff strategies. +//! User-facing retry policy configuration. //! //! [`RetryPolicy`] configures how many times a failed node should be retried, //! the base delay between attempts, and the [`BackoffStrategy`] to use. diff --git a/crates/nvisy-engine/src/compiler/policy/timeout.rs b/crates/nvisy-engine/src/graph/policy/timeout.rs similarity index 89% rename from crates/nvisy-engine/src/compiler/policy/timeout.rs rename to crates/nvisy-engine/src/graph/policy/timeout.rs index 790095cf..811627d7 100644 --- a/crates/nvisy-engine/src/compiler/policy/timeout.rs +++ b/crates/nvisy-engine/src/graph/policy/timeout.rs @@ -1,4 +1,6 @@ -//! Timeout configuration for pipeline graph nodes. +//! User-facing timeout policy configuration. +//! +//! [`TimeoutPolicy`] controls how long a node may run before timing out. use std::time::Duration; diff --git a/crates/nvisy-engine/src/compiler/graph/recognition.rs b/crates/nvisy-engine/src/graph/recognition.rs similarity index 54% rename from crates/nvisy-engine/src/compiler/graph/recognition.rs rename to crates/nvisy-engine/src/graph/recognition.rs index 37c85d7f..4ecc72c0 100644 --- a/crates/nvisy-engine/src/compiler/graph/recognition.rs +++ b/crates/nvisy-engine/src/graph/recognition.rs @@ -5,40 +5,31 @@ use nvisy_ontology::entity::EntityKind; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -/// Default minimum confidence threshold for NER detections. 
-const DEFAULT_CONFIDENCE_THRESHOLD: f64 = 0.5; - /// Configuration for the [`NamedEntityRecognition`](super::GraphNodeKind::NamedEntityRecognition) action. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] -pub struct NamedEntityRecognitionAction { +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize, JsonSchema)] +pub struct NamedEntityRecognition { /// Entity kinds to detect. An empty list means all known kinds. #[serde(default)] pub entity_kinds: Vec, /// Minimum confidence threshold for detections (0.0 to 1.0). - #[serde(default = "default_confidence_threshold")] - pub confidence_threshold: f64, + /// When `None`, confidence filtering is disabled. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub confidence_threshold: Option, } -impl Default for NamedEntityRecognitionAction { - fn default() -> Self { - Self { - entity_kinds: Vec::new(), - confidence_threshold: DEFAULT_CONFIDENCE_THRESHOLD, - } - } -} - -impl NamedEntityRecognitionAction { - /// Validates that the confidence threshold is within `0.0..=1.0`. +impl NamedEntityRecognition { + /// Validates that the confidence threshold, if set, is within `0.0..=1.0`. pub fn validate(&self) -> Result<(), Error> { - if !(0.0..=1.0).contains(&self.confidence_threshold) { - return Err(Error::validation( - format!( - "confidence_threshold must be between 0.0 and 1.0, got {}", - self.confidence_threshold, - ), - "compiler", - )); + if let Some(t) = self.confidence_threshold { + if !(0.0..=1.0).contains(&t) { + return Err(Error::validation( + format!( + "confidence_threshold must be between 0.0 and 1.0, got {}", + t, + ), + "compiler", + )); + } } Ok(()) } @@ -46,7 +37,7 @@ impl NamedEntityRecognitionAction { /// Configuration for the [`PatternRecognition`](super::GraphNodeKind::PatternRecognition) action. 
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] -pub struct PatternRecognitionAction { +pub struct PatternRecognition { /// Enable format heuristics, entropy, and structural cues. #[serde(default)] pub heuristic: bool, @@ -58,7 +49,7 @@ pub struct PatternRecognitionAction { pub second_pass: bool, } -impl Default for PatternRecognitionAction { +impl Default for PatternRecognition { fn default() -> Self { Self { heuristic: false, @@ -71,7 +62,3 @@ impl Default for PatternRecognitionAction { fn default_true() -> bool { true } - -fn default_confidence_threshold() -> f64 { - DEFAULT_CONFIDENCE_THRESHOLD -} diff --git a/crates/nvisy-engine/src/compiler/graph/refinement.rs b/crates/nvisy-engine/src/graph/refinement.rs similarity index 75% rename from crates/nvisy-engine/src/compiler/graph/refinement.rs rename to crates/nvisy-engine/src/graph/refinement.rs index d6261338..f39f8633 100644 --- a/crates/nvisy-engine/src/compiler/graph/refinement.rs +++ b/crates/nvisy-engine/src/graph/refinement.rs @@ -4,17 +4,9 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; /// Configuration for the [`Fusion`](super::GraphNodeKind::Fusion) action. -#[derive( - Debug, - Clone, - Default, - PartialEq, - Eq, - Serialize, - Deserialize, - JsonSchema -)] -pub struct FusionAction { +#[derive(Debug, Clone, Default, PartialEq, Eq)] +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct Fusion { /// Remove overlapping duplicate entities before fusion. #[serde(default)] pub entity_deduplication: bool, @@ -27,17 +19,9 @@ pub struct FusionAction { } /// Configuration for the [`Redaction`](super::GraphNodeKind::Redaction) action. -#[derive( - Debug, - Clone, - Default, - PartialEq, - Eq, - Serialize, - Deserialize, - JsonSchema -)] -pub struct RedactionAction { +#[derive(Debug, Clone, Default, PartialEq, Eq)] +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct Redaction { /// Run a validation pass on the redacted output. 
#[serde(default)] pub validation: bool, diff --git a/crates/nvisy-engine/src/lib.rs b/crates/nvisy-engine/src/lib.rs index b67fca4e..58899a17 100644 --- a/crates/nvisy-engine/src/lib.rs +++ b/crates/nvisy-engine/src/lib.rs @@ -2,15 +2,17 @@ #![cfg_attr(docsrs, feature(doc_cfg))] #![doc = include_str!("../README.md")] -mod compiler; +pub mod graph; pub mod operation; pub mod pipeline; pub mod provenance; -// Re-export graph data model for pipeline definitions. -// Re-export retry and timeout policies for pipeline nodes. -pub use self::compiler::{BackoffStrategy, RetryPolicy, TimeoutBehavior, TimeoutPolicy}; -pub use self::compiler::{Graph, GraphEdge, GraphNode, GraphNodeKind}; -pub use self::pipeline::{ - DefaultEngine, EngineSection, LlmSection, OcrSection, RuntimeConfig, SttSection, TtsSection, +pub use self::graph::policy::{BackoffStrategy, RetryPolicy, TimeoutBehavior, TimeoutPolicy}; +pub use self::graph::{Graph, GraphEdge, GraphNode, GraphNodeKind}; +pub use self::pipeline::config::{ + EngineSection, LlmSection, OcrSection, RuntimeConfig, SttSection, TtsSection, }; +pub use self::pipeline::runs::{ + NodeSnapshot, NodeStatus, RunFilter, RunSnapshot, RunStatus, RunSummary, +}; +pub use self::pipeline::{DefaultEngine, Engine, EngineInput, EngineOutput, Runs}; diff --git a/crates/nvisy-engine/src/pipeline/config.rs b/crates/nvisy-engine/src/pipeline/config/mod.rs similarity index 99% rename from crates/nvisy-engine/src/pipeline/config.rs rename to crates/nvisy-engine/src/pipeline/config/mod.rs index fd4f3abd..aee34422 100644 --- a/crates/nvisy-engine/src/pipeline/config.rs +++ b/crates/nvisy-engine/src/pipeline/config/mod.rs @@ -3,7 +3,7 @@ use nvisy_rig::agent::{AgentConfig, AgentProvider}; use nvisy_rig::audio::{SttProvider, TtsProvider}; use serde::{Deserialize, Serialize}; -use crate::compiler::{RetryPolicy, TimeoutPolicy}; +use crate::graph::policy::{RetryPolicy, TimeoutPolicy}; /// OCR subsystem configuration. 
#[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/crates/nvisy-engine/src/pipeline/default.rs b/crates/nvisy-engine/src/pipeline/default.rs index 3a4d0440..a5b73237 100644 --- a/crates/nvisy-engine/src/pipeline/default.rs +++ b/crates/nvisy-engine/src/pipeline/default.rs @@ -12,29 +12,89 @@ use std::collections::HashMap; use std::sync::Arc; +use jiff::Timestamp; use nvisy_core::Error; use nvisy_core::content::ContentData; use nvisy_http::HttpClient; -use tokio::sync::{mpsc, watch}; +use tokio::sync::{RwLock, mpsc, watch}; use tokio::task::JoinSet; +use tokio_util::sync::CancellationToken; use uuid::Uuid; use super::executor::{NodeOutput, RunOutput, execute_node}; -use super::{Engine, EngineInput, EngineOutput}; -use crate::compiler::{Compiler, ExecutionPlan, RetryPolicy, TimeoutPolicy}; +use super::plan::{self, ExecutionPlan}; +use super::runs::{NodeSnapshot, NodeStatus, RunFilter, RunSnapshot, RunStatus, RunSummary}; +use super::{Engine, EngineInput, EngineOutput, Runs}; +use crate::graph::policy::{RetryPolicy, TimeoutPolicy}; use crate::operation::SharedContext; use crate::provenance::PolicyEvaluation; -/// Default buffer size for bounded inter-node MPSC channels. -const CHANNEL_BUFFER_SIZE: usize = 256; +/// Private mutable state for a single run, held inside `DefaultEngineInner`. 
+struct RunEntry { + actor_id: Uuid, + status: RunStatus, + created_at: Timestamp, + completed_at: Option, + nodes: Vec, + cancel: CancellationToken, +} + +impl RunEntry { + fn to_snapshot(&self, id: Uuid) -> RunSnapshot { + RunSnapshot { + id, + actor_id: self.actor_id, + status: self.status, + created_at: self.created_at, + completed_at: self.completed_at, + nodes: self.nodes.clone(), + } + } + + fn to_summary(&self, id: Uuid) -> RunSummary { + RunSummary { + id, + actor_id: self.actor_id, + status: self.status, + created_at: self.created_at, + completed_at: self.completed_at, + node_count: self.nodes.len(), + } + } +} + +impl Clone for DefaultEngineInner { + fn clone(&self) -> Self { + Self { + default_retry: self.default_retry.clone(), + default_timeout: self.default_timeout.clone(), + http_client: self.http_client.clone(), + runs: RwLock::new(HashMap::new()), + } + } +} /// Inner state shared behind an [`Arc`]. -#[derive(Clone, Default)] struct DefaultEngineInner { - /// Compiler with default retry and timeout policies. - compiler: Compiler, + /// Default retry policy for graph nodes. + default_retry: Option, + /// Default timeout policy for graph nodes. + default_timeout: Option, /// Shared HTTP client for downstream providers. http_client: HttpClient, + /// All tracked runs keyed by their UUID. + runs: RwLock>, +} + +impl Default for DefaultEngineInner { + fn default() -> Self { + Self { + default_retry: None, + default_timeout: None, + http_client: HttpClient::default(), + runs: RwLock::new(HashMap::new()), + } + } } /// Default [`Engine`] implementation. 
@@ -48,7 +108,8 @@ pub struct DefaultEngine { impl std::fmt::Debug for DefaultEngine { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("DefaultEngine") - .field("compiler", &self.inner.compiler) + .field("default_retry", &self.inner.default_retry) + .field("default_timeout", &self.inner.default_timeout) .field("http_client", &self.inner.http_client) .finish() } @@ -62,13 +123,13 @@ impl DefaultEngine { /// Set the default retry policy. pub fn with_retry(mut self, policy: RetryPolicy) -> Self { - Arc::make_mut(&mut self.inner).compiler.retry = Some(policy); + Arc::make_mut(&mut self.inner).default_retry = Some(policy); self } /// Set the default timeout policy. pub fn with_timeout(mut self, policy: TimeoutPolicy) -> Self { - Arc::make_mut(&mut self.inner).compiler.timeout = Some(policy); + Arc::make_mut(&mut self.inner).default_timeout = Some(policy); self } @@ -85,41 +146,40 @@ impl DefaultEngine { /// Execute a compiled [`ExecutionPlan`] by spawning concurrent tasks for /// each node. 
- async fn run_graph(plan: &ExecutionPlan) -> Result { - let run_id = Uuid::new_v4(); - - // Create channels for each edge + async fn run_graph( + plan: &ExecutionPlan, + cancel: CancellationToken, + ) -> Result { + // Create channels for each edge using pre-computed config let mut senders: HashMap>> = HashMap::new(); let mut receivers: HashMap>> = HashMap::new(); - for node in &plan.nodes { - let node_id = node.node.id; - for downstream_id in &node.downstream_ids { - let (tx, rx) = mpsc::channel(CHANNEL_BUFFER_SIZE); - senders.entry(node_id).or_default().push(tx); - receivers.entry(*downstream_id).or_default().push(rx); - } + for edge in plan.edges() { + let (tx, rx) = mpsc::channel(edge.config.channel_buffer); + senders.entry(edge.source).or_default().push(tx); + receivers.entry(edge.target).or_default().push(rx); } // Create completion signals per node let mut signal_senders: HashMap> = HashMap::new(); let mut signal_receivers: HashMap> = HashMap::new(); - for node in &plan.nodes { + for resolved in plan.nodes() { let (tx, rx) = watch::channel(false); - signal_senders.insert(node.node.id, tx); - signal_receivers.insert(node.node.id, rx); + signal_senders.insert(resolved.node.id, tx); + signal_receivers.insert(resolved.node.id, rx); } // Spawn tasks let mut join_set: JoinSet = JoinSet::new(); - for resolved in &plan.nodes { - let node = resolved.node.clone(); - let node_id = node.id; - let upstream_ids = resolved.upstream_ids.clone(); + for resolved in plan.nodes() { + let resolved = resolved.clone(); + let node_id = resolved.node.id; + let cancel = cancel.clone(); - let upstream_watches: Vec> = upstream_ids + let upstream_watches: Vec> = resolved + .upstream_ids .iter() .filter_map(|id| signal_receivers.get(id).cloned()) .collect(); @@ -134,7 +194,7 @@ impl DefaultEngine { let _ = rx.wait_for(|&done| done).await; } - let result = execute_node(&node, node_senders, node_receivers).await; + let result = execute_node(&resolved, node_senders, node_receivers, 
cancel).await; // Signal completion if let Some(tx) = completion_tx { @@ -169,47 +229,87 @@ impl DefaultEngine { } } - let success = node_results.iter().all(|r| r.error.is_none()); + Ok(RunOutput { node_results }) + } - Ok(RunOutput { - run_id, - node_results, - success, - }) + /// Build [`NodeSnapshot`]s from a completed [`RunOutput`]. + fn node_snapshots(run_output: &RunOutput) -> Vec { + run_output + .node_results + .iter() + .map(|nr| NodeSnapshot { + node_id: nr.node_id, + status: if nr.error.is_none() { + NodeStatus::Succeeded + } else { + NodeStatus::Failed + }, + items_processed: nr.items_processed, + error: nr.error.clone(), + }) + .collect() } } impl Engine for DefaultEngine { async fn run(&self, input: EngineInput) -> Result { let run_id = Uuid::new_v4(); + let cancel = CancellationToken::new(); + + // Register the run as Pending + { + let entry = RunEntry { + actor_id: input.actor_id, + status: RunStatus::Pending, + created_at: Timestamp::now(), + completed_at: None, + nodes: Vec::new(), + cancel: cancel.clone(), + }; + self.inner.runs.write().await.insert(run_id, entry); + } + + // Transition to Running + if let Some(entry) = self.inner.runs.write().await.get_mut(&run_id) { + entry.status = RunStatus::Running; + } let _shared = SharedContext::new(run_id, input.actor_id) .with_policies(input.policies.clone()) .with_contexts(input.contexts.clone()); // Phase 1: Detection - // - // Detection is handled externally (via DetectionService / NER / Pattern / - // CV layers) before the engine is called. The engine receives entities as - // part of a higher-level orchestration layer. For now, we create an empty - // detection output and let the execution graph handle detection actions. let detection = nvisy_ontology::entity::DetectionOutput::new( nvisy_core::content::ContentSource::new(), Vec::new(), ); // Phase 2: Policy Evaluation - // - // Policy evaluation is handled by the execution graph. For now we - // produce an empty evaluation. 
let evaluation = PolicyEvaluation::new(Uuid::nil()); // Phase 3: DAG Execution - // - // Compile the graph into a topologically-sorted execution plan and - // run Source/Action/Target nodes concurrently. - let plan = self.inner.compiler.compile(&input.graph)?; - let run_output = Self::run_graph(&plan).await?; + let compiled = plan::compile( + &input.graph, + self.inner.default_retry.as_ref(), + self.inner.default_timeout.as_ref(), + )?; + let run_output = Self::run_graph(&compiled, cancel).await?; + + // Transition to Succeeded/Failed and populate node snapshots + { + let snapshots = Self::node_snapshots(&run_output); + if let Some(entry) = self.inner.runs.write().await.get_mut(&run_id) { + let any_ok = run_output.node_results.iter().any(|r| r.error.is_none()); + let any_err = run_output.node_results.iter().any(|r| r.error.is_some()); + entry.status = match (any_ok, any_err) { + (_, false) => RunStatus::Succeeded, + (true, true) => RunStatus::PartialFailure, + _ => RunStatus::Failed, + }; + entry.completed_at = Some(Timestamp::now()); + entry.nodes = snapshots; + } + } Ok(EngineOutput { run_id, @@ -218,7 +318,51 @@ impl Engine for DefaultEngine { summaries: Vec::new(), file_audits: Vec::new(), redaction_maps: Vec::new(), - run_output, }) } } + +impl Runs for DefaultEngine { + async fn get_run(&self, id: Uuid) -> Option { + self.inner + .runs + .read() + .await + .get(&id) + .map(|entry| entry.to_snapshot(id)) + } + + async fn list_runs(&self, filter: RunFilter) -> Vec { + self.inner + .runs + .read() + .await + .iter() + .filter(|(_, entry)| { + filter.status.is_none_or(|s| entry.status == s) + && filter.actor_id.is_none_or(|a| entry.actor_id == a) + }) + .map(|(&id, entry)| entry.to_summary(id)) + .collect() + } + + async fn cancel_run(&self, id: Uuid) -> Result<(), Error> { + let mut runs = self.inner.runs.write().await; + let entry = runs + .get_mut(&id) + .ok_or_else(|| Error::new(nvisy_core::ErrorKind::NotFound, "run not found"))?; + + match entry.status { + 
RunStatus::Pending | RunStatus::Running => { + entry.cancel.cancel(); + entry.status = RunStatus::Cancelled; + entry.completed_at = Some(Timestamp::now()); + Ok(()) + } + _ => Err(Error::new( + nvisy_core::ErrorKind::Validation, + "run has already finished", + )), + } + } +} diff --git a/crates/nvisy-engine/src/pipeline/executor.rs b/crates/nvisy-engine/src/pipeline/executor.rs index 9e445126..ba91012b 100644 --- a/crates/nvisy-engine/src/pipeline/executor.rs +++ b/crates/nvisy-engine/src/pipeline/executor.rs @@ -1,23 +1,24 @@ //! Node-level execution dispatchers. //! //! [`execute_node`] dispatches each graph node to the appropriate handler -//! based on its [`GraphNodeKind`]. A per-node timeout is applied when -//! configured, with [`TimeoutBehavior`] controlling whether a timeout -//! is treated as an error or silently yields zero items. +//! based on its [`GraphNodeKind`]. Pre-compiled timeout and retry policies +//! from the [`ResolvedNode`] are applied directly, with +//! [`TimeoutBehavior`] controlling whether a timeout is treated as an error +//! or silently yields zero items. use nvisy_core::content::ContentData; use nvisy_core::{Error, ErrorKind}; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; +use tokio_util::sync::CancellationToken; use uuid::Uuid; -use super::policy::CompiledTimeoutPolicy; -use crate::compiler::{GraphNode, GraphNodeKind, TimeoutBehavior}; +use super::plan::ResolvedNode; +use crate::graph::GraphNodeKind; +use crate::graph::policy::TimeoutBehavior; /// Outcome of executing a single node in the pipeline. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -pub struct NodeOutput { +#[derive(Debug, Clone)] +pub(super) struct NodeOutput { /// ID of the node that produced this result. pub node_id: Uuid, /// Number of data items processed by this node. @@ -27,32 +28,41 @@ pub struct NodeOutput { } /// Aggregate outcome of executing an entire pipeline graph. 
-#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -pub struct RunOutput { - /// Unique identifier for this execution run. - pub run_id: Uuid, +#[derive(Debug, Clone)] +pub(super) struct RunOutput { /// Per-node results in completion order. pub node_results: Vec, - /// `true` if all nodes completed without error. - pub success: bool, } -/// Executes a single graph node by dispatching on its [`GraphNodeKind`]. +/// Executes a single resolved node by dispatching on its [`GraphNodeKind`]. /// -/// A per-node timeout is applied when configured. The [`TimeoutBehavior`] -/// determines whether a timeout is treated as an error (`Fail`) or silently -/// yields zero items (`Skip`). -pub(crate) async fn execute_node( - node: &GraphNode, +/// Uses pre-compiled timeout and retry policies from the [`ResolvedNode`] +/// instead of compiling them inline. Checks the `cancel` token before and +/// during execution to support cooperative cancellation. +pub(super) async fn execute_node( + resolved: &ResolvedNode, senders: Vec>, mut receivers: Vec>, + cancel: CancellationToken, ) -> Result { - let run = async { execute_action(&node.kind, &senders, &mut receivers).await }; + if cancel.is_cancelled() { + return Err(Error::cancellation("run cancelled")); + } + + let run = async { + tokio::select! 
{ + _ = cancel.cancelled() => { + Err(Error::cancellation("run cancelled")) + } + result = execute_action(&resolved.node.kind, &senders, &mut receivers) => { + result + } + } + }; - match node.timeout() { - Some(policy) => { - let compiled = CompiledTimeoutPolicy::from(policy); - let result = compiled.with_timeout(run).await; + match &resolved.compiled_timeout { + Some(compiled) => { + let result: Result = compiled.with_timeout(run).await; match (&result, &compiled.on_timeout) { (Err(e), TimeoutBehavior::Skip) if e.kind == ErrorKind::Timeout => Ok(0), _ => result, diff --git a/crates/nvisy-engine/src/pipeline/mod.rs b/crates/nvisy-engine/src/pipeline/mod.rs index 350a0986..e6a96fe9 100644 --- a/crates/nvisy-engine/src/pipeline/mod.rs +++ b/crates/nvisy-engine/src/pipeline/mod.rs @@ -7,12 +7,13 @@ //! [`DefaultEngine`] is the standard implementation that orchestrates the //! detect -> evaluate -> redact pipeline and drives the DAG execution graph. -mod config; +pub mod config; mod default; mod executor; mod ontology; -mod policy; -mod runs; +mod plan; +pub(crate) mod policy; +pub mod runs; use std::future::Future; @@ -26,10 +27,11 @@ pub use self::config::{ EngineSection, LlmSection, OcrSection, RuntimeConfig, SttSection, TtsSection, }; pub use self::default::DefaultEngine; -pub use self::executor::{NodeOutput, RunOutput}; pub use self::ontology::{Explainable, Explanation}; -pub use self::runs::{NodeProgress, RunManager, RunState, RunStatus, RunSummary}; -use crate::compiler::Graph; +pub use self::runs::{ + NodeSnapshot, NodeStatus, RunFilter, RunSnapshot, RunStatus, RunSummary, +}; +use crate::graph::Graph; use crate::provenance::{Audit, PolicyEvaluation, RedactionMap}; /// Everything the caller must provide to run a redaction pipeline. @@ -57,7 +59,7 @@ pub struct EngineInput { /// Full result of a pipeline run. 
/// /// Contains per-phase breakdown (detection, classification, policy evaluation), -/// per-source summaries, audit records, and the raw DAG execution result. +/// per-source summaries, and audit records. pub struct EngineOutput { /// Unique run identifier. pub run_id: Uuid, @@ -71,8 +73,6 @@ pub struct EngineOutput { pub file_audits: Vec, /// Redaction mapping artifacts. pub redaction_maps: Vec, - /// Per-node execution results from the DAG runner. - pub run_output: RunOutput, } /// The top-level redaction engine contract. @@ -83,3 +83,20 @@ pub trait Engine: Send + Sync { /// Execute a full redaction pipeline. fn run(&self, input: EngineInput) -> impl Future> + Send; } + +/// Read-only access to pipeline run state. +/// +/// Runs are created internally by [`Engine::run()`]. External callers +/// can inspect and cancel runs through this trait. +pub trait Runs: Send + Sync { + /// Get a full snapshot of a single run. + fn get_run(&self, id: Uuid) -> impl Future> + Send; + + /// List runs matching the given filter. + fn list_runs(&self, filter: RunFilter) -> impl Future> + Send; + + /// Request cancellation of an in-progress run. + /// + /// Returns `Err` if the run was not found or has already finished. + fn cancel_run(&self, id: Uuid) -> impl Future> + Send; +} diff --git a/crates/nvisy-engine/src/pipeline/plan/edge.rs b/crates/nvisy-engine/src/pipeline/plan/edge.rs new file mode 100644 index 00000000..62e2b8dd --- /dev/null +++ b/crates/nvisy-engine/src/pipeline/plan/edge.rs @@ -0,0 +1,29 @@ +//! Edge types for the compiled execution plan. + +use uuid::Uuid; + +/// Channel configuration for a resolved edge. +#[derive(Debug, Clone)] +pub struct EdgeConfig { + /// Buffer size for the bounded MPSC channel on this edge. + pub channel_buffer: usize, +} + +impl Default for EdgeConfig { + fn default() -> Self { + Self { + channel_buffer: 256, + } + } +} + +/// A directed edge with pre-computed channel configuration. 
+#[derive(Debug, Clone)] +pub struct ResolvedEdge { + /// ID of the upstream node. + pub source: Uuid, + /// ID of the downstream node. + pub target: Uuid, + /// Channel configuration for this edge. + pub config: EdgeConfig, +} diff --git a/crates/nvisy-engine/src/pipeline/plan/mod.rs b/crates/nvisy-engine/src/pipeline/plan/mod.rs new file mode 100644 index 00000000..69a8fce3 --- /dev/null +++ b/crates/nvisy-engine/src/pipeline/plan/mod.rs @@ -0,0 +1,246 @@ +//! Compiled execution plan types and the `compile()` entry point. +//! +//! An [`ExecutionPlan`] is the central orchestration artifact produced by +//! [`compile()`]. It contains topologically-sorted [`ResolvedNode`]s, +//! pre-computed adjacency information, [`ResolvedEdge`]s with channel +//! configuration, and [`PhaseGroup`]s for phase-aware scheduling. + +mod edge; +mod node; +mod phase; + +use std::collections::HashMap; + +use nvisy_core::{Error, Result}; +use petgraph::algo::{is_cyclic_directed, toposort}; +use petgraph::graph::{DiGraph, NodeIndex}; +use uuid::Uuid; + +pub use self::edge::{EdgeConfig, ResolvedEdge}; +pub use self::node::ResolvedNode; +pub use self::phase::PhaseGroup; +use super::policy::{CompiledRetryPolicy, CompiledTimeoutPolicy}; +use crate::graph::policy::{RetryPolicy, TimeoutPolicy}; +use crate::graph::{Graph, GraphEdge, GraphNode}; + +/// Compiles a [`Graph`] into an [`ExecutionPlan`]. +/// +/// Validates the graph, applies default policies to nodes that don't specify +/// their own, builds a petgraph representation, checks for cycles, and +/// produces a topologically-sorted plan. 
+pub(crate) fn compile( + graph: &Graph, + default_retry: Option<&RetryPolicy>, + default_timeout: Option<&TimeoutPolicy>, +) -> Result { + let mut graph = graph.clone(); + + for node in &mut graph.nodes { + if node.retry.is_none() { + node.retry = default_retry.cloned(); + } + if node.timeout.is_none() { + node.timeout = default_timeout.cloned(); + } + } + + graph.validate()?; + + let pg = build_petgraph(&graph)?; + + let topo = + toposort(&pg, None).map_err(|_| Error::validation("graph contains a cycle", "compiler"))?; + + Ok(ExecutionPlan::from_graph(&pg, &topo)) +} + +/// Builds a petgraph `DiGraph` from a validated [`Graph`] and checks +/// for cycles. +fn build_petgraph(graph: &Graph) -> Result> { + let mut pg = DiGraph::with_capacity(graph.nodes.len(), graph.edges.len()); + let mut index_map = HashMap::with_capacity(graph.nodes.len()); + + for node in &graph.nodes { + let idx = pg.add_node(node.clone()); + index_map.insert(node.id, idx); + } + + for edge in &graph.edges { + let from = index_map[&edge.source]; + let to = index_map[&edge.target]; + pg.add_edge(from, to, edge.clone()); + } + + if is_cyclic_directed(&pg) { + return Err(Error::validation("graph contains a cycle", "compiler")); + } + + Ok(pg) +} + +/// A compiled execution plan ready for the executor. +/// +/// Contains all nodes in topological order, edges with channel configuration, +/// phase groupings, and pre-computed root/leaf indices. Constructed only via +/// [`compile()`]. +pub struct ExecutionPlan { + nodes: Vec, + edges: Vec, + index_map: HashMap, + phases: Vec, + roots: Vec, + leaves: Vec, +} + +impl ExecutionPlan { + /// Builds an execution plan from a petgraph and its topological ordering. 
+ fn from_graph(pg: &DiGraph, topo: &[NodeIndex]) -> Self { + let mut index_map = HashMap::with_capacity(topo.len()); + let mut nodes = Vec::with_capacity(topo.len()); + + for (i, &idx) in topo.iter().enumerate() { + let graph_node = &pg[idx]; + let upstream_ids: Vec = pg + .neighbors_directed(idx, petgraph::Direction::Incoming) + .map(|n| pg[n].id) + .collect(); + let downstream_ids: Vec = pg + .neighbors_directed(idx, petgraph::Direction::Outgoing) + .map(|n| pg[n].id) + .collect(); + + let compiled_retry = graph_node.retry().map(CompiledRetryPolicy::from); + let compiled_timeout = graph_node.timeout().map(CompiledTimeoutPolicy::from); + + index_map.insert(graph_node.id, i); + nodes.push(ResolvedNode { + phase: graph_node.kind.phase(), + node: graph_node.clone(), + upstream_ids, + downstream_ids, + compiled_retry, + compiled_timeout, + }); + } + + let edges: Vec = pg + .edge_indices() + .map(|ei| { + let (src_idx, tgt_idx) = pg.edge_endpoints(ei).unwrap(); + ResolvedEdge { + source: pg[src_idx].id, + target: pg[tgt_idx].id, + config: EdgeConfig::default(), + } + }) + .collect(); + + let roots: Vec = nodes + .iter() + .enumerate() + .filter(|(_, n)| n.upstream_ids.is_empty()) + .map(|(i, _)| i) + .collect(); + + let leaves: Vec = nodes + .iter() + .enumerate() + .filter(|(_, n)| n.downstream_ids.is_empty()) + .map(|(i, _)| i) + .collect(); + + let mut phase_map: HashMap> = HashMap::new(); + for (i, node) in nodes.iter().enumerate() { + phase_map.entry(node.phase).or_default().push(i); + } + let mut phases: Vec = phase_map + .into_iter() + .map(|(phase, node_indices)| PhaseGroup { + phase, + node_indices, + }) + .collect(); + phases.sort_by_key(|g| g.phase); + + Self { + nodes, + edges, + index_map, + phases, + roots, + leaves, + } + } + + /// Number of nodes in the plan. + pub fn len(&self) -> usize { + self.nodes.len() + } + + /// Returns `true` if the plan contains no nodes. 
+ pub fn is_empty(&self) -> bool { + self.nodes.is_empty() + } + + /// Number of edges in the plan. + pub fn edge_count(&self) -> usize { + self.edges.len() + } + + /// All nodes in topological order. + pub fn nodes(&self) -> &[ResolvedNode] { + &self.nodes + } + + /// All edges with their channel configuration. + pub fn edges(&self) -> &[ResolvedEdge] { + &self.edges + } + + /// Look up a node by its UUID in O(1). + pub fn node_by_id(&self, id: Uuid) -> Option<&ResolvedNode> { + self.index_map.get(&id).map(|&i| &self.nodes[i]) + } + + /// Returns the topological index for a node UUID. + pub fn index_of(&self, id: Uuid) -> Option { + self.index_map.get(&id).copied() + } + + /// Indices of root nodes (no upstream dependencies). + pub fn roots(&self) -> &[usize] { + &self.roots + } + + /// Indices of leaf nodes (no downstream dependents). + pub fn leaves(&self) -> &[usize] { + &self.leaves + } + + /// Phase groups sorted by phase number, containing only occupied phases. + pub fn phases(&self) -> &[PhaseGroup] { + &self.phases + } + + /// Iterator over edges originating from the given node. + pub fn outgoing_edges(&self, id: Uuid) -> impl Iterator { + self.edges.iter().filter(move |e| e.source == id) + } + + /// Iterator over edges targeting the given node. + pub fn incoming_edges(&self, id: Uuid) -> impl Iterator { + self.edges.iter().filter(move |e| e.target == id) + } +} + +impl std::fmt::Debug for ExecutionPlan { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ExecutionPlan") + .field("nodes", &self.nodes.len()) + .field("edges", &self.edges.len()) + .field("phases", &self.phases.len()) + .field("roots", &self.roots) + .field("leaves", &self.leaves) + .finish() + } +} diff --git a/crates/nvisy-engine/src/pipeline/plan/node.rs b/crates/nvisy-engine/src/pipeline/plan/node.rs new file mode 100644 index 00000000..72201fbb --- /dev/null +++ b/crates/nvisy-engine/src/pipeline/plan/node.rs @@ -0,0 +1,25 @@ +//! 
Resolved node type for the compiled execution plan. + +use uuid::Uuid; + +use crate::graph::GraphNode; +use crate::pipeline::policy::{CompiledRetryPolicy, CompiledTimeoutPolicy}; + +/// A graph node enriched with adjacency information and compiled policies. +/// +/// Order is implicit in the position within [`ExecutionPlan::nodes`](super::ExecutionPlan::nodes). +#[derive(Debug, Clone)] +pub struct ResolvedNode { + /// The original graph node definition. + pub node: GraphNode, + /// Pipeline phase for this node (derived from the node kind). + pub phase: u8, + /// IDs of nodes that feed data into this node. + pub upstream_ids: Vec, + /// IDs of nodes that receive data from this node. + pub downstream_ids: Vec, + /// Pre-compiled retry policy, if configured. + pub compiled_retry: Option, + /// Pre-compiled timeout policy, if configured. + pub compiled_timeout: Option, +} diff --git a/crates/nvisy-engine/src/pipeline/plan/phase.rs b/crates/nvisy-engine/src/pipeline/plan/phase.rs new file mode 100644 index 00000000..a2b0de72 --- /dev/null +++ b/crates/nvisy-engine/src/pipeline/plan/phase.rs @@ -0,0 +1,10 @@ +//! Phase grouping for the compiled execution plan. + +/// A group of node indices that share the same pipeline phase. +#[derive(Debug, Clone)] +pub struct PhaseGroup { + /// The pipeline phase number (0–5). + pub phase: u8, + /// Indices into `ExecutionPlan::nodes()` for nodes in this phase. + pub node_indices: Vec, +} diff --git a/crates/nvisy-engine/src/pipeline/policy/mod.rs b/crates/nvisy-engine/src/pipeline/policy/mod.rs index a5b29ec7..32f1ba7c 100644 --- a/crates/nvisy-engine/src/pipeline/policy/mod.rs +++ b/crates/nvisy-engine/src/pipeline/policy/mod.rs @@ -1,10 +1,8 @@ -//! Compiled runtime policies and execution helpers. +//! Compiled runtime policy types stored on [`ResolvedNode`](super::ResolvedNode). //! -//! The compiler-level [`RetryPolicy`](crate::compiler::RetryPolicy) and -//! [`TimeoutPolicy`](crate::compiler::TimeoutPolicy) are user-facing -//! 
configuration types. This module provides their compiled runtime -//! counterparts ([`CompiledRetryPolicy`], [`CompiledTimeoutPolicy`]) -//! with `with_retry` and `with_timeout` methods. +//! These types convert user-facing config types from [`crate::graph::policy`] +//! into runtime representations with pre-computed [`Duration`](std::time::Duration) +//! values and async execution helpers. mod retry; mod timeout; diff --git a/crates/nvisy-engine/src/pipeline/policy/retry.rs b/crates/nvisy-engine/src/pipeline/policy/retry.rs index 54fd5be5..e6ff1b5f 100644 --- a/crates/nvisy-engine/src/pipeline/policy/retry.rs +++ b/crates/nvisy-engine/src/pipeline/policy/retry.rs @@ -1,11 +1,11 @@ -//! Compiled retry policy and execution helper. +//! Compiled retry policy with pre-computed delay and async execution helper. use std::time::Duration; use nvisy_core::Error; use tokio::time; -use crate::compiler::{BackoffStrategy, RetryPolicy}; +use crate::graph::policy::{BackoffStrategy, RetryPolicy}; /// Pre-compiled retry policy ready for runtime use. /// diff --git a/crates/nvisy-engine/src/pipeline/policy/timeout.rs b/crates/nvisy-engine/src/pipeline/policy/timeout.rs index 45b4b9a9..e7e8a36e 100644 --- a/crates/nvisy-engine/src/pipeline/policy/timeout.rs +++ b/crates/nvisy-engine/src/pipeline/policy/timeout.rs @@ -1,11 +1,11 @@ -//! Compiled timeout policy and execution helper. +//! Compiled timeout policy with pre-computed duration and async execution helper. use std::time::Duration; use nvisy_core::Error; use tokio::time; -use crate::compiler::{TimeoutBehavior, TimeoutPolicy}; +use crate::graph::policy::{TimeoutBehavior, TimeoutPolicy}; /// Pre-compiled timeout policy ready for runtime use. /// diff --git a/crates/nvisy-engine/src/pipeline/runs.rs b/crates/nvisy-engine/src/pipeline/runs.rs deleted file mode 100644 index d810e448..00000000 --- a/crates/nvisy-engine/src/pipeline/runs.rs +++ /dev/null @@ -1,214 +0,0 @@ -//! Pipeline run lifecycle management. -//! -//! 
Tracks the status of every pipeline execution from creation through -//! completion or cancellation. Provides [`RunManager`] for concurrent -//! read/write access to run state. - -use std::collections::HashMap; -use std::sync::Arc; - -use jiff::Timestamp; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use tokio::sync::RwLock; -use tokio_util::sync::CancellationToken; -use uuid::Uuid; - -use super::executor::{NodeOutput, RunOutput}; - -/// Lifecycle status of a pipeline run. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[derive(Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub enum RunStatus { - /// The run has been created but not yet started. - Pending, - /// The run is actively executing nodes. - Running, - /// All nodes completed without error. - Success, - /// Some nodes succeeded while others failed. - PartialFailure, - /// All nodes failed. - Failure, - /// The run was cancelled by the caller. - Cancelled, -} - -/// Execution progress of a single node within a run. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -pub struct NodeProgress { - /// ID of the node this progress belongs to. - pub node_id: Uuid, - /// Current status of this node. - pub status: RunStatus, - /// Number of data items processed so far. - pub items_processed: u64, - /// Error message if the node failed. - #[serde(skip_serializing_if = "Option::is_none")] - pub error: Option, -} - -impl From<&NodeOutput> for NodeProgress { - fn from(nr: &NodeOutput) -> Self { - Self { - node_id: nr.node_id, - status: if nr.error.is_none() { - RunStatus::Success - } else { - RunStatus::Failure - }, - items_processed: nr.items_processed, - error: nr.error.clone(), - } - } -} - -/// Complete mutable state of a pipeline run. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -pub struct RunState { - /// Unique run identifier. - pub id: Uuid, - /// Current overall status. - pub status: RunStatus, - /// Timestamp when the run was created. 
- #[schemars(with = "String")] - pub created_at: Timestamp, - /// Timestamp when the run finished, if applicable. - #[serde(skip_serializing_if = "Option::is_none")] - #[schemars(with = "Option")] - pub completed_at: Option, - /// Per-node progress keyed by node ID. - pub node_progress: HashMap, - /// Final result after the run completes. - #[serde(skip_serializing_if = "Option::is_none")] - pub result: Option, -} - -/// Lightweight summary of a run for listing endpoints. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -pub struct RunSummary { - /// Unique run identifier. - pub id: Uuid, - /// Current overall status. - pub status: RunStatus, - /// Timestamp when the run was created. - #[schemars(with = "String")] - pub created_at: Timestamp, - /// Timestamp when the run finished, if applicable. - #[serde(skip_serializing_if = "Option::is_none")] - #[schemars(with = "Option")] - pub completed_at: Option, -} - -/// Thread-safe manager that tracks all pipeline runs. -/// -/// Internally uses [`RwLock`]-protected maps so multiple readers can inspect -/// run state concurrently while writes are serialized. -pub struct RunManager { - /// All known runs keyed by their UUID. - runs: Arc>>, - /// Cancellation tokens for runs that are still in progress. - cancel_tokens: Arc>>, -} - -impl RunManager { - /// Creates a new, empty run manager. - pub fn new() -> Self { - Self { - runs: Arc::new(RwLock::new(HashMap::new())), - cancel_tokens: Arc::new(RwLock::new(HashMap::new())), - } - } - - /// Create a new pending run and return its ID and cancellation token. 
- pub async fn create_run(&self) -> (Uuid, CancellationToken) { - let id = Uuid::new_v4(); - let token = CancellationToken::new(); - - let state = RunState { - id, - status: RunStatus::Pending, - created_at: Timestamp::now(), - completed_at: None, - node_progress: HashMap::new(), - result: None, - }; - - self.runs.write().await.insert(id, state); - self.cancel_tokens.write().await.insert(id, token.clone()); - - (id, token) - } - - /// Update a run to running status. - pub async fn set_running(&self, id: Uuid) { - if let Some(state) = self.runs.write().await.get_mut(&id) { - state.status = RunStatus::Running; - } - } - - /// Complete a run with a result. - pub async fn complete_run(&self, id: Uuid, result: RunOutput) { - if let Some(state) = self.runs.write().await.get_mut(&id) { - state.status = if result.success { - RunStatus::Success - } else if result.node_results.iter().any(|r| r.error.is_none()) { - RunStatus::PartialFailure - } else { - RunStatus::Failure - }; - state.completed_at = Some(Timestamp::now()); - - for nr in &result.node_results { - state - .node_progress - .insert(nr.node_id, NodeProgress::from(nr)); - } - - state.result = Some(result); - } - self.cancel_tokens.write().await.remove(&id); - } - - /// Get the current state of a run. - pub async fn get(&self, id: Uuid) -> Option { - self.runs.read().await.get(&id).cloned() - } - - /// List all runs, optionally filtered by status. - pub async fn list(&self, status: Option) -> Vec { - self.runs - .read() - .await - .values() - .filter(|s| status.is_none_or(|st| s.status == st)) - .map(|s| RunSummary { - id: s.id, - status: s.status, - created_at: s.created_at, - completed_at: s.completed_at, - }) - .collect() - } - - /// Cancel a running or pending run. Returns false if not found or already finished. 
- pub async fn cancel(&self, id: Uuid) -> bool { - if let Some(token) = self.cancel_tokens.read().await.get(&id) { - token.cancel(); - if let Some(state) = self.runs.write().await.get_mut(&id) { - state.status = RunStatus::Cancelled; - state.completed_at = Some(Timestamp::now()); - } - true - } else { - false - } - } -} - -impl Default for RunManager { - fn default() -> Self { - Self::new() - } -} diff --git a/crates/nvisy-engine/src/pipeline/runs/mod.rs b/crates/nvisy-engine/src/pipeline/runs/mod.rs new file mode 100644 index 00000000..eca4e5fb --- /dev/null +++ b/crates/nvisy-engine/src/pipeline/runs/mod.rs @@ -0,0 +1,106 @@ +//! Pipeline run data types. +//! +//! Pure data definitions for run lifecycle tracking. All mutation and +//! querying happens through the [`Runs`](super::Runs) trait. + +use jiff::Timestamp; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Lifecycle status of a pipeline run. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum RunStatus { + /// The run has been created but not yet started. + Pending, + /// The run is actively executing nodes. + Running, + /// All nodes completed without error. + Succeeded, + /// Some nodes succeeded while others failed. + PartialFailure, + /// All nodes failed. + Failed, + /// The run was cancelled by the caller. + Cancelled, +} + +/// Lifecycle status of a single node within a run. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum NodeStatus { + /// The node has not yet started. + Pending, + /// The node is actively executing. + Running, + /// The node completed without error. + Succeeded, + /// The node failed. + Failed, +} + +/// Point-in-time snapshot of a single node within a run. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct NodeSnapshot { + /// ID of the node. 
+ pub node_id: Uuid, + /// Current status of this node. + pub status: NodeStatus, + /// Number of data items processed so far. + pub items_processed: u64, + /// Error message if the node failed. + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, +} + +/// Full point-in-time snapshot of a pipeline run. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct RunSnapshot { + /// Unique run identifier. + pub id: Uuid, + /// Identity of the actor who initiated the run. + pub actor_id: Uuid, + /// Current overall status. + pub status: RunStatus, + /// Timestamp when the run was created. + #[schemars(with = "String")] + pub created_at: Timestamp, + /// Timestamp when the run finished, if applicable. + #[serde(skip_serializing_if = "Option::is_none")] + #[schemars(with = "Option")] + pub completed_at: Option, + /// Per-node snapshots. + pub nodes: Vec, +} + +/// Lightweight summary of a run for listing endpoints. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct RunSummary { + /// Unique run identifier. + pub id: Uuid, + /// Identity of the actor who initiated the run. + pub actor_id: Uuid, + /// Current overall status. + pub status: RunStatus, + /// Timestamp when the run was created. + #[schemars(with = "String")] + pub created_at: Timestamp, + /// Timestamp when the run finished, if applicable. + #[serde(skip_serializing_if = "Option::is_none")] + #[schemars(with = "Option")] + pub completed_at: Option, + /// Number of nodes in the execution graph. + pub node_count: usize, +} + +/// Filter criteria for listing runs. +#[derive(Debug, Clone, Default)] +pub struct RunFilter { + /// If set, only return runs with this status. + pub status: Option, + /// If set, only return runs belonging to this actor. 
+ pub actor_id: Option, +} diff --git a/crates/nvisy-server/src/handler/process.rs b/crates/nvisy-server/src/handler/process.rs index f1006a2c..1c64cde8 100644 --- a/crates/nvisy-server/src/handler/process.rs +++ b/crates/nvisy-server/src/handler/process.rs @@ -15,8 +15,7 @@ use aide::axum::ApiRouter; use aide::axum::routing::post_with; use aide::transform::TransformOperation; use axum::extract::State; -use nvisy_engine::RuntimeConfig; -use nvisy_engine::pipeline::{DefaultEngine, Engine, EngineInput}; +use nvisy_engine::{DefaultEngine, Engine, EngineInput, RuntimeConfig}; use super::error::Result; use super::request::NewProcess; diff --git a/crates/nvisy-server/src/service/mod.rs b/crates/nvisy-server/src/service/mod.rs index 3432feb8..7d651cc7 100644 --- a/crates/nvisy-server/src/service/mod.rs +++ b/crates/nvisy-server/src/service/mod.rs @@ -7,8 +7,7 @@ use std::path::PathBuf; -use nvisy_engine::RuntimeConfig; -use nvisy_engine::pipeline::DefaultEngine; +use nvisy_engine::{DefaultEngine, RuntimeConfig}; use nvisy_http::HttpClient; use nvisy_registry::Registry; From 1c6d63b84cb64f45263fe3b9580ff033ba9571d8 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Sat, 14 Mar 2026 17:28:04 +0100 Subject: [PATCH 3/6] style(engine): format pipeline mod imports Co-Authored-By: Claude Opus 4.6 --- crates/nvisy-engine/src/pipeline/mod.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/nvisy-engine/src/pipeline/mod.rs b/crates/nvisy-engine/src/pipeline/mod.rs index e6a96fe9..ed648c41 100644 --- a/crates/nvisy-engine/src/pipeline/mod.rs +++ b/crates/nvisy-engine/src/pipeline/mod.rs @@ -28,9 +28,7 @@ pub use self::config::{ }; pub use self::default::DefaultEngine; pub use self::ontology::{Explainable, Explanation}; -pub use self::runs::{ - NodeSnapshot, NodeStatus, RunFilter, RunSnapshot, RunStatus, RunSummary, -}; +pub use self::runs::{NodeSnapshot, NodeStatus, RunFilter, RunSnapshot, RunStatus, RunSummary}; use crate::graph::Graph; use 
crate::provenance::{Audit, PolicyEvaluation, RedactionMap}; From 7f9e036d69a28f75faea11c97631ab3ecf78c9e5 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Sun, 15 Mar 2026 23:29:09 +0100 Subject: [PATCH 4/6] refactor(server, engine): merge process into runs, add ActorId extractor, move config into engine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Merge process handlers into runs: POST /runs (full pipeline), POST /runs/scan (read-only), removing /process/* endpoints entirely - Add ActorId custom header extractor (X-Actor-Id) replacing ActorQuery params and actor_id request body fields across all handlers - Move RuntimeConfig into DefaultEngine (with_config builder), removing it from ServiceState and per-handler State extraction - Remove redundant data_dir field from ServiceState, delegate to Registry::base_dir() - Rename request/process.rs → request/runs.rs, delete response/process.rs (RunResult merged into response/runs.rs) - Use derive_more for Deref/Display on ActorId - Simplify EngineInput: replace ocr/llm/stt/tts fields with config: Option for per-request overrides Co-Authored-By: Claude Opus 4.6 --- Cargo.lock | 1 + crates/nvisy-engine/src/graph/mod.rs | 28 +++ crates/nvisy-engine/src/graph/recognition.rs | 17 +- crates/nvisy-engine/src/lib.rs | 10 +- crates/nvisy-engine/src/pipeline/default.rs | 21 +- crates/nvisy-engine/src/pipeline/mod.rs | 35 +-- crates/nvisy-engine/src/pipeline/runs/mod.rs | 25 +- crates/nvisy-server/Cargo.toml | 3 + crates/nvisy-server/src/extract/actor.rs | 48 ++++ crates/nvisy-server/src/extract/mod.rs | 2 + crates/nvisy-server/src/handler/contexts.rs | 23 +- crates/nvisy-server/src/handler/files.rs | 19 +- crates/nvisy-server/src/handler/mod.rs | 4 +- crates/nvisy-server/src/handler/process.rs | 181 -------------- .../src/handler/request/contexts.rs | 3 - .../nvisy-server/src/handler/request/files.rs | 3 - .../nvisy-server/src/handler/request/mod.rs | 8 +- 
.../nvisy-server/src/handler/request/path.rs | 11 +- .../handler/request/{process.rs => runs.rs} | 8 +- .../nvisy-server/src/handler/response/mod.rs | 9 +- .../src/handler/response/process.rs | 17 -- .../nvisy-server/src/handler/response/runs.rs | 33 +++ crates/nvisy-server/src/handler/runs.rs | 229 ++++++++++++++++++ crates/nvisy-server/src/service/mod.rs | 34 +-- 24 files changed, 463 insertions(+), 309 deletions(-) create mode 100644 crates/nvisy-server/src/extract/actor.rs delete mode 100644 crates/nvisy-server/src/handler/process.rs rename crates/nvisy-server/src/handler/request/{process.rs => runs.rs} (77%) delete mode 100644 crates/nvisy-server/src/handler/response/process.rs create mode 100644 crates/nvisy-server/src/handler/response/runs.rs create mode 100644 crates/nvisy-server/src/handler/runs.rs diff --git a/Cargo.lock b/Cargo.lock index 20460fcd..15e6ac6f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3107,6 +3107,7 @@ dependencies = [ "aide", "axum", "base64", + "derive_more", "futures", "jiff", "nvisy-core", diff --git a/crates/nvisy-engine/src/graph/mod.rs b/crates/nvisy-engine/src/graph/mod.rs index 4be8ceaf..1ca2c175 100644 --- a/crates/nvisy-engine/src/graph/mod.rs +++ b/crates/nvisy-engine/src/graph/mod.rs @@ -207,6 +207,10 @@ impl Graph { })?; } + // Collect in-degree and out-degree per node for structural checks + let mut in_degree: HashMap = HashMap::new(); + let mut out_degree: HashMap = HashMap::new(); + let mut seen_edges = HashSet::with_capacity(self.edges.len()); for edge in &self.edges { if edge.source == edge.target { @@ -248,6 +252,30 @@ impl Graph { "compiler", )); } + + *out_degree.entry(edge.source).or_default() += 1; + *in_degree.entry(edge.target).or_default() += 1; + } + + for node in &self.nodes { + let incoming = in_degree.get(&node.id).copied().unwrap_or(0); + let outgoing = out_degree.get(&node.id).copied().unwrap_or(0); + + match &node.kind { + GraphNodeKind::Import(_) if incoming > 0 => { + return Err(Error::validation( + 
format!("import node {} must not have incoming edges", node.id), + "compiler", + )); + } + GraphNodeKind::Export(_) if outgoing > 0 => { + return Err(Error::validation( + format!("export node {} must not have outgoing edges", node.id), + "compiler", + )); + } + _ => {} + } } Ok(()) diff --git a/crates/nvisy-engine/src/graph/recognition.rs b/crates/nvisy-engine/src/graph/recognition.rs index 4ecc72c0..6d57a613 100644 --- a/crates/nvisy-engine/src/graph/recognition.rs +++ b/crates/nvisy-engine/src/graph/recognition.rs @@ -20,16 +20,13 @@ pub struct NamedEntityRecognition { impl NamedEntityRecognition { /// Validates that the confidence threshold, if set, is within `0.0..=1.0`. pub fn validate(&self) -> Result<(), Error> { - if let Some(t) = self.confidence_threshold { - if !(0.0..=1.0).contains(&t) { - return Err(Error::validation( - format!( - "confidence_threshold must be between 0.0 and 1.0, got {}", - t, - ), - "compiler", - )); - } + if let Some(t) = self.confidence_threshold + && !(0.0..=1.0).contains(&t) + { + return Err(Error::validation( + format!("confidence_threshold must be between 0.0 and 1.0, got {t}"), + "compiler", + )); } Ok(()) } diff --git a/crates/nvisy-engine/src/lib.rs b/crates/nvisy-engine/src/lib.rs index 58899a17..4bc74383 100644 --- a/crates/nvisy-engine/src/lib.rs +++ b/crates/nvisy-engine/src/lib.rs @@ -9,10 +9,8 @@ pub mod provenance; pub use self::graph::policy::{BackoffStrategy, RetryPolicy, TimeoutBehavior, TimeoutPolicy}; pub use self::graph::{Graph, GraphEdge, GraphNode, GraphNodeKind}; -pub use self::pipeline::config::{ - EngineSection, LlmSection, OcrSection, RuntimeConfig, SttSection, TtsSection, +pub use self::pipeline::{ + DefaultEngine, Engine, EngineInput, EngineOutput, EngineSection, LlmSection, NodeSnapshot, + NodeStatus, OcrSection, RunFilter, RunSnapshot, RunStatus, RunSummary, Runs, RuntimeConfig, + SttSection, TtsSection, }; -pub use self::pipeline::runs::{ - NodeSnapshot, NodeStatus, RunFilter, RunSnapshot, RunStatus, 
RunSummary, -}; -pub use self::pipeline::{DefaultEngine, Engine, EngineInput, EngineOutput, Runs}; diff --git a/crates/nvisy-engine/src/pipeline/default.rs b/crates/nvisy-engine/src/pipeline/default.rs index a5b73237..e11ca684 100644 --- a/crates/nvisy-engine/src/pipeline/default.rs +++ b/crates/nvisy-engine/src/pipeline/default.rs @@ -21,10 +21,11 @@ use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; use uuid::Uuid; +use super::config::RuntimeConfig; use super::executor::{NodeOutput, RunOutput, execute_node}; use super::plan::{self, ExecutionPlan}; -use super::runs::{NodeSnapshot, NodeStatus, RunFilter, RunSnapshot, RunStatus, RunSummary}; -use super::{Engine, EngineInput, EngineOutput, Runs}; +use super::runs::{NodeSnapshot, NodeStatus, RunFilter, RunSnapshot, RunStatus, RunSummary, Runs}; +use super::{Engine, EngineInput, EngineOutput}; use crate::graph::policy::{RetryPolicy, TimeoutPolicy}; use crate::operation::SharedContext; use crate::provenance::PolicyEvaluation; @@ -66,6 +67,7 @@ impl RunEntry { impl Clone for DefaultEngineInner { fn clone(&self) -> Self { Self { + config: self.config.clone(), default_retry: self.default_retry.clone(), default_timeout: self.default_timeout.clone(), http_client: self.http_client.clone(), @@ -76,6 +78,8 @@ impl Clone for DefaultEngineInner { /// Inner state shared behind an [`Arc`]. struct DefaultEngineInner { + /// Base runtime configuration (OCR, LLM, STT, TTS sections). + config: RuntimeConfig, /// Default retry policy for graph nodes. default_retry: Option, /// Default timeout policy for graph nodes. 
@@ -89,6 +93,7 @@ struct DefaultEngineInner { impl Default for DefaultEngineInner { fn default() -> Self { Self { + config: RuntimeConfig::default(), default_retry: None, default_timeout: None, http_client: HttpClient::default(), @@ -108,6 +113,7 @@ pub struct DefaultEngine { impl std::fmt::Debug for DefaultEngine { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("DefaultEngine") + .field("config", &self.inner.config) .field("default_retry", &self.inner.default_retry) .field("default_timeout", &self.inner.default_timeout) .field("http_client", &self.inner.http_client) @@ -121,6 +127,12 @@ impl DefaultEngine { Self::default() } + /// Set the base runtime configuration. + pub fn with_config(mut self, config: RuntimeConfig) -> Self { + Arc::make_mut(&mut self.inner).config = config; + self + } + /// Set the default retry policy. pub fn with_retry(mut self, policy: RetryPolicy) -> Self { Arc::make_mut(&mut self.inner).default_retry = Some(policy); @@ -139,6 +151,11 @@ impl DefaultEngine { self } + /// Returns the base runtime configuration. + pub fn config(&self) -> &RuntimeConfig { + &self.inner.config + } + /// Returns the shared HTTP client. pub fn http_client(&self) -> &HttpClient { &self.inner.http_client diff --git a/crates/nvisy-engine/src/pipeline/mod.rs b/crates/nvisy-engine/src/pipeline/mod.rs index ed648c41..4a80fcbf 100644 --- a/crates/nvisy-engine/src/pipeline/mod.rs +++ b/crates/nvisy-engine/src/pipeline/mod.rs @@ -7,13 +7,13 @@ //! [`DefaultEngine`] is the standard implementation that orchestrates the //! detect -> evaluate -> redact pipeline and drives the DAG execution graph. 
-pub mod config; +mod config; mod default; mod executor; mod ontology; mod plan; pub(crate) mod policy; -pub mod runs; +mod runs; use std::future::Future; @@ -28,7 +28,9 @@ pub use self::config::{ }; pub use self::default::DefaultEngine; pub use self::ontology::{Explainable, Explanation}; -pub use self::runs::{NodeSnapshot, NodeStatus, RunFilter, RunSnapshot, RunStatus, RunSummary}; +pub use self::runs::{ + NodeSnapshot, NodeStatus, RunFilter, RunSnapshot, RunStatus, RunSummary, Runs, +}; use crate::graph::Graph; use crate::provenance::{Audit, PolicyEvaluation, RedactionMap}; @@ -44,14 +46,8 @@ pub struct EngineInput { pub graph: Graph, /// Reference-data contexts for detection. pub contexts: Contexts, - /// OCR subsystem configuration. - pub ocr: Option, - /// LLM subsystem configuration. - pub llm: Option, - /// Speech-to-text subsystem configuration. - pub stt: Option, - /// Text-to-speech subsystem configuration. - pub tts: Option, + /// Per-request configuration overrides (merged with engine defaults). + pub config: Option, } /// Full result of a pipeline run. @@ -81,20 +77,3 @@ pub trait Engine: Send + Sync { /// Execute a full redaction pipeline. fn run(&self, input: EngineInput) -> impl Future> + Send; } - -/// Read-only access to pipeline run state. -/// -/// Runs are created internally by [`Engine::run()`]. External callers -/// can inspect and cancel runs through this trait. -pub trait Runs: Send + Sync { - /// Get a full snapshot of a single run. - fn get_run(&self, id: Uuid) -> impl Future> + Send; - - /// List runs matching the given filter. - fn list_runs(&self, filter: RunFilter) -> impl Future> + Send; - - /// Request cancellation of an in-progress run. - /// - /// Returns `Err` if the run was not found or has already finished. 
- fn cancel_run(&self, id: Uuid) -> impl Future> + Send; -} diff --git a/crates/nvisy-engine/src/pipeline/runs/mod.rs b/crates/nvisy-engine/src/pipeline/runs/mod.rs index eca4e5fb..271531c3 100644 --- a/crates/nvisy-engine/src/pipeline/runs/mod.rs +++ b/crates/nvisy-engine/src/pipeline/runs/mod.rs @@ -1,9 +1,13 @@ -//! Pipeline run data types. +//! Pipeline run data types and the [`Runs`] trait. //! //! Pure data definitions for run lifecycle tracking. All mutation and -//! querying happens through the [`Runs`](super::Runs) trait. +//! querying happens through the [`Runs`] trait, implemented on +//! [`DefaultEngine`](super::DefaultEngine). + +use std::future::Future; use jiff::Timestamp; +use nvisy_core::Error; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -104,3 +108,20 @@ pub struct RunFilter { /// If set, only return runs belonging to this actor. pub actor_id: Option, } + +/// Read-only access to pipeline run state. +/// +/// Runs are created internally by [`Engine::run()`](super::Engine::run). +/// External callers can inspect and cancel runs through this trait. +pub trait Runs: Send + Sync { + /// Get a full snapshot of a single run. + fn get_run(&self, id: Uuid) -> impl Future> + Send; + + /// List runs matching the given filter. + fn list_runs(&self, filter: RunFilter) -> impl Future> + Send; + + /// Request cancellation of an in-progress run. + /// + /// Returns `Err` if the run was not found or has already finished. 
+ fn cancel_run(&self, id: Uuid) -> impl Future> + Send; +} diff --git a/crates/nvisy-server/Cargo.toml b/crates/nvisy-server/Cargo.toml index e0d4464c..333d6569 100644 --- a/crates/nvisy-server/Cargo.toml +++ b/crates/nvisy-server/Cargo.toml @@ -62,6 +62,9 @@ schemars = { workspace = true, features = [] } # Encoding and hashing base64 = { workspace = true, features = [] } +# Derive macros +derive_more = { workspace = true, features = ["deref", "display"] } + # Primitive datatypes uuid = { workspace = true, features = [] } jiff = { workspace = true, features = [] } diff --git a/crates/nvisy-server/src/extract/actor.rs b/crates/nvisy-server/src/extract/actor.rs new file mode 100644 index 00000000..871cb3ef --- /dev/null +++ b/crates/nvisy-server/src/extract/actor.rs @@ -0,0 +1,48 @@ +//! Custom `ActorId` extractor that reads the `X-Actor-Id` header. +//! +//! Wraps the raw header value into a typed [`ActorId`] newtype, +//! rejecting requests that omit the header or supply an invalid UUID +//! with our standard [`ErrorResponse`](crate::handler::response::ErrorResponse). + +use aide::OperationInput; +use axum::extract::FromRequestParts; +use axum::http::request::Parts; +use derive_more::{Deref, Display}; +use uuid::Uuid; + +use crate::handler::error::{Error, ErrorKind}; + +/// The header name used to identify the calling actor. +pub const ACTOR_ID_HEADER: &str = "x-actor-id"; + +/// Actor identity extracted from the `X-Actor-Id` request header. +/// +/// Every request that operates on actor-scoped resources must include +/// this header. The extractor parses the value as a UUID and rejects +/// with [`ErrorKind::Unauthorized`] when the header is missing, or +/// [`ErrorKind::BadRequest`] when it cannot be parsed. 
+#[derive(Debug, Clone, Copy, Deref, Display)] +pub struct ActorId(pub Uuid); + +impl OperationInput for ActorId {} + +impl FromRequestParts for ActorId { + type Rejection = Error<'static>; + + async fn from_request_parts(parts: &mut Parts, _state: &S) -> Result { + let value = parts + .headers + .get(ACTOR_ID_HEADER) + .ok_or_else(|| ErrorKind::Unauthorized.with_message("missing X-Actor-Id header"))? + .to_str() + .map_err(|_| { + ErrorKind::BadRequest.with_message("X-Actor-Id header contains invalid characters") + })?; + + let id = value.parse::().map_err(|_| { + ErrorKind::BadRequest.with_message("X-Actor-Id header is not a valid UUID") + })?; + + Ok(Self(id)) + } +} diff --git a/crates/nvisy-server/src/extract/mod.rs b/crates/nvisy-server/src/extract/mod.rs index bc696e5a..9fc2309a 100644 --- a/crates/nvisy-server/src/extract/mod.rs +++ b/crates/nvisy-server/src/extract/mod.rs @@ -1,9 +1,11 @@ //! Custom extractors for axum handlers. +mod actor; mod json; mod path; mod version; +pub use self::actor::ActorId; pub use self::json::Json; pub use self::path::Path; pub use self::version::Version; diff --git a/crates/nvisy-server/src/handler/contexts.rs b/crates/nvisy-server/src/handler/contexts.rs index 8dabc8cb..12df05d6 100644 --- a/crates/nvisy-server/src/handler/contexts.rs +++ b/crates/nvisy-server/src/handler/contexts.rs @@ -13,14 +13,14 @@ use aide::axum::ApiRouter; use aide::axum::routing::{get_with, post_with}; use aide::transform::TransformOperation; -use axum::extract::{Query, State}; +use axum::extract::State; use axum::http::StatusCode; use nvisy_registry::Registry; use super::error::Result; -use super::request::{ActorQuery, ContextPath, NewContext}; +use super::request::{ContextPath, NewContext}; use super::response::{Context, ContextId, ContextList}; -use crate::extract::{Json, Path}; +use crate::extract::{ActorId, Json, Path}; use crate::service::ServiceState; const TARGET: &str = "nvisy_server::contexts"; @@ -29,13 +29,14 @@ const TARGET: &str = 
"nvisy_server::contexts"; #[tracing::instrument( target = "nvisy_server::contexts", skip_all, - fields(%req.actor_id), + fields(%actor_id), )] async fn upload( State(registry): State, + ActorId(actor_id): ActorId, Json(req): Json, ) -> Result<(StatusCode, Json)> { - let handle = registry.register_context(req.actor_id, req.context).await?; + let handle = registry.register_context(actor_id, req.context).await?; let id = handle.source().as_uuid(); tracing::info!(target: TARGET, %id, "context uploaded"); @@ -48,8 +49,8 @@ fn upload_docs(op: TransformOperation) -> TransformOperation { .tag("contexts") .summary("Upload a typed context") .description( - "Accepts a JSON body with a `context` field containing the Context struct \ - and an `actorId` identifying the owning actor.", + "Accepts a JSON body with a `context` field. The owning actor is \ + identified by the `X-Actor-Id` header.", ) } @@ -61,7 +62,7 @@ fn upload_docs(op: TransformOperation) -> TransformOperation { )] async fn list( State(registry): State, - Query(ActorQuery { actor_id }): Query, + ActorId(actor_id): ActorId, ) -> Result> { let contexts = registry.list_contexts(actor_id).await?; tracing::debug!(target: TARGET, count = contexts.len(), "contexts listed"); @@ -83,8 +84,8 @@ fn list_docs(op: TransformOperation) -> TransformOperation { )] async fn download( State(registry): State, + ActorId(actor_id): ActorId, Path(ContextPath { id }): Path, - Query(ActorQuery { actor_id }): Query, ) -> Result> { let handle = registry.read_context(actor_id, id).await?; let context = handle.context().await?; @@ -107,8 +108,8 @@ fn download_docs(op: TransformOperation) -> TransformOperation { )] async fn delete( State(registry): State, + ActorId(actor_id): ActorId, Path(ContextPath { id }): Path, - Query(ActorQuery { actor_id }): Query, ) -> Result { registry.unregister_context(actor_id, id).await?; tracing::info!(target: TARGET, "context deleted"); @@ -130,7 +131,7 @@ fn delete_docs(op: TransformOperation) -> 
TransformOperation { )] async fn delete_all( State(registry): State, - Query(ActorQuery { actor_id }): Query, + ActorId(actor_id): ActorId, ) -> Result { let deleted = registry.unregister_all_contexts(actor_id).await?; tracing::info!(target: TARGET, deleted, "all contexts deleted"); diff --git a/crates/nvisy-server/src/handler/files.rs b/crates/nvisy-server/src/handler/files.rs index 3c7ed0cd..941b55c6 100644 --- a/crates/nvisy-server/src/handler/files.rs +++ b/crates/nvisy-server/src/handler/files.rs @@ -13,16 +13,16 @@ use aide::axum::ApiRouter; use aide::axum::routing::{get_with, post_with}; use aide::transform::TransformOperation; -use axum::extract::{Query, State}; +use axum::extract::State; use axum::http::StatusCode; use nvisy_core::content::{Content, ContentData, ContentMetadata}; use nvisy_registry::Registry; use super::error::Result; -use super::request::{ActorQuery, ContentPath, NewFile}; +use super::request::{ContentPath, NewFile}; use super::response::{File, FileId, FileList}; use super::utility::Base64; -use crate::extract::{Json, Path}; +use crate::extract::{ActorId, Json, Path}; use crate::service::ServiceState; const TARGET: &str = "nvisy_server::files"; @@ -31,10 +31,11 @@ const TARGET: &str = "nvisy_server::files"; #[tracing::instrument( target = "nvisy_server::files", skip_all, - fields(%req.actor_id, filename = req.filename.as_deref()), + fields(%actor_id, filename = req.filename.as_deref()), )] async fn upload( State(registry): State, + ActorId(actor_id): ActorId, Json(req): Json, ) -> Result<(StatusCode, Json)> { let bytes = req.content.decode()?; @@ -57,7 +58,7 @@ async fn upload( } let content = Content::with_metadata(content_data, metadata); - let handle = registry.register_content(req.actor_id, content).await?; + let handle = registry.register_content(actor_id, content).await?; let id = handle.content_source().as_uuid(); tracing::info!( @@ -89,8 +90,8 @@ fn upload_docs(op: TransformOperation) -> TransformOperation { )] async fn download( 
State(registry): State, + ActorId(actor_id): ActorId, Path(ContentPath { id }): Path, - Query(ActorQuery { actor_id }): Query, ) -> Result> { let handle = registry.read_content(actor_id, id).await?; let content_data = handle.content_data().await?; @@ -124,7 +125,7 @@ fn download_docs(op: TransformOperation) -> TransformOperation { )] async fn list( State(registry): State, - Query(ActorQuery { actor_id }): Query, + ActorId(actor_id): ActorId, ) -> Result> { let files = registry.list_content(actor_id).await?; tracing::debug!(target: TARGET, count = files.len(), "files listed"); @@ -146,8 +147,8 @@ fn list_docs(op: TransformOperation) -> TransformOperation { )] async fn delete( State(registry): State, + ActorId(actor_id): ActorId, Path(ContentPath { id }): Path, - Query(ActorQuery { actor_id }): Query, ) -> Result { registry.unregister_content(actor_id, id).await?; tracing::info!(target: TARGET, "file deleted"); @@ -169,7 +170,7 @@ fn delete_docs(op: TransformOperation) -> TransformOperation { )] async fn delete_all( State(registry): State, - Query(ActorQuery { actor_id }): Query, + ActorId(actor_id): ActorId, ) -> Result { let deleted = registry.unregister_all_content(actor_id).await?; tracing::info!(target: TARGET, deleted, "all files deleted"); diff --git a/crates/nvisy-server/src/handler/mod.rs b/crates/nvisy-server/src/handler/mod.rs index 2a7cc769..99ba8de1 100644 --- a/crates/nvisy-server/src/handler/mod.rs +++ b/crates/nvisy-server/src/handler/mod.rs @@ -15,7 +15,7 @@ pub mod utility; mod check; mod contexts; mod files; -mod process; +mod runs; mod request; mod response; @@ -31,5 +31,5 @@ pub fn routes() -> ApiRouter { .merge(check::routes()) .merge(contexts::routes()) .merge(files::routes()) - .merge(process::routes()) + .merge(runs::routes()) } diff --git a/crates/nvisy-server/src/handler/process.rs b/crates/nvisy-server/src/handler/process.rs deleted file mode 100644 index 1c64cde8..00000000 --- a/crates/nvisy-server/src/handler/process.rs +++ /dev/null @@ 
-1,181 +0,0 @@ -//! Processing pipeline handlers. -//! -//! # Endpoints -//! -//! | Method | Path | Description | -//! |--------|----------------------------|------------------------------------------------| -//! | `POST` | `/api/v1/process/scan` | Run OCR on previously uploaded content | -//! | `POST` | `/api/v1/process/analyze` | Run OCR + LLM analysis on uploaded content | -//! | `POST` | `/api/v1/process/redact` | Run the full redaction pipeline | -//! -//! All endpoints expect a JSON body with `content_ids` referencing previously -//! uploaded content, along with policies and an execution graph. - -use aide::axum::ApiRouter; -use aide::axum::routing::post_with; -use aide::transform::TransformOperation; -use axum::extract::State; -use nvisy_engine::{DefaultEngine, Engine, EngineInput, RuntimeConfig}; - -use super::error::Result; -use super::request::NewProcess; -use super::response::ProcessResult; -use crate::extract::Json; -use crate::service::ServiceState; - -const TARGET: &str = "nvisy_server::process"; - -/// Build an [`EngineInput`] from a [`NewProcess`] with merged config. -fn engine_input(req: NewProcess, config: RuntimeConfig) -> EngineInput { - EngineInput { - actor_id: req.actor_id, - content_ids: req.content_ids, - policies: req.policies, - graph: req.graph, - contexts: Default::default(), - ocr: config.ocr, - llm: config.llm, - stt: config.stt, - tts: config.tts, - } -} - -/// `POST /api/v1/process/scan`: run OCR on uploaded content. -/// -/// Extracts text and structural information from the content without -/// further classification or redaction. 
-#[tracing::instrument( - target = "nvisy_server::process", - skip_all, - fields(%req.actor_id, content_count = req.content_ids.len(), mode = "scan"), -)] -async fn scan( - State(engine): State, - State(base_config): State, - Json(req): Json, -) -> Result> { - let config = match &req.config { - Some(overrides) => base_config.merge(overrides), - None => base_config.clone(), - }; - let input = engine_input(req, config); - let output = engine.run(input).await?; - - tracing::info!( - target: TARGET, - run_id = %output.run_id, - "scan complete", - ); - - Ok(Json(ProcessResult { - run_id: output.run_id, - summaries: serde_json::to_value(&output.summaries).unwrap_or_default(), - audits: serde_json::to_value(&output.file_audits).unwrap_or_default(), - })) -} - -fn scan_docs(op: TransformOperation) -> TransformOperation { - op.id("scanContent") - .tag("process") - .summary("Run OCR on uploaded content") - .description( - "Runs OCR on previously uploaded content identified by content_ids. \ - Extracts text and structural information without classification or redaction.", - ) -} - -/// `POST /api/v1/process/analyze`: run OCR + LLM analysis on uploaded content. -/// -/// Extracts text via OCR and classifies entities using an LLM, without -/// applying any redactions. 
-#[tracing::instrument( - target = "nvisy_server::process", - skip_all, - fields(%req.actor_id, content_count = req.content_ids.len(), mode = "analyze"), -)] -async fn analyze( - State(engine): State, - State(base_config): State, - Json(req): Json, -) -> Result> { - let config = match &req.config { - Some(overrides) => base_config.merge(overrides), - None => base_config.clone(), - }; - let input = engine_input(req, config); - let output = engine.run(input).await?; - - tracing::info!( - target: TARGET, - run_id = %output.run_id, - "analysis complete", - ); - - Ok(Json(ProcessResult { - run_id: output.run_id, - summaries: serde_json::to_value(&output.summaries).unwrap_or_default(), - audits: serde_json::to_value(&output.file_audits).unwrap_or_default(), - })) -} - -fn analyze_docs(op: TransformOperation) -> TransformOperation { - op.id("analyzeContent") - .tag("process") - .summary("Run OCR + LLM analysis on uploaded content") - .description( - "Runs OCR followed by LLM-based entity classification on previously \ - uploaded content. Returns detected entities without applying redactions.", - ) -} - -/// `POST /api/v1/process/redact`: run the full redaction pipeline. -/// -/// Performs OCR, entity classification, policy evaluation, and redaction -/// on previously uploaded content. 
-#[tracing::instrument( - target = "nvisy_server::process", - skip_all, - fields(%req.actor_id, content_count = req.content_ids.len(), mode = "redact"), -)] -async fn redact( - State(engine): State, - State(base_config): State, - Json(req): Json, -) -> Result> { - let config = match &req.config { - Some(overrides) => base_config.merge(overrides), - None => base_config.clone(), - }; - let input = engine_input(req, config); - let output = engine.run(input).await?; - - tracing::info!( - target: TARGET, - run_id = %output.run_id, - "redaction complete", - ); - - Ok(Json(ProcessResult { - run_id: output.run_id, - summaries: serde_json::to_value(&output.summaries).unwrap_or_default(), - audits: serde_json::to_value(&output.file_audits).unwrap_or_default(), - })) -} - -fn redact_docs(op: TransformOperation) -> TransformOperation { - op.id("redactContent") - .tag("process") - .summary("Run the full redaction pipeline on uploaded content") - .description( - "Runs the complete pipeline (OCR \u{2192} entity classification \u{2192} policy \ - evaluation \u{2192} redaction) on previously uploaded content.", - ) -} - -/// Process routes. -pub fn routes() -> ApiRouter { - ApiRouter::new() - .api_route("/api/v1/process/scan", post_with(scan, scan_docs)) - .api_route("/api/v1/process/analyze", post_with(analyze, analyze_docs)) - .api_route("/api/v1/process/redact", post_with(redact, redact_docs)) -} diff --git a/crates/nvisy-server/src/handler/request/contexts.rs b/crates/nvisy-server/src/handler/request/contexts.rs index 080c38ea..69bb229a 100644 --- a/crates/nvisy-server/src/handler/request/contexts.rs +++ b/crates/nvisy-server/src/handler/request/contexts.rs @@ -3,14 +3,11 @@ use nvisy_ontology::context::Context; use schemars::JsonSchema; use serde::Deserialize; -use uuid::Uuid; /// JSON request body for typed context upload. #[derive(Debug, Deserialize, JsonSchema)] #[serde(rename_all = "camelCase")] pub struct NewContext { - /// Actor identity that owns the context. 
- pub actor_id: Uuid, /// The context to store. pub context: Context, } diff --git a/crates/nvisy-server/src/handler/request/files.rs b/crates/nvisy-server/src/handler/request/files.rs index ce2e031f..7bcc6b1f 100644 --- a/crates/nvisy-server/src/handler/request/files.rs +++ b/crates/nvisy-server/src/handler/request/files.rs @@ -2,7 +2,6 @@ use schemars::JsonSchema; use serde::Deserialize; -use uuid::Uuid; use crate::handler::utility::Base64; @@ -10,8 +9,6 @@ use crate::handler::utility::Base64; #[derive(Debug, Deserialize, JsonSchema)] #[serde(rename_all = "camelCase")] pub struct NewFile { - /// Actor identity that owns the file. - pub actor_id: Uuid, /// Base64-encoded file bytes. pub content: Base64, /// Optional original filename. diff --git a/crates/nvisy-server/src/handler/request/mod.rs b/crates/nvisy-server/src/handler/request/mod.rs index d01612b2..f88ba522 100644 --- a/crates/nvisy-server/src/handler/request/mod.rs +++ b/crates/nvisy-server/src/handler/request/mod.rs @@ -1,4 +1,4 @@ -//! Typed request bodies for API endpoints. +//! Typed request bodies and path parameters for API endpoints. //! //! Each struct derives [`Deserialize`](serde::Deserialize) and //! [`JsonSchema`](schemars::JsonSchema) for automatic OpenAPI schema @@ -7,9 +7,9 @@ mod contexts; mod files; mod path; -mod process; +mod runs; pub use self::contexts::NewContext; pub use self::files::NewFile; -pub use self::path::{ActorQuery, ContentPath, ContextPath}; -pub use self::process::NewProcess; +pub use self::path::{ContentPath, ContextPath, RunPath}; +pub use self::runs::NewRun; diff --git a/crates/nvisy-server/src/handler/request/path.rs b/crates/nvisy-server/src/handler/request/path.rs index 6a101deb..9d01bee5 100644 --- a/crates/nvisy-server/src/handler/request/path.rs +++ b/crates/nvisy-server/src/handler/request/path.rs @@ -1,4 +1,4 @@ -//! Typed path and query parameters for API endpoints. +//! Typed path parameters for API endpoints. 
use schemars::JsonSchema; use serde::Deserialize; @@ -18,10 +18,9 @@ pub struct ContextPath { pub id: Uuid, } -/// Query parameter for endpoints that need actor scoping. +/// Path parameter for run endpoints. #[derive(Debug, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct ActorQuery { - /// Actor identity. - pub actor_id: Uuid, +pub struct RunPath { + /// Run identifier. + pub id: Uuid, } diff --git a/crates/nvisy-server/src/handler/request/process.rs b/crates/nvisy-server/src/handler/request/runs.rs similarity index 77% rename from crates/nvisy-server/src/handler/request/process.rs rename to crates/nvisy-server/src/handler/request/runs.rs index 9ecf64cd..44c17cda 100644 --- a/crates/nvisy-server/src/handler/request/process.rs +++ b/crates/nvisy-server/src/handler/request/runs.rs @@ -1,4 +1,4 @@ -//! Process request types. +//! Run request types. use nvisy_engine::{Graph, RuntimeConfig}; use nvisy_ontology::policy::Policies; @@ -6,12 +6,10 @@ use schemars::JsonSchema; use serde::Deserialize; use uuid::Uuid; -/// Request body for `POST /api/v1/process/*` endpoints. +/// Request body for `POST /api/v1/runs` and `POST /api/v1/runs/scan`. #[derive(Debug, Deserialize, JsonSchema)] #[serde(rename_all = "camelCase")] -pub struct NewProcess { - /// Actor identity for registry lookups. - pub actor_id: Uuid, +pub struct NewRun { /// Identifiers of previously uploaded content. pub content_ids: Vec, /// Policies to apply during processing. diff --git a/crates/nvisy-server/src/handler/response/mod.rs b/crates/nvisy-server/src/handler/response/mod.rs index 6c57c58b..c6117979 100644 --- a/crates/nvisy-server/src/handler/response/mod.rs +++ b/crates/nvisy-server/src/handler/response/mod.rs @@ -1,18 +1,17 @@ -//! Typed response bodies and error types for API endpoints. +//! Typed response bodies for API endpoints. //! //! Each struct derives [`Serialize`](serde::Serialize) and //! [`JsonSchema`](schemars::JsonSchema) for automatic OpenAPI schema -//! 
generation via aide. [`ErrorResponse`] is the serializable JSON -//! body returned by every error path. +//! generation via aide. mod check; mod contexts; mod error; mod files; -mod process; +mod runs; pub use self::check::{Analytics, Health, ServiceStatus}; pub use self::contexts::{Context, ContextId, ContextList}; pub use self::error::ErrorResponse; pub use self::files::{File, FileId, FileList}; -pub use self::process::ProcessResult; +pub use self::runs::{Run, RunList, RunResult}; diff --git a/crates/nvisy-server/src/handler/response/process.rs b/crates/nvisy-server/src/handler/response/process.rs deleted file mode 100644 index 04fe6688..00000000 --- a/crates/nvisy-server/src/handler/response/process.rs +++ /dev/null @@ -1,17 +0,0 @@ -//! Process response types. - -use schemars::JsonSchema; -use serde::Serialize; -use uuid::Uuid; - -/// Response body for `POST /api/v1/process/*` endpoints. -#[derive(Debug, Serialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct ProcessResult { - /// Unique run identifier. - pub run_id: Uuid, - /// Per-source result summaries as opaque JSON. - pub summaries: serde_json::Value, - /// Audit trail entries as opaque JSON. - pub audits: serde_json::Value, -} diff --git a/crates/nvisy-server/src/handler/response/runs.rs b/crates/nvisy-server/src/handler/response/runs.rs new file mode 100644 index 00000000..ef6ead49 --- /dev/null +++ b/crates/nvisy-server/src/handler/response/runs.rs @@ -0,0 +1,33 @@ +//! Run response types. + +use nvisy_engine::{RunSnapshot, RunSummary}; +use schemars::JsonSchema; +use serde::Serialize; +use uuid::Uuid; + +/// Response body for `POST /api/v1/runs` and `POST /api/v1/runs/scan`. +#[derive(Debug, Serialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct RunResult { + /// Unique run identifier. + pub run_id: Uuid, + /// Per-source result summaries as opaque JSON. + pub summaries: serde_json::Value, + /// Audit trail entries as opaque JSON. 
+ pub audits: serde_json::Value, +} + +/// Response body for `GET /api/v1/runs/{id}`. +#[derive(Debug, Serialize, JsonSchema)] +pub struct Run { + /// Full run snapshot. + #[serde(flatten)] + pub run: RunSnapshot, +} + +/// Response body for `GET /api/v1/runs`. +#[derive(Debug, Serialize, JsonSchema)] +pub struct RunList { + /// List of run summaries. + pub runs: Vec, +} diff --git a/crates/nvisy-server/src/handler/runs.rs b/crates/nvisy-server/src/handler/runs.rs new file mode 100644 index 00000000..eebe9141 --- /dev/null +++ b/crates/nvisy-server/src/handler/runs.rs @@ -0,0 +1,229 @@ +//! Pipeline run creation, inspection, and cancellation handlers. +//! +//! # Endpoints +//! +//! | Method | Path | Description | +//! |--------|----------------------------|--------------------------------------| +//! | `POST` | `/api/v1/runs` | Run the full pipeline | +//! | `POST` | `/api/v1/runs/scan` | Run a read-only scan (no redaction) | +//! | `GET` | `/api/v1/runs` | List runs with optional filters | +//! | `GET` | `/api/v1/runs/{id}` | Get a full run snapshot | +//! | `POST` | `/api/v1/runs/{id}/cancel` | Cancel an in-progress run | + +use aide::axum::ApiRouter; +use aide::axum::routing::{get_with, post_with}; +use aide::transform::TransformOperation; +use axum::extract::{Query, State}; +use axum::http::StatusCode; +use nvisy_engine::{DefaultEngine, Engine, EngineInput, RunFilter, Runs}; + +use super::error::{ErrorKind, Result}; +use super::request::{NewRun, RunPath}; +use super::response::{Run, RunList, RunResult}; +use crate::extract::{ActorId, Json, Path}; +use crate::service::ServiceState; + +/// Optional query parameters for listing runs. +#[derive(Debug, serde::Deserialize, schemars::JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct RunQuery { + /// Filter by run status (e.g. `running`, `succeeded`). + #[serde(default)] + pub status: Option, + /// Filter by actor identity. 
+ #[serde(default)] + pub actor_id: Option, +} + +const TARGET: &str = "nvisy_server::runs"; + +/// `POST /api/v1/runs`: run the full pipeline on uploaded content. +/// +/// Performs extraction, detection, policy evaluation, and redaction +/// on previously uploaded content identified by `content_ids`. +#[tracing::instrument( + target = "nvisy_server::runs", + skip_all, + fields(%actor_id, content_count = req.content_ids.len(), mode = "full"), +)] +async fn create( + State(engine): State, + ActorId(actor_id): ActorId, + Json(req): Json, +) -> Result<(StatusCode, Json)> { + let input = EngineInput { + actor_id, + content_ids: req.content_ids, + policies: req.policies, + graph: req.graph, + contexts: Default::default(), + config: req.config, + }; + let output = engine.run(input).await?; + + tracing::info!( + target: TARGET, + run_id = %output.run_id, + "pipeline complete", + ); + + Ok(( + StatusCode::CREATED, + Json(RunResult { + run_id: output.run_id, + summaries: serde_json::to_value(&output.summaries).unwrap_or_default(), + audits: serde_json::to_value(&output.file_audits).unwrap_or_default(), + }), + )) +} + +fn create_docs(op: TransformOperation) -> TransformOperation { + op.id("createRun") + .tag("runs") + .summary("Run the full pipeline on uploaded content") + .description( + "Runs the complete pipeline (extraction \u{2192} detection \u{2192} policy \ + evaluation \u{2192} redaction) on previously uploaded content.", + ) +} + +/// `POST /api/v1/runs/scan`: run a read-only scan on uploaded content. +/// +/// Extracts text and detects entities without applying redactions. +/// The pipeline behaviour is determined by the graph in the request body. 
+#[tracing::instrument( + target = "nvisy_server::runs", + skip_all, + fields(%actor_id, content_count = req.content_ids.len(), mode = "scan"), +)] +async fn scan( + State(engine): State, + ActorId(actor_id): ActorId, + Json(req): Json, +) -> Result<(StatusCode, Json)> { + let input = EngineInput { + actor_id, + content_ids: req.content_ids, + policies: req.policies, + graph: req.graph, + contexts: Default::default(), + config: req.config, + }; + let output = engine.run(input).await?; + + tracing::info!( + target: TARGET, + run_id = %output.run_id, + "scan complete", + ); + + Ok(( + StatusCode::CREATED, + Json(RunResult { + run_id: output.run_id, + summaries: serde_json::to_value(&output.summaries).unwrap_or_default(), + audits: serde_json::to_value(&output.file_audits).unwrap_or_default(), + }), + )) +} + +fn scan_docs(op: TransformOperation) -> TransformOperation { + op.id("scanContent") + .tag("runs") + .summary("Run a read-only scan on uploaded content") + .description( + "Extracts text and detects entities without applying redactions. \ + The pipeline behaviour is determined by the graph in the request body.", + ) +} + +/// `GET /api/v1/runs`: list runs with optional status/actor filters. +#[tracing::instrument( + target = "nvisy_server::runs", + skip_all, + fields(?query.status, ?query.actor_id), +)] +async fn list( + State(engine): State, + Query(query): Query, +) -> Result> { + let filter = RunFilter { + status: query.status, + actor_id: query.actor_id, + }; + let runs = engine.list_runs(filter).await; + tracing::debug!(target: TARGET, count = runs.len(), "runs listed"); + Ok(Json(RunList { runs })) +} + +fn list_docs(op: TransformOperation) -> TransformOperation { + op.id("listRuns") + .tag("runs") + .summary("List pipeline runs") + .description( + "Returns a list of run summaries, optionally filtered by status or actor identity.", + ) +} + +/// `GET /api/v1/runs/{id}`: get a full run snapshot. 
+#[tracing::instrument( + target = "nvisy_server::runs", + skip_all, + fields(%id), +)] +async fn get( + State(engine): State, + Path(RunPath { id }): Path, +) -> Result> { + let run = engine + .get_run(id) + .await + .ok_or_else(|| ErrorKind::NotFound.with_resource("run"))?; + tracing::debug!(target: TARGET, "run retrieved"); + Ok(Json(Run { run })) +} + +fn get_docs(op: TransformOperation) -> TransformOperation { + op.id("getRun") + .tag("runs") + .summary("Get a pipeline run") + .description("Returns the full snapshot of a single run including per-node status.") +} + +/// `POST /api/v1/runs/{id}/cancel`: cancel an in-progress run. +#[tracing::instrument( + target = "nvisy_server::runs", + skip_all, + fields(%id), +)] +async fn cancel( + State(engine): State, + Path(RunPath { id }): Path, +) -> Result { + engine.cancel_run(id).await?; + tracing::info!(target: TARGET, "run cancelled"); + Ok(StatusCode::NO_CONTENT) +} + +fn cancel_docs(op: TransformOperation) -> TransformOperation { + op.id("cancelRun") + .tag("runs") + .summary("Cancel a pipeline run") + .description( + "Requests cancellation of a pending or running pipeline run. \ + Returns 204 on success, 404 if the run does not exist, \ + or 400 if the run has already finished.", + ) +} + +/// Run routes. 
+pub fn routes() -> ApiRouter { + ApiRouter::new() + .api_route( + "/api/v1/runs", + post_with(create, create_docs).get_with(list, list_docs), + ) + .api_route("/api/v1/runs/scan", post_with(scan, scan_docs)) + .api_route("/api/v1/runs/{id}", get_with(get, get_docs)) + .api_route("/api/v1/runs/{id}/cancel", post_with(cancel, cancel_docs)) +} diff --git a/crates/nvisy-server/src/service/mod.rs b/crates/nvisy-server/src/service/mod.rs index 7d651cc7..1c97cf3b 100644 --- a/crates/nvisy-server/src/service/mod.rs +++ b/crates/nvisy-server/src/service/mod.rs @@ -17,8 +17,6 @@ use nvisy_registry::Registry; pub struct ServiceState { engine: DefaultEngine, registry: Registry, - config: RuntimeConfig, - data_dir: PathBuf, } impl ServiceState { @@ -28,7 +26,7 @@ impl ServiceState { /// /// Returns an error if the registry database cannot be opened. pub fn new(config: RuntimeConfig, data_dir: PathBuf) -> nvisy_core::Result { - let registry = Registry::open(data_dir.clone())?; + let registry = Registry::open(data_dir)?; let http_config = config .engine @@ -37,25 +35,32 @@ impl ServiceState { .unwrap_or_default(); let http_client = HttpClient::new(&http_config); - let mut engine = DefaultEngine::new().with_http_client(http_client); - if let Some(retry) = config.engine.as_ref().and_then(|e| e.retry.clone()) { + let mut engine = DefaultEngine::new() + .with_config(config) + .with_http_client(http_client); + if let Some(retry) = engine + .config() + .engine + .as_ref() + .and_then(|e| e.retry.clone()) + { engine = engine.with_retry(retry); } - if let Some(timeout) = config.engine.as_ref().and_then(|e| e.timeout.clone()) { + if let Some(timeout) = engine + .config() + .engine + .as_ref() + .and_then(|e| e.timeout.clone()) + { engine = engine.with_timeout(timeout); } - Ok(Self { - engine, - registry, - config, - data_dir, - }) + Ok(Self { engine, registry }) } - /// Returns the resolved data directory. + /// Returns the data directory path from the registry. 
pub fn data_dir(&self) -> &std::path::Path { - &self.data_dir + self.registry.base_dir() } } @@ -72,5 +77,4 @@ macro_rules! impl_di { impl_di!( engine: DefaultEngine, registry: Registry, - config: RuntimeConfig, ); From b7a23b61c41388b8ebe445461d1de6d0c7b825bd Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Mon, 16 Mar 2026 02:33:54 +0100 Subject: [PATCH 5/6] refactor(engine, server): wire typed pipeline execution, clean up dead code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire Import/Export dispatch in NodeExecutor with Arc channels, populate EngineOutput from collected envelopes, and connect the analytics endpoint to live engine run data. - Add NodeExecutor struct replacing free execute_node/execute_action fns - Add EngineAnalytics trait + AnalyticsSnapshot in pipeline/analytics module - Rename Runs → EngineRuns, Analytics → EngineAnalytics for clarity - Remove dead ExecutionPlan fields (index_map, phases, roots, leaves) - Remove PhaseGroup type and phase.rs, phase field from ResolvedNode - Remove unused Explainable/Explanation (covered by provenance module) - Make pipeline::policy private, re-export compiled types - Flatten config/mod.rs → config.rs - Wire server analytics handler via EngineAnalytics::snapshot() Co-Authored-By: Claude Opus 4.6 --- crates/nvisy-engine/src/lib.rs | 6 +- .../src/pipeline/analytics/mod.rs | 32 +++ .../src/pipeline/{config/mod.rs => config.rs} | 0 crates/nvisy-engine/src/pipeline/default.rs | 86 ++++--- crates/nvisy-engine/src/pipeline/executor.rs | 230 ++++++++++++------ crates/nvisy-engine/src/pipeline/mod.rs | 9 +- crates/nvisy-engine/src/pipeline/ontology.rs | 47 ---- crates/nvisy-engine/src/pipeline/plan/mod.rs | 114 +-------- crates/nvisy-engine/src/pipeline/plan/node.rs | 2 - .../nvisy-engine/src/pipeline/plan/phase.rs | 10 - crates/nvisy-engine/src/pipeline/runs/mod.rs | 2 +- crates/nvisy-server/src/handler/check.rs | 15 +- crates/nvisy-server/src/handler/runs.rs | 2 +- 13 
files changed, 269 insertions(+), 286 deletions(-) create mode 100644 crates/nvisy-engine/src/pipeline/analytics/mod.rs rename crates/nvisy-engine/src/pipeline/{config/mod.rs => config.rs} (100%) delete mode 100644 crates/nvisy-engine/src/pipeline/ontology.rs delete mode 100644 crates/nvisy-engine/src/pipeline/plan/phase.rs diff --git a/crates/nvisy-engine/src/lib.rs b/crates/nvisy-engine/src/lib.rs index 4bc74383..a016f4f0 100644 --- a/crates/nvisy-engine/src/lib.rs +++ b/crates/nvisy-engine/src/lib.rs @@ -10,7 +10,7 @@ pub mod provenance; pub use self::graph::policy::{BackoffStrategy, RetryPolicy, TimeoutBehavior, TimeoutPolicy}; pub use self::graph::{Graph, GraphEdge, GraphNode, GraphNodeKind}; pub use self::pipeline::{ - DefaultEngine, Engine, EngineInput, EngineOutput, EngineSection, LlmSection, NodeSnapshot, - NodeStatus, OcrSection, RunFilter, RunSnapshot, RunStatus, RunSummary, Runs, RuntimeConfig, - SttSection, TtsSection, + AnalyticsSnapshot, DefaultEngine, Engine, EngineAnalytics, EngineInput, EngineOutput, + EngineRuns, EngineSection, LlmSection, NodeSnapshot, NodeStatus, OcrSection, RunFilter, + RunSnapshot, RunStatus, RunSummary, RuntimeConfig, SttSection, TtsSection, }; diff --git a/crates/nvisy-engine/src/pipeline/analytics/mod.rs b/crates/nvisy-engine/src/pipeline/analytics/mod.rs new file mode 100644 index 00000000..5c83b841 --- /dev/null +++ b/crates/nvisy-engine/src/pipeline/analytics/mod.rs @@ -0,0 +1,32 @@ +//! Aggregate pipeline analytics types and the [`Analytics`] trait. +//! +//! Pure data definitions for pipeline-wide metrics. Querying happens +//! through the [`Analytics`] trait, implemented on +//! [`DefaultEngine`](super::DefaultEngine). + +use std::future::Future; + +use jiff::Timestamp; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Point-in-time aggregate metrics across all pipeline runs. 
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct AnalyticsSnapshot { + /// Timestamp when the snapshot was taken. + #[schemars(with = "String")] + pub timestamp: Timestamp, + /// Total number of pipeline runs tracked by the engine. + pub total_runs: u64, + /// Total number of entities detected across all runs. + pub total_entities_detected: u64, + /// Total number of redactions applied across all runs. + pub total_redactions_applied: u64, +} + +/// Read-only access to aggregate pipeline analytics. +pub trait EngineAnalytics: Send + Sync { + /// Collect a point-in-time analytics snapshot. + fn snapshot(&self) -> impl Future + Send; +} diff --git a/crates/nvisy-engine/src/pipeline/config/mod.rs b/crates/nvisy-engine/src/pipeline/config.rs similarity index 100% rename from crates/nvisy-engine/src/pipeline/config/mod.rs rename to crates/nvisy-engine/src/pipeline/config.rs diff --git a/crates/nvisy-engine/src/pipeline/default.rs b/crates/nvisy-engine/src/pipeline/default.rs index e11ca684..806347e9 100644 --- a/crates/nvisy-engine/src/pipeline/default.rs +++ b/crates/nvisy-engine/src/pipeline/default.rs @@ -14,20 +14,21 @@ use std::sync::Arc; use jiff::Timestamp; use nvisy_core::Error; -use nvisy_core::content::ContentData; +use nvisy_core::content::ContentSource; use nvisy_http::HttpClient; use tokio::sync::{RwLock, mpsc, watch}; use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; use uuid::Uuid; +use super::analytics::{AnalyticsSnapshot, EngineAnalytics}; use super::config::RuntimeConfig; -use super::executor::{NodeOutput, RunOutput, execute_node}; +use super::executor::{NodeExecutor, NodeOutput, RunOutput}; use super::plan::{self, ExecutionPlan}; -use super::runs::{NodeSnapshot, NodeStatus, RunFilter, RunSnapshot, RunStatus, RunSummary, Runs}; +use super::runs::{EngineRuns, NodeSnapshot, NodeStatus, RunFilter, RunSnapshot, RunStatus, RunSummary}; use super::{Engine, EngineInput, EngineOutput}; 
use crate::graph::policy::{RetryPolicy, TimeoutPolicy}; -use crate::operation::SharedContext; +use crate::operation::{DocumentEnvelope, SharedContext}; use crate::provenance::PolicyEvaluation; /// Private mutable state for a single run, held inside `DefaultEngineInner`. @@ -166,10 +167,11 @@ impl DefaultEngine { async fn run_graph( plan: &ExecutionPlan, cancel: CancellationToken, + shared: SharedContext, ) -> Result { - // Create channels for each edge using pre-computed config - let mut senders: HashMap>> = HashMap::new(); - let mut receivers: HashMap>> = HashMap::new(); + let mut senders: HashMap>>> = HashMap::new(); + let mut receivers: HashMap>>> = + HashMap::new(); for edge in plan.edges() { let (tx, rx) = mpsc::channel(edge.config.channel_buffer); @@ -177,7 +179,6 @@ impl DefaultEngine { receivers.entry(edge.target).or_default().push(rx); } - // Create completion signals per node let mut signal_senders: HashMap> = HashMap::new(); let mut signal_receivers: HashMap> = HashMap::new(); @@ -187,13 +188,12 @@ impl DefaultEngine { signal_receivers.insert(resolved.node.id, rx); } - // Spawn tasks let mut join_set: JoinSet = JoinSet::new(); for resolved in plan.nodes() { let resolved = resolved.clone(); let node_id = resolved.node.id; - let cancel = cancel.clone(); + let executor = NodeExecutor::new(shared.clone(), cancel.clone()); let upstream_watches: Vec> = resolved .upstream_ids @@ -206,34 +206,28 @@ impl DefaultEngine { let node_receivers = receivers.remove(&node_id).unwrap_or_default(); join_set.spawn(async move { - // Wait for upstream nodes to complete for mut rx in upstream_watches { let _ = rx.wait_for(|&done| done).await; } - let result = execute_node(&resolved, node_senders, node_receivers, cancel).await; + let result = executor.execute(&resolved, node_senders, node_receivers).await; - // Signal completion if let Some(tx) = completion_tx { let _ = tx.send(true); } match result { - Ok(count) => NodeOutput { - node_id, - items_processed: count, - error: 
None, - }, + Ok(output) => output, Err(e) => NodeOutput { node_id, items_processed: 0, error: Some(e.to_string()), + envelopes: Vec::new(), }, } }); } - // Collect results let mut node_results = Vec::new(); while let Some(result) = join_set.join_next().await { match result { @@ -242,6 +236,7 @@ impl DefaultEngine { node_id: Uuid::nil(), items_processed: 0, error: Some(format!("Task panicked: {}", e)), + envelopes: Vec::new(), }), } } @@ -273,7 +268,6 @@ impl Engine for DefaultEngine { let run_id = Uuid::new_v4(); let cancel = CancellationToken::new(); - // Register the run as Pending { let entry = RunEntry { actor_id: input.actor_id, @@ -286,31 +280,20 @@ impl Engine for DefaultEngine { self.inner.runs.write().await.insert(run_id, entry); } - // Transition to Running if let Some(entry) = self.inner.runs.write().await.get_mut(&run_id) { entry.status = RunStatus::Running; } - let _shared = SharedContext::new(run_id, input.actor_id) + let shared = SharedContext::new(run_id, input.actor_id) .with_policies(input.policies.clone()) .with_contexts(input.contexts.clone()); - // Phase 1: Detection - let detection = nvisy_ontology::entity::DetectionOutput::new( - nvisy_core::content::ContentSource::new(), - Vec::new(), - ); - - // Phase 2: Policy Evaluation - let evaluation = PolicyEvaluation::new(Uuid::nil()); - - // Phase 3: DAG Execution let compiled = plan::compile( &input.graph, self.inner.default_retry.as_ref(), self.inner.default_timeout.as_ref(), )?; - let run_output = Self::run_graph(&compiled, cancel).await?; + let run_output = Self::run_graph(&compiled, cancel, shared).await?; // Transition to Succeeded/Failed and populate node snapshots { @@ -328,18 +311,51 @@ impl Engine for DefaultEngine { } } + // Collect envelopes from all nodes (export nodes accumulate them) + let mut all_entities = nvisy_ontology::entity::Entities::new(); + let mut file_audits = Vec::new(); + let first_policy_id = input + .policies + .policies + .first() + .map(|p| p.id) + 
.unwrap_or(Uuid::nil()); + + for nr in &run_output.node_results { + for envelope in &nr.envelopes { + all_entities.extend(envelope.entities.iter().cloned()); + file_audits.push(envelope.audit.clone()); + } + } + + let detection = + nvisy_ontology::entity::DetectionOutput::new(ContentSource::new(), all_entities); + let evaluation = PolicyEvaluation::new(first_policy_id); + Ok(EngineOutput { run_id, detection, evaluation, summaries: Vec::new(), - file_audits: Vec::new(), + file_audits, redaction_maps: Vec::new(), }) } } -impl Runs for DefaultEngine { +impl EngineAnalytics for DefaultEngine { + async fn snapshot(&self) -> AnalyticsSnapshot { + let runs = self.inner.runs.read().await; + AnalyticsSnapshot { + timestamp: Timestamp::now(), + total_runs: runs.len() as u64, + total_entities_detected: 0, + total_redactions_applied: 0, + } + } +} + +impl EngineRuns for DefaultEngine { async fn get_run(&self, id: Uuid) -> Option { self.inner .runs diff --git a/crates/nvisy-engine/src/pipeline/executor.rs b/crates/nvisy-engine/src/pipeline/executor.rs index ba91012b..6f842891 100644 --- a/crates/nvisy-engine/src/pipeline/executor.rs +++ b/crates/nvisy-engine/src/pipeline/executor.rs @@ -1,12 +1,13 @@ //! Node-level execution dispatchers. //! -//! [`execute_node`] dispatches each graph node to the appropriate handler +//! [`NodeExecutor`] dispatches each graph node to the appropriate handler //! based on its [`GraphNodeKind`]. Pre-compiled timeout and retry policies //! from the [`ResolvedNode`] are applied directly, with //! [`TimeoutBehavior`] controlling whether a timeout is treated as an error //! or silently yields zero items. 
-use nvisy_core::content::ContentData; +use std::sync::Arc; + use nvisy_core::{Error, ErrorKind}; use tokio::sync::mpsc; use tokio_util::sync::CancellationToken; @@ -15,9 +16,10 @@ use uuid::Uuid; use super::plan::ResolvedNode; use crate::graph::GraphNodeKind; use crate::graph::policy::TimeoutBehavior; +use crate::operation::{DocumentEnvelope, SharedContext}; /// Outcome of executing a single node in the pipeline. -#[derive(Debug, Clone)] +#[derive(Debug)] pub(super) struct NodeOutput { /// ID of the node that produced this result. pub node_id: Uuid, @@ -25,97 +27,181 @@ pub(super) struct NodeOutput { pub items_processed: u64, /// Error message if the node failed, or `None` on success. pub error: Option, + /// Envelopes collected by terminal (export) nodes. + pub envelopes: Vec>, } /// Aggregate outcome of executing an entire pipeline graph. -#[derive(Debug, Clone)] +#[derive(Debug)] pub(super) struct RunOutput { /// Per-node results in completion order. pub node_results: Vec, } -/// Executes a single resolved node by dispatching on its [`GraphNodeKind`]. -/// -/// Uses pre-compiled timeout and retry policies from the [`ResolvedNode`] -/// instead of compiling them inline. Checks the `cancel` token before and -/// during execution to support cooperative cancellation. -pub(super) async fn execute_node( - resolved: &ResolvedNode, - senders: Vec>, - mut receivers: Vec>, +/// Executes a single resolved node within a pipeline run. +pub(super) struct NodeExecutor { + shared: SharedContext, cancel: CancellationToken, -) -> Result { - if cancel.is_cancelled() { - return Err(Error::cancellation("run cancelled")); +} + +impl NodeExecutor { + pub fn new(shared: SharedContext, cancel: CancellationToken) -> Self { + Self { shared, cancel } } - let run = async { - tokio::select! 
{ - _ = cancel.cancelled() => { - Err(Error::cancellation("run cancelled")) - } - result = execute_action(&resolved.node.kind, &senders, &mut receivers) => { - result - } + /// Execute a resolved node, applying timeout policies and cancellation. + pub async fn execute( + &self, + resolved: &ResolvedNode, + senders: Vec>>, + mut receivers: Vec>>, + ) -> Result { + if self.cancel.is_cancelled() { + return Err(Error::cancellation("run cancelled")); } - }; - - match &resolved.compiled_timeout { - Some(compiled) => { - let result: Result = compiled.with_timeout(run).await; - match (&result, &compiled.on_timeout) { - (Err(e), TimeoutBehavior::Skip) if e.kind == ErrorKind::Timeout => Ok(0), - _ => result, + + let node_id = resolved.node.id; + let cancel = self.cancel.clone(); + + let run = async { + tokio::select! { + _ = cancel.cancelled() => { + Err(Error::cancellation("run cancelled")) + } + result = self.dispatch(node_id, &resolved.node.kind, &senders, &mut receivers) => { + result + } } + }; + + match &resolved.compiled_timeout { + Some(compiled) => { + let result: Result = compiled.with_timeout(run).await; + match (&result, &compiled.on_timeout) { + (Err(e), TimeoutBehavior::Skip) if e.kind == ErrorKind::Timeout => { + Ok(NodeOutput { + node_id, + items_processed: 0, + error: None, + envelopes: Vec::new(), + }) + } + _ => result, + } + } + None => run.await, } - None => run.await, } -} -/// Dispatches an action node: receives upstream data, logs the action kind, -/// and forwards items downstream. -/// -/// Concrete action implementations will replace these passthrough stubs -/// as the orchestrator is built out. 
-async fn execute_action( - action: &GraphNodeKind, - senders: &[mpsc::Sender], - receivers: &mut [mpsc::Receiver], -) -> Result { - match action { - GraphNodeKind::LoadContext(_) => tracing::trace!("action node: load_context (passthrough)"), - GraphNodeKind::SaveContext(_) => tracing::trace!("action node: save_context (passthrough)"), - GraphNodeKind::GenerateContext(_) => { - tracing::trace!("action node: generate_context (passthrough)") - } - GraphNodeKind::VisualExtraction(_) => { - tracing::trace!("action node: visual_extraction (passthrough)") + /// Dispatch based on node kind: Import decodes content, Export collects + /// envelopes, all others pass through. + async fn dispatch( + &self, + node_id: Uuid, + action: &GraphNodeKind, + senders: &[mpsc::Sender>], + receivers: &mut [mpsc::Receiver>], + ) -> Result { + match action { + GraphNodeKind::Import(_) => self.execute_import(node_id, senders, receivers).await, + GraphNodeKind::Export(_) => self.execute_export(node_id, receivers).await, + kind => self.execute_passthrough(node_id, kind, senders, receivers).await, } - GraphNodeKind::AudialExtraction(_) => { - tracing::trace!("action node: audial_extraction (passthrough)") - } - GraphNodeKind::NamedEntityRecognition(_) => { - tracing::trace!("action node: ner (passthrough)") - } - GraphNodeKind::PatternRecognition(_) => { - tracing::trace!("action node: pattern_recognition (passthrough)") + } + + async fn execute_import( + &self, + node_id: Uuid, + senders: &[mpsc::Sender>], + receivers: &mut [mpsc::Receiver>], + ) -> Result { + // TODO: Import nodes should receive ContentData from the registry, + // not from upstream channels. For now this is a stub that will be + // wired when content storage is connected. 
+ let mut count = 0u64; + let mut envelopes = Vec::new(); + + for rx in receivers.iter_mut() { + while let Some(envelope) = rx.recv().await { + for tx in senders { + let _ = tx.send(Arc::clone(&envelope)).await; + } + count += 1; + envelopes.push(envelope); + } } - GraphNodeKind::Fusion(_) => tracing::trace!("action node: fusion (passthrough)"), - GraphNodeKind::Redaction(_) => tracing::trace!("action node: redaction (passthrough)"), - GraphNodeKind::Import(_) => tracing::trace!("action node: import (passthrough)"), - GraphNodeKind::Export(_) => tracing::trace!("action node: export (passthrough)"), + + Ok(NodeOutput { + node_id, + items_processed: count, + error: None, + envelopes, + }) } - // Forward items from all upstream receivers to all downstream senders. - let mut count = 0u64; - for rx in receivers.iter_mut() { - while let Some(item) = rx.recv().await { - count += 1; - for tx in senders { - let _ = tx.send(item.clone()).await; + async fn execute_export( + &self, + node_id: Uuid, + receivers: &mut [mpsc::Receiver>], + ) -> Result { + let mut count = 0u64; + let mut envelopes = Vec::new(); + + for rx in receivers.iter_mut() { + while let Some(envelope) = rx.recv().await { + count += 1; + envelopes.push(envelope); } } + + tracing::debug!(count, "export node collected envelopes"); + + Ok(NodeOutput { + node_id, + items_processed: count, + error: None, + envelopes, + }) } - Ok(count) + async fn execute_passthrough( + &self, + node_id: Uuid, + kind: &GraphNodeKind, + senders: &[mpsc::Sender>], + receivers: &mut [mpsc::Receiver>], + ) -> Result { + let label = match kind { + GraphNodeKind::LoadContext(_) => "load_context", + GraphNodeKind::SaveContext(_) => "save_context", + GraphNodeKind::GenerateContext(_) => "generate_context", + GraphNodeKind::VisualExtraction(_) => "visual_extraction", + GraphNodeKind::AudialExtraction(_) => "audial_extraction", + GraphNodeKind::NamedEntityRecognition(_) => "ner", + GraphNodeKind::PatternRecognition(_) => 
"pattern_recognition", + GraphNodeKind::Fusion(_) => "fusion", + GraphNodeKind::Redaction(_) => "redaction", + GraphNodeKind::Import(_) | GraphNodeKind::Export(_) => unreachable!(), + }; + + // TODO: wire operation dispatch + tracing::trace!(action = label, "passthrough"); + + let mut count = 0u64; + for rx in receivers.iter_mut() { + while let Some(item) = rx.recv().await { + count += 1; + for tx in senders { + let _ = tx.send(Arc::clone(&item)).await; + } + } + } + + Ok(NodeOutput { + node_id, + items_processed: count, + error: None, + envelopes: Vec::new(), + }) + } } diff --git a/crates/nvisy-engine/src/pipeline/mod.rs b/crates/nvisy-engine/src/pipeline/mod.rs index 4a80fcbf..4caf35d5 100644 --- a/crates/nvisy-engine/src/pipeline/mod.rs +++ b/crates/nvisy-engine/src/pipeline/mod.rs @@ -7,12 +7,12 @@ //! [`DefaultEngine`] is the standard implementation that orchestrates the //! detect -> evaluate -> redact pipeline and drives the DAG execution graph. +mod analytics; mod config; mod default; mod executor; -mod ontology; mod plan; -pub(crate) mod policy; +mod policy; mod runs; use std::future::Future; @@ -23,13 +23,14 @@ use nvisy_ontology::entity::DetectionOutput; use nvisy_ontology::policy::{Policies, RedactionSummary}; use uuid::Uuid; +pub use self::analytics::{AnalyticsSnapshot, EngineAnalytics}; pub use self::config::{ EngineSection, LlmSection, OcrSection, RuntimeConfig, SttSection, TtsSection, }; pub use self::default::DefaultEngine; -pub use self::ontology::{Explainable, Explanation}; +pub use self::policy::{CompiledRetryPolicy, CompiledTimeoutPolicy}; pub use self::runs::{ - NodeSnapshot, NodeStatus, RunFilter, RunSnapshot, RunStatus, RunSummary, Runs, + EngineRuns, NodeSnapshot, NodeStatus, RunFilter, RunSnapshot, RunStatus, RunSummary, }; use crate::graph::Graph; use crate::provenance::{Audit, PolicyEvaluation, RedactionMap}; diff --git a/crates/nvisy-engine/src/pipeline/ontology.rs b/crates/nvisy-engine/src/pipeline/ontology.rs deleted file mode 100644 
index fefb1503..00000000 --- a/crates/nvisy-engine/src/pipeline/ontology.rs +++ /dev/null @@ -1,47 +0,0 @@ -//! Explainability metadata for data protection decisions. -//! -//! An [`Explanation`] records why an action was taken: which model, rule, -//! and confidence level were involved. Types that carry this metadata -//! implement the [`Explainable`] trait. - -use nvisy_ontology::entity::{ModelInfo, RecognitionMethod}; -use schemars::JsonSchema; -use semver::Version; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -/// Types that carry explainability metadata. -pub trait Explainable { - /// Why this action was taken. - fn explanation(&self) -> Option<&Explanation>; -} - -/// Structured explainability metadata for a data protection decision. -/// -/// Records why an action was taken, which model and rule were involved, -/// and who reviewed it. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -pub struct Explanation { - /// Detection model that produced the decision. - #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, - /// Identifier of the policy rule that triggered the action. - #[serde(skip_serializing_if = "Option::is_none")] - pub rule_id: Option, - /// Detection confidence score. - #[serde(skip_serializing_if = "Option::is_none")] - pub confidence: Option, - /// Recognition method used. - #[serde(skip_serializing_if = "Option::is_none")] - pub recognition_method: Option, - /// Human-readable reason for the action. - #[serde(skip_serializing_if = "Option::is_none")] - pub reason: Option, - /// Version of the policy that was evaluated. - #[serde(skip_serializing_if = "Option::is_none")] - #[schemars(with = "Option")] - pub policy_version: Option, - /// Identifier of the reviewer who approved/rejected. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub reviewer_id: Option, -} diff --git a/crates/nvisy-engine/src/pipeline/plan/mod.rs b/crates/nvisy-engine/src/pipeline/plan/mod.rs index 69a8fce3..113b6551 100644 --- a/crates/nvisy-engine/src/pipeline/plan/mod.rs +++ b/crates/nvisy-engine/src/pipeline/plan/mod.rs @@ -1,15 +1,11 @@ //! Compiled execution plan types and the `compile()` entry point. //! //! An [`ExecutionPlan`] is the central orchestration artifact produced by -//! [`compile()`]. It contains topologically-sorted [`ResolvedNode`]s, -//! pre-computed adjacency information, [`ResolvedEdge`]s with channel -//! configuration, and [`PhaseGroup`]s for phase-aware scheduling. +//! [`compile()`]. It contains topologically-sorted [`ResolvedNode`]s and +//! pre-computed [`ResolvedEdge`]s with channel configuration. mod edge; mod node; -mod phase; - -use std::collections::HashMap; use nvisy_core::{Error, Result}; use petgraph::algo::{is_cyclic_directed, toposort}; @@ -18,7 +14,6 @@ use uuid::Uuid; pub use self::edge::{EdgeConfig, ResolvedEdge}; pub use self::node::ResolvedNode; -pub use self::phase::PhaseGroup; use super::policy::{CompiledRetryPolicy, CompiledTimeoutPolicy}; use crate::graph::policy::{RetryPolicy, TimeoutPolicy}; use crate::graph::{Graph, GraphEdge, GraphNode}; @@ -58,7 +53,7 @@ pub(crate) fn compile( /// for cycles. fn build_petgraph(graph: &Graph) -> Result> { let mut pg = DiGraph::with_capacity(graph.nodes.len(), graph.edges.len()); - let mut index_map = HashMap::with_capacity(graph.nodes.len()); + let mut index_map = std::collections::HashMap::with_capacity(graph.nodes.len()); for node in &graph.nodes { let idx = pg.add_node(node.clone()); @@ -80,25 +75,19 @@ fn build_petgraph(graph: &Graph) -> Result> { /// A compiled execution plan ready for the executor. /// -/// Contains all nodes in topological order, edges with channel configuration, -/// phase groupings, and pre-computed root/leaf indices. Constructed only via -/// [`compile()`]. 
+/// Contains all nodes in topological order and edges with channel +/// configuration. Constructed only via [`compile()`]. pub struct ExecutionPlan { nodes: Vec, edges: Vec, - index_map: HashMap, - phases: Vec, - roots: Vec, - leaves: Vec, } impl ExecutionPlan { /// Builds an execution plan from a petgraph and its topological ordering. fn from_graph(pg: &DiGraph, topo: &[NodeIndex]) -> Self { - let mut index_map = HashMap::with_capacity(topo.len()); let mut nodes = Vec::with_capacity(topo.len()); - for (i, &idx) in topo.iter().enumerate() { + for &idx in topo { let graph_node = &pg[idx]; let upstream_ids: Vec = pg .neighbors_directed(idx, petgraph::Direction::Incoming) @@ -112,9 +101,7 @@ impl ExecutionPlan { let compiled_retry = graph_node.retry().map(CompiledRetryPolicy::from); let compiled_timeout = graph_node.timeout().map(CompiledTimeoutPolicy::from); - index_map.insert(graph_node.id, i); nodes.push(ResolvedNode { - phase: graph_node.kind.phase(), node: graph_node.clone(), upstream_ids, downstream_ids, @@ -135,56 +122,7 @@ impl ExecutionPlan { }) .collect(); - let roots: Vec = nodes - .iter() - .enumerate() - .filter(|(_, n)| n.upstream_ids.is_empty()) - .map(|(i, _)| i) - .collect(); - - let leaves: Vec = nodes - .iter() - .enumerate() - .filter(|(_, n)| n.downstream_ids.is_empty()) - .map(|(i, _)| i) - .collect(); - - let mut phase_map: HashMap> = HashMap::new(); - for (i, node) in nodes.iter().enumerate() { - phase_map.entry(node.phase).or_default().push(i); - } - let mut phases: Vec = phase_map - .into_iter() - .map(|(phase, node_indices)| PhaseGroup { - phase, - node_indices, - }) - .collect(); - phases.sort_by_key(|g| g.phase); - - Self { - nodes, - edges, - index_map, - phases, - roots, - leaves, - } - } - - /// Number of nodes in the plan. - pub fn len(&self) -> usize { - self.nodes.len() - } - - /// Returns `true` if the plan contains no nodes. - pub fn is_empty(&self) -> bool { - self.nodes.is_empty() - } - - /// Number of edges in the plan. 
- pub fn edge_count(&self) -> usize { - self.edges.len() + Self { nodes, edges } } /// All nodes in topological order. @@ -196,41 +134,6 @@ impl ExecutionPlan { pub fn edges(&self) -> &[ResolvedEdge] { &self.edges } - - /// Look up a node by its UUID in O(1). - pub fn node_by_id(&self, id: Uuid) -> Option<&ResolvedNode> { - self.index_map.get(&id).map(|&i| &self.nodes[i]) - } - - /// Returns the topological index for a node UUID. - pub fn index_of(&self, id: Uuid) -> Option { - self.index_map.get(&id).copied() - } - - /// Indices of root nodes (no upstream dependencies). - pub fn roots(&self) -> &[usize] { - &self.roots - } - - /// Indices of leaf nodes (no downstream dependents). - pub fn leaves(&self) -> &[usize] { - &self.leaves - } - - /// Phase groups sorted by phase number, containing only occupied phases. - pub fn phases(&self) -> &[PhaseGroup] { - &self.phases - } - - /// Iterator over edges originating from the given node. - pub fn outgoing_edges(&self, id: Uuid) -> impl Iterator { - self.edges.iter().filter(move |e| e.source == id) - } - - /// Iterator over edges targeting the given node. - pub fn incoming_edges(&self, id: Uuid) -> impl Iterator { - self.edges.iter().filter(move |e| e.target == id) - } } impl std::fmt::Debug for ExecutionPlan { @@ -238,9 +141,6 @@ impl std::fmt::Debug for ExecutionPlan { f.debug_struct("ExecutionPlan") .field("nodes", &self.nodes.len()) .field("edges", &self.edges.len()) - .field("phases", &self.phases.len()) - .field("roots", &self.roots) - .field("leaves", &self.leaves) .finish() } } diff --git a/crates/nvisy-engine/src/pipeline/plan/node.rs b/crates/nvisy-engine/src/pipeline/plan/node.rs index 72201fbb..51368aa4 100644 --- a/crates/nvisy-engine/src/pipeline/plan/node.rs +++ b/crates/nvisy-engine/src/pipeline/plan/node.rs @@ -12,8 +12,6 @@ use crate::pipeline::policy::{CompiledRetryPolicy, CompiledTimeoutPolicy}; pub struct ResolvedNode { /// The original graph node definition. 
pub node: GraphNode, - /// Pipeline phase for this node (derived from the node kind). - pub phase: u8, /// IDs of nodes that feed data into this node. pub upstream_ids: Vec, /// IDs of nodes that receive data from this node. diff --git a/crates/nvisy-engine/src/pipeline/plan/phase.rs b/crates/nvisy-engine/src/pipeline/plan/phase.rs deleted file mode 100644 index a2b0de72..00000000 --- a/crates/nvisy-engine/src/pipeline/plan/phase.rs +++ /dev/null @@ -1,10 +0,0 @@ -//! Phase grouping for the compiled execution plan. - -/// A group of node indices that share the same pipeline phase. -#[derive(Debug, Clone)] -pub struct PhaseGroup { - /// The pipeline phase number (0–5). - pub phase: u8, - /// Indices into `ExecutionPlan::nodes()` for nodes in this phase. - pub node_indices: Vec, -} diff --git a/crates/nvisy-engine/src/pipeline/runs/mod.rs b/crates/nvisy-engine/src/pipeline/runs/mod.rs index 271531c3..bf7520b7 100644 --- a/crates/nvisy-engine/src/pipeline/runs/mod.rs +++ b/crates/nvisy-engine/src/pipeline/runs/mod.rs @@ -113,7 +113,7 @@ pub struct RunFilter { /// /// Runs are created internally by [`Engine::run()`](super::Engine::run). /// External callers can inspect and cancel runs through this trait. -pub trait Runs: Send + Sync { +pub trait EngineRuns: Send + Sync { /// Get a full snapshot of a single run. fn get_run(&self, id: Uuid) -> impl Future> + Send; diff --git a/crates/nvisy-server/src/handler/check.rs b/crates/nvisy-server/src/handler/check.rs index df3fde76..a346f6ff 100644 --- a/crates/nvisy-server/src/handler/check.rs +++ b/crates/nvisy-server/src/handler/check.rs @@ -5,13 +5,14 @@ //! | Method | Path | Description | //! |--------|-----------------------|--------------------------------------| //! | `GET` | `/health` | Liveness probe (`{"status": "ok"}`) | -//! | `GET` | `/api/v1/analytics` | Aggregate pipeline metrics (stub) | +//! 
| `GET` | `/api/v1/analytics` | Aggregate pipeline metrics | use aide::axum::ApiRouter; use aide::axum::routing::get_with; use aide::transform::TransformOperation; +use axum::extract::State; +use nvisy_engine::{DefaultEngine, EngineAnalytics}; -use super::error::{ErrorKind, Result}; use super::response::{Analytics, Health, ServiceStatus}; use crate::extract::Json; use crate::service::ServiceState; @@ -37,8 +38,14 @@ fn health_docs(op: TransformOperation) -> TransformOperation { /// `GET /api/v1/analytics`: retrieve aggregate pipeline analytics. #[tracing::instrument(target = "nvisy_server::check", skip_all)] -async fn analytics() -> Result<Json<Analytics>> { - Err(ErrorKind::NotImplemented.with_message("analytics endpoint not yet implemented")) +async fn analytics(State(engine): State<DefaultEngine>) -> Json<Analytics> { + let snapshot = engine.snapshot().await; + Json(Analytics { + timestamp: snapshot.timestamp, + total_runs: snapshot.total_runs, + total_entities_detected: snapshot.total_entities_detected, + total_redactions_applied: snapshot.total_redactions_applied, + }) } fn analytics_docs(op: TransformOperation) -> TransformOperation { diff --git a/crates/nvisy-server/src/handler/runs.rs b/crates/nvisy-server/src/handler/runs.rs index eebe9141..b1ac4039 100644 --- a/crates/nvisy-server/src/handler/runs.rs +++ b/crates/nvisy-server/src/handler/runs.rs @@ -15,7 +15,7 @@ use aide::axum::routing::{get_with, post_with}; use aide::transform::TransformOperation; use axum::extract::{Query, State}; use axum::http::StatusCode; -use nvisy_engine::{DefaultEngine, Engine, EngineInput, RunFilter, Runs}; +use nvisy_engine::{DefaultEngine, Engine, EngineInput, EngineRuns, RunFilter}; use super::error::{ErrorKind, Result}; use super::request::{NewRun, RunPath}; From 9b5b46426a055a1fbe49799edb42227deb9375d8 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Mon, 16 Mar 2026 03:27:52 +0100 Subject: [PATCH 6/6] refactor(engine, server): move Registry into DefaultEngine, internalize policy extraction Make Registry a
required constructor parameter for DefaultEngine instead of an optional field set externally. The engine now owns all its dependencies and auto-extracts retry/timeout policies from RuntimeConfig in with_config(), removing that responsibility from the server. Co-Authored-By: Claude Opus 4.6 --- crates/nvisy-engine/src/pipeline/default.rs | 66 +++++++++++++------- crates/nvisy-engine/src/pipeline/executor.rs | 5 +- crates/nvisy-server/src/service/mod.rs | 56 ++++++----------- 3 files changed, 66 insertions(+), 61 deletions(-) diff --git a/crates/nvisy-engine/src/pipeline/default.rs b/crates/nvisy-engine/src/pipeline/default.rs index 806347e9..9711eb9f 100644 --- a/crates/nvisy-engine/src/pipeline/default.rs +++ b/crates/nvisy-engine/src/pipeline/default.rs @@ -16,6 +16,7 @@ use jiff::Timestamp; use nvisy_core::Error; use nvisy_core::content::ContentSource; use nvisy_http::HttpClient; +use nvisy_registry::Registry; use tokio::sync::{RwLock, mpsc, watch}; use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; @@ -25,7 +26,9 @@ use super::analytics::{AnalyticsSnapshot, EngineAnalytics}; use super::config::RuntimeConfig; use super::executor::{NodeExecutor, NodeOutput, RunOutput}; use super::plan::{self, ExecutionPlan}; -use super::runs::{EngineRuns, NodeSnapshot, NodeStatus, RunFilter, RunSnapshot, RunStatus, RunSummary}; +use super::runs::{ + EngineRuns, NodeSnapshot, NodeStatus, RunFilter, RunSnapshot, RunStatus, RunSummary, +}; use super::{Engine, EngineInput, EngineOutput}; use crate::graph::policy::{RetryPolicy, TimeoutPolicy}; use crate::operation::{DocumentEnvelope, SharedContext}; @@ -72,6 +75,7 @@ impl Clone for DefaultEngineInner { default_retry: self.default_retry.clone(), default_timeout: self.default_timeout.clone(), http_client: self.http_client.clone(), + registry: self.registry.clone(), runs: RwLock::new(HashMap::new()), } } @@ -87,26 +91,16 @@ struct DefaultEngineInner { default_timeout: Option, /// Shared HTTP client for downstream 
providers. http_client: HttpClient, + /// Content and context storage. + registry: Registry, /// All tracked runs keyed by their UUID. runs: RwLock>, } -impl Default for DefaultEngineInner { - fn default() -> Self { - Self { - config: RuntimeConfig::default(), - default_retry: None, - default_timeout: None, - http_client: HttpClient::default(), - runs: RwLock::new(HashMap::new()), - } - } -} - /// Default [`Engine`] implementation. /// -/// Wraps policies in an `Arc` so cloning is cheap. -#[derive(Clone, Default)] +/// Wraps state in an `Arc` so cloning is cheap. +#[derive(Clone)] pub struct DefaultEngine { inner: Arc, } @@ -123,24 +117,45 @@ impl std::fmt::Debug for DefaultEngine { } impl DefaultEngine { - /// Create a new engine with no default policies. - pub fn new() -> Self { - Self::default() + /// Create a new engine backed by the given registry. + pub fn new(registry: Registry) -> Self { + Self { + inner: Arc::new(DefaultEngineInner { + config: RuntimeConfig::default(), + default_retry: None, + default_timeout: None, + http_client: HttpClient::default(), + registry, + runs: RwLock::new(HashMap::new()), + }), + } } /// Set the base runtime configuration. + /// + /// Automatically extracts default retry and timeout policies from the + /// `[engine]` section, if present. pub fn with_config(mut self, config: RuntimeConfig) -> Self { - Arc::make_mut(&mut self.inner).config = config; + let inner = Arc::make_mut(&mut self.inner); + if let Some(engine) = &config.engine { + if inner.default_retry.is_none() { + inner.default_retry = engine.retry.clone(); + } + if inner.default_timeout.is_none() { + inner.default_timeout = engine.timeout.clone(); + } + } + inner.config = config; self } - /// Set the default retry policy. + /// Override the default retry policy. pub fn with_retry(mut self, policy: RetryPolicy) -> Self { Arc::make_mut(&mut self.inner).default_retry = Some(policy); self } - /// Set the default timeout policy. + /// Override the default timeout policy. 
pub fn with_timeout(mut self, policy: TimeoutPolicy) -> Self { Arc::make_mut(&mut self.inner).default_timeout = Some(policy); self @@ -162,6 +177,11 @@ impl DefaultEngine { &self.inner.http_client } + /// Returns the content and context registry. + pub fn registry(&self) -> &Registry { + &self.inner.registry + } + /// Execute a compiled [`ExecutionPlan`] by spawning concurrent tasks for /// each node. async fn run_graph( @@ -210,7 +230,9 @@ impl DefaultEngine { let _ = rx.wait_for(|&done| done).await; } - let result = executor.execute(&resolved, node_senders, node_receivers).await; + let result = executor + .execute(&resolved, node_senders, node_receivers) + .await; if let Some(tx) = completion_tx { let _ = tx.send(true); diff --git a/crates/nvisy-engine/src/pipeline/executor.rs b/crates/nvisy-engine/src/pipeline/executor.rs index 6f842891..2410c820 100644 --- a/crates/nvisy-engine/src/pipeline/executor.rs +++ b/crates/nvisy-engine/src/pipeline/executor.rs @@ -105,7 +105,10 @@ impl NodeExecutor { match action { GraphNodeKind::Import(_) => self.execute_import(node_id, senders, receivers).await, GraphNodeKind::Export(_) => self.execute_export(node_id, receivers).await, - kind => self.execute_passthrough(node_id, kind, senders, receivers).await, + kind => { + self.execute_passthrough(node_id, kind, senders, receivers) + .await + } } } diff --git a/crates/nvisy-server/src/service/mod.rs b/crates/nvisy-server/src/service/mod.rs index 1c97cf3b..ae1bc901 100644 --- a/crates/nvisy-server/src/service/mod.rs +++ b/crates/nvisy-server/src/service/mod.rs @@ -1,11 +1,11 @@ //! Application state and dependency injection. //! -//! [`ServiceState`] holds shared dependencies (engine, registry) and is -//! threaded through every handler via Axum's `State` extractor. Individual -//! handlers extract only the dependency they need (e.g. `State`) -//! rather than the full state. +//! [`ServiceState`] holds the [`DefaultEngine`] which owns all shared +//! 
dependencies (registry, HTTP client, policies). Individual handlers +//! extract the dependency they need (e.g. `State`) via +//! `FromRef` implementations that pull from the engine. -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use nvisy_engine::{DefaultEngine, RuntimeConfig}; use nvisy_http::HttpClient; @@ -16,7 +16,6 @@ use nvisy_registry::Registry; #[derive(Clone)] pub struct ServiceState { engine: DefaultEngine, - registry: Registry, } impl ServiceState { @@ -35,46 +34,27 @@ impl ServiceState { .unwrap_or_default(); let http_client = HttpClient::new(&http_config); - let mut engine = DefaultEngine::new() + let engine = DefaultEngine::new(registry) .with_config(config) .with_http_client(http_client); - if let Some(retry) = engine - .config() - .engine - .as_ref() - .and_then(|e| e.retry.clone()) - { - engine = engine.with_retry(retry); - } - if let Some(timeout) = engine - .config() - .engine - .as_ref() - .and_then(|e| e.timeout.clone()) - { - engine = engine.with_timeout(timeout); - } - Ok(Self { engine, registry }) + Ok(Self { engine }) } /// Returns the data directory path from the registry. - pub fn data_dir(&self) -> &std::path::Path { - self.registry.base_dir() + pub fn data_dir(&self) -> &Path { + self.engine.registry().base_dir() } } -macro_rules! impl_di { - ($($f:ident: $t:ty),+ $(,)?) => {$( - impl axum::extract::FromRef for $t { - fn from_ref(state: &ServiceState) -> Self { - state.$f.clone() - } - } - )+}; +impl axum::extract::FromRef for DefaultEngine { + fn from_ref(state: &ServiceState) -> Self { + state.engine.clone() + } } -impl_di!( - engine: DefaultEngine, - registry: Registry, -); +impl axum::extract::FromRef for Registry { + fn from_ref(state: &ServiceState) -> Self { + state.engine.registry().clone() + } +}