diff --git a/libs/@local/hashql/core/src/symbol/sym.rs b/libs/@local/hashql/core/src/symbol/sym.rs index 23a675bb88b..388b5a4c65e 100644 --- a/libs/@local/hashql/core/src/symbol/sym.rs +++ b/libs/@local/hashql/core/src/symbol/sym.rs @@ -9,6 +9,7 @@ hashql_macros::define_symbols! { archived, archived_by_id, bar, + base_url, BaseUrl, bit_and, bit_not, @@ -110,6 +111,7 @@ hashql_macros::define_symbols! { unknown, Url, vectors, + version, web_id, // [tidy] sort alphabetically end diff --git a/libs/@local/hashql/mir/src/body/place.rs b/libs/@local/hashql/mir/src/body/place.rs index 32062a9098a..35ab48adb2e 100644 --- a/libs/@local/hashql/mir/src/body/place.rs +++ b/libs/@local/hashql/mir/src/body/place.rs @@ -447,6 +447,12 @@ pub struct Projection<'heap> { pub kind: ProjectionKind<'heap>, } +impl AsRef<Self> for Projection<'_> { + fn as_ref(&self) -> &Self { + self + } +} + /// A projection operation that navigates within structured data. /// /// Projections allow places to reference nested data within structured types. 
diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index 8bd3f25080a..7d0472a7dbd 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -13,8 +13,10 @@ mod island; mod placement; mod splitting; mod statement_placement; +pub mod storage; mod target; mod terminator_placement; +mod vertex; use core::{alloc::Allocator, assert_matches}; @@ -25,6 +27,7 @@ pub use self::{ island::{Island, IslandId, IslandVec}, placement::error::PlacementDiagnosticCategory, target::TargetId, + vertex::VertexType, }; use self::{ fusion::BasicBlockFusion, diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs index a99d024999a..06fb609d65e 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs @@ -16,6 +16,7 @@ use crate::{ local::Local, location::Location, operand::Operand, + place::Projection, rvalue::RValue, statement::{Assign, Statement, StatementKind}, terminator::TerminatorKind, @@ -29,6 +30,7 @@ use crate::{ execution::{ Cost, cost::{StatementCostVec, TraversalCostVec}, + storage::{Access, EntityPath}, }, }, visit::Visitor, @@ -319,3 +321,17 @@ where Ok(()) } } + +/// Determines which backend can access an entity field projection. +/// +/// Walks the projection path through the entity schema to determine whether the field is stored in +/// Postgres (as a column or JSONB path) or in the embedding store. Returns `None` if the path +/// doesn't map to any supported backend storage. 
+/// +/// For example: +/// - `entity.properties.foo` → `Some(Access::Postgres(Direct))` (JSONB) +/// - `entity.encodings.vectors` → `Some(Access::Embedding(Direct))` +/// - `entity.metadata.record_id.entity_id.web_id` → `Some(Access::Postgres(Direct))` +pub(crate) fn entity_projection_access(projections: &[Projection<'_>]) -> Option<Access> { + EntityPath::resolve(projections).map(|(path, _)| path.access()) +} diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs index 047cd59cfa5..2a5e91af656 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs @@ -1,9 +1,6 @@ use core::alloc::Allocator; -use hashql_core::{ - id::{Id as _, bit_vec::DenseBitSet}, - symbol::sym, -}; +use hashql_core::id::{Id as _, bit_vec::DenseBitSet}; use super::{ StatementPlacement, @@ -14,9 +11,10 @@ use crate::{ context::MirContext, pass::{ execution::{ - Cost, + Cost, VertexType, cost::{StatementCostVec, TraversalCostVec}, - statement_placement::lookup::{Access, entity_projection_access}, + statement_placement::common::entity_projection_access, + storage::Access, }, transform::Traversals, }, @@ -35,22 +33,19 @@ fn is_supported_place<'heap>( // For GraphReadFilter bodies, local 1 is the filter argument (vertex). Check if the // projection path maps to an Embedding-accessible field. 
if matches!(body.source, Source::GraphReadFilter(_)) && place.local.as_usize() == 1 { - let local_type = body.local_decls[place.local].r#type; - let type_name = context - .env - .r#type(local_type) - .kind - .opaque() - .map_or_else(|| unreachable!(), |opaque| opaque.name); - - if type_name == sym::path::Entity { - return matches!( - entity_projection_access(&place.projections), - Some(Access::Embedding(_)) - ); - } + let decl = &body.local_decls[place.local]; + let Some(vertex_type) = VertexType::from_local(context.env, decl) else { + unimplemented!("lookup for declared type") + }; - unimplemented!("unimplemented lookup for declared type") + match vertex_type { + VertexType::Entity => { + return matches!( + entity_projection_access(&place.projections), + Some(Access::Embedding(_)) + ); + } + } } domain.contains(place.local) diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/entity.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/entity.rs deleted file mode 100644 index 1d194e9d27f..00000000000 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/entity.rs +++ /dev/null @@ -1,135 +0,0 @@ -use hashql_core::symbol::sym; - -use super::trie::{Access, AccessMode, PathNode}; - -/// Entity path access trie mapping field paths to backend access types. -/// -/// The trie structure mirrors the entity schema, with paths mapping to their storage location: -/// -/// - `properties` → JSONB column in `entity_editions` -/// - `encodings.vectors` → Embedding backend -/// - `metadata.*` → Various columns in `entity_temporal_metadata`, `entity_editions`, etc. -/// - `link_data.*` → `entity_edge` table via joins -/// -/// Entry point is the `entity_temporal_metadata` table which joins to `entity_ids`, -/// `entity_editions`, `entity_is_of_type`, and `entity_edge`. -// The static ref here is required, so that the symbols are not duplicated across crates and have -// the same interned string. 
-pub(super) static ENTITY_PATHS: PathNode = PathNode::root(&[ - // entity_editions.properties (JSONB) - PathNode::jsonb(sym::properties), - // (tbd) encodings - PathNode::branch( - sym::encodings, - None, - &[ - // Vectors are stored outside the entity inside of an embeddings database - PathNode::branch(sym::vectors, Access::Embedding(AccessMode::Direct), &[]), - ], - ), - PathNode::branch( - sym::metadata, - None, - &[ - // entity_temporal_metadata: web_id, entity_uuid, draft_id, entity_edition_id - PathNode::branch( - sym::record_id, - Access::Postgres(AccessMode::Composite), - &[ - // entity_temporal_metadata: web_id, entity_uuid, draft_id - PathNode::branch( - sym::entity_id, - Access::Postgres(AccessMode::Composite), - &[ - // entity_temporal_metadata.web_id - PathNode::leaf(sym::web_id, Access::Postgres(AccessMode::Direct)), - // entity_temporal_metadata.entity_uuid - PathNode::leaf(sym::entity_uuid, Access::Postgres(AccessMode::Direct)), - // entity_temporal_metadata.draft_id - PathNode::leaf(sym::draft_id, Access::Postgres(AccessMode::Direct)), - ], - ), - // entity_temporal_metadata.entity_edition_id - PathNode::leaf(sym::edition_id, Access::Postgres(AccessMode::Direct)), - ], - ), - // entity_temporal_metadata: decision_time, transaction_time - PathNode::branch( - sym::temporal_versioning, - Access::Postgres(AccessMode::Composite), - &[ - // entity_temporal_metadata.decision_time - PathNode::leaf(sym::decision_time, Access::Postgres(AccessMode::Direct)), - // entity_temporal_metadata.transaction_time - PathNode::leaf(sym::transaction_time, Access::Postgres(AccessMode::Direct)), - ], - ), - // entity_is_of_type (via JOIN) - PathNode::leaf(sym::entity_type_ids, Access::Postgres(AccessMode::Direct)), - // entity_editions.archived - PathNode::leaf(sym::archived, Access::Postgres(AccessMode::Direct)), - // entity_editions.confidence - PathNode::leaf(sym::confidence, Access::Postgres(AccessMode::Direct)), - // spans entity_ids.provenance + 
entity_editions.provenance - PathNode::branch( - sym::provenance, - None, - &[ - // entity_ids.provenance (JSONB) - PathNode::jsonb(sym::inferred), - // entity_editions.provenance (JSONB) - PathNode::jsonb(sym::edition), - ], - ), - // entity_editions.property_metadata (JSONB) - PathNode::jsonb(sym::properties), - ], - ), - // contains synthesized draft_id fields - PathNode::branch( - sym::link_data, - None, - &[ - // draft_id is synthesized (always None), not stored - PathNode::branch( - sym::left_entity_id, - None, - &[ - // entity_has_left_entity -> entity_edge.target_web_id - PathNode::leaf(sym::web_id, Access::Postgres(AccessMode::Direct)), - // entity_has_left_entity -> entity_edge.target_entity_uuid - PathNode::leaf(sym::entity_uuid, Access::Postgres(AccessMode::Direct)), - // synthesized, not in entity_edge - PathNode::leaf(sym::draft_id, None), - ], - ), - // draft_id is synthesized (always None), not stored - PathNode::branch( - sym::right_entity_id, - None, - &[ - // entity_has_right_entity -> entity_edge.target_web_id - PathNode::leaf(sym::web_id, Access::Postgres(AccessMode::Direct)), - // entity_has_right_entity -> entity_edge.target_entity_uuid - PathNode::leaf(sym::entity_uuid, Access::Postgres(AccessMode::Direct)), - // synthesized, not in entity_edge - PathNode::leaf(sym::draft_id, None), - ], - ), - // entity_edge.confidence (via entity_has_left_entity) - PathNode::leaf( - sym::left_entity_confidence, - Access::Postgres(AccessMode::Direct), - ), - // entity_edge.provenance (JSONB, via entity_has_left_entity) - PathNode::jsonb(sym::left_entity_provenance), - // entity_edge.confidence (via entity_has_right_entity) - PathNode::leaf( - sym::right_entity_confidence, - Access::Postgres(AccessMode::Direct), - ), - // entity_edge.provenance (JSONB, via entity_has_right_entity) - PathNode::jsonb(sym::right_entity_provenance), - ], - ), -]); diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/mod.rs 
b/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/mod.rs deleted file mode 100644 index e7a24e6b0a7..00000000000 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/mod.rs +++ /dev/null @@ -1,41 +0,0 @@ -mod entity; -mod trie; - -#[cfg(test)] -mod tests; - -use self::entity::ENTITY_PATHS; -pub(crate) use self::trie::Access; -use crate::body::place::{Projection, ProjectionKind}; - -/// Determines which backend can access an entity field projection. -/// -/// Walks the projection path through the entity schema trie to determine whether the field is -/// stored in Postgres (as a column or JSONB path) or in the embedding store. Returns `None` if -/// the path doesn't map to any supported backend storage. -/// -/// For example: -/// - `entity.properties.foo` → `Some(Access::Postgres(Direct))` (JSONB) -/// - `entity.encodings.vectors` → `Some(Access::Embedding(Direct))` -/// - `entity.metadata.record_id.entity_id.web_id` → `Some(Access::Postgres(Direct))` -pub(crate) fn entity_projection_access(projections: &[Projection<'_>]) -> Option<Access> { - let mut node = &ENTITY_PATHS; - - for projection in projections { - if node.children.is_empty() { - return node.otherwise; - } - - let ProjectionKind::FieldByName(name) = projection.kind else { - return node.otherwise; - }; - - let Some(next_node) = node.lookup(name) else { - return node.otherwise; - }; - - node = next_node; - } - - node.access -} diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/tests.rs deleted file mode 100644 index e3a338d5cc5..00000000000 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/tests.rs +++ /dev/null @@ -1,109 +0,0 @@ -//! Unit tests for entity projection path lookup. 
- -use hashql_core::{symbol::sym, r#type::TypeId}; - -use super::{ - entity_projection_access, - trie::{Access, AccessMode}, -}; -use crate::body::place::{Projection, ProjectionKind}; - -/// Helper to create a `FieldByName` projection. -fn proj(name: impl Into<hashql_core::symbol::Symbol<'static>>) -> Projection<'static> { - Projection { - kind: ProjectionKind::FieldByName(name.into()), - r#type: TypeId::PLACEHOLDER, - } -} - -/// `[.properties]` → `Access::Postgres(Direct)` (JSONB column). -#[test] -fn properties_is_postgres() { - let projections = &[proj(sym::properties)]; - let access = entity_projection_access(projections); - - assert_eq!(access, Some(Access::Postgres(AccessMode::Direct))); -} - -/// `[.properties.foo.bar]` → Postgres (JSONB otherwise). -/// -/// JSONB nodes have `otherwise` set, so any sub-path is also Postgres-accessible. -#[test] -fn properties_subpath_is_postgres() { - let projections = &[proj(sym::properties), proj(sym::foo), proj(sym::bar)]; - let access = entity_projection_access(projections); - - assert_eq!(access, Some(Access::Postgres(AccessMode::Direct))); -} - -/// `[.encodings.vectors]` → `Access::Embedding(Direct)`. -#[test] -fn vectors_is_embedding() { - let projections = &[proj(sym::encodings), proj(sym::vectors)]; - let access = entity_projection_access(projections); - - assert_eq!(access, Some(Access::Embedding(AccessMode::Direct))); -} - -/// Various metadata paths map to Postgres columns. 
-#[test] -fn metadata_columns_are_postgres() { - // metadata.archived -> Direct - let projections = &[proj(sym::metadata), proj(sym::archived)]; - assert_eq!( - entity_projection_access(projections), - Some(Access::Postgres(AccessMode::Direct)) - ); - - // metadata.record_id -> Composite - let projections = &[proj(sym::metadata), proj(sym::record_id)]; - assert_eq!( - entity_projection_access(projections), - Some(Access::Postgres(AccessMode::Composite)) - ); - - // metadata.record_id.entity_id.web_id -> Direct - let projections = &[ - proj(sym::metadata), - proj(sym::record_id), - proj(sym::entity_id), - proj(sym::web_id), - ]; - assert_eq!( - entity_projection_access(projections), - Some(Access::Postgres(AccessMode::Direct)) - ); - - // metadata.temporal_versioning.decision_time -> Direct - let projections = &[ - proj(sym::metadata), - proj(sym::temporal_versioning), - proj(sym::decision_time), - ]; - assert_eq!( - entity_projection_access(projections), - Some(Access::Postgres(AccessMode::Direct)) - ); -} - -/// `link_data.left_entity_id.draft_id` → `None` (synthesized, not stored). -#[test] -fn link_data_synthesized_is_none() { - let projections = &[ - proj(sym::link_data), - proj(sym::left_entity_id), - proj(sym::draft_id), - ]; - let access = entity_projection_access(projections); - - assert_eq!(access, None); -} - -/// Invalid path like `[.unknown]` → `None`. 
-#[test] -fn unknown_path_returns_none() { - let projections = &[proj(sym::unknown)]; - let access = entity_projection_access(projections); - - assert_eq!(access, None); -} diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/trie.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/trie.rs deleted file mode 100644 index f7f56e2f2f5..00000000000 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/trie.rs +++ /dev/null @@ -1,82 +0,0 @@ -use hashql_core::symbol::{Symbol, sym}; - -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub(crate) enum AccessMode { - Direct, - Composite, -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub(crate) enum Access { - Postgres(AccessMode), - Embedding(AccessMode), -} - -/// A node in the path access trie. -/// -/// Each node represents a field in a path hierarchy and defines: -/// - The field name this node matches (`name`). -/// - What access applies when the path ends at this node (`access`). -/// - What access applies for unknown/deeper paths (`otherwise`). -/// - What children exist for further path traversal. -#[derive(Debug, Copy, Clone)] -pub(crate) struct PathNode { - /// Field name this node matches (empty string for root). - pub name: Symbol<'static>, - /// Access level when the path ends at this node (no more projections). - pub access: Option<Access>, - /// Access level for paths beyond known children (e.g., JSONB allows any sub-path). - pub otherwise: Option<Access>, - /// Child nodes. - pub children: &'static [Self], -} - -impl PathNode { - pub(crate) const fn root(children: &'static [Self]) -> Self { - Self { - name: sym::entity, - access: None, - otherwise: None, - children, - } - } - - pub(crate) const fn leaf( - name: Symbol<'static>, - access: impl [const] Into<Option<Access>>, - ) -> Self { - Self { - name, - access: access.into(), - otherwise: None, - children: &[], - } - } - - /// Creates a JSONB node where any sub-path is also Postgres-accessible. 
- pub(crate) const fn jsonb(name: Symbol<'static>) -> Self { - Self { - name, - access: Some(Access::Postgres(AccessMode::Direct)), - otherwise: Some(Access::Postgres(AccessMode::Direct)), - children: &[], - } - } - - pub(crate) const fn branch( - name: Symbol<'static>, - access: impl [const] Into<Option<Access>>, - children: &'static [Self], - ) -> Self { - Self { - name, - access: access.into(), - otherwise: None, - children, - } - } - - pub(crate) fn lookup(&self, name: Symbol<'_>) -> Option<&Self> { - self.children.iter().find(|node| node.name == name) - } -} diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs index 326e3d5d396..d39c0c77200 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs @@ -17,7 +17,6 @@ mod tests; mod common; mod embedding; mod interpret; -mod lookup; mod postgres; pub(crate) use self::{ diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs index ac99b437435..2a34f13341e 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs @@ -4,7 +4,6 @@ use core::{alloc::Allocator, ops::ControlFlow}; use hashql_core::{ debug_panic, id::{Id as _, bit_vec::DenseBitSet}, - symbol::sym, sync::lock::LocalLock, r#type::{ self, RecursionBoundary, Type, TypeId, @@ -30,8 +29,10 @@ use crate::{ context::MirContext, pass::{ execution::{ + VertexType, cost::{Cost, StatementCostVec, TraversalCostVec}, - statement_placement::lookup::{Access, entity_projection_access}, + statement_placement::common::entity_projection_access, + storage::Access, }, transform::Traversals, }, @@ -372,22 +373,17 @@ impl<'heap, A: Allocator> PostgresSupported<'_, 
'heap, A> { Some(self.env_domain.contains(field)) } Local::VERTEX => { - let local_type = body.local_decls[place.local].r#type; - let type_name = context - .env - .r#type(local_type) - .kind - .opaque() - .map_or_else(|| unreachable!(), |opaque| opaque.name); - - if type_name == sym::path::Entity { - return Some(matches!( + let decl = &body.local_decls[place.local]; + let Some(vertex_type) = VertexType::from_local(context.env, decl) else { + unimplemented!("lookup for declared type") + }; + + match vertex_type { + VertexType::Entity => Some(matches!( entity_projection_access(&place.projections), Some(Access::Postgres(_)) - )); + )), } - - unimplemented!("unimplemented lookup for declared type") } _ => None, } diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/access.rs b/libs/@local/hashql/mir/src/pass/execution/storage/access.rs new file mode 100644 index 00000000000..581408bf766 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/storage/access.rs @@ -0,0 +1,11 @@ +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub(crate) enum AccessMode { + Direct, + Composite, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub(crate) enum Access { + Postgres(AccessMode), + Embedding(AccessMode), +} diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs b/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs new file mode 100644 index 00000000000..c227f170a40 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs @@ -0,0 +1,264 @@ +use hashql_core::{ + id::{Id, bit_vec::FiniteBitSet}, + symbol::{ConstantSymbol, sym}, +}; + +use super::access::{Access, AccessMode}; +use crate::body::place::{Projection, ProjectionKind}; + +macro_rules! sym { + ($($sym:tt)::*) => { + sym::$($sym)::*::CONST + }; +} + +/// Resolved entity field path. +/// +/// Each variant identifies a specific storage location in the entity schema. 
Consumers can +/// exhaustively match on this to generate backend-specific access (SQL expressions, placement +/// decisions, etc.) without duplicating path resolution logic. +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Id)] +pub enum EntityPath { + /// `properties.*` — JSONB column in `entity_editions`. + Properties, + /// `encodings.vectors` — embedding backend. + Vectors, + + /// `metadata.record_id` — composite of [`EntityId`] + [`EditionId`]. + /// + /// [`EntityId`]: Self::EntityId + /// [`EditionId`]: Self::EditionId + RecordId, + /// `metadata.record_id.entity_id` — composite of `web_id` + `entity_uuid` + `draft_id`. + EntityId, + /// `metadata.record_id.entity_id.web_id` — `entity_temporal_metadata.web_id`. + WebId, + /// `metadata.record_id.entity_id.entity_uuid` — `entity_temporal_metadata.entity_uuid`. + EntityUuid, + /// `metadata.record_id.entity_id.draft_id` — `entity_temporal_metadata.draft_id`. + DraftId, + /// `metadata.record_id.edition_id` — `entity_temporal_metadata.entity_edition_id`. + EditionId, + + /// `metadata.temporal_versioning` — composite of [`DecisionTime`] + [`TransactionTime`]. + /// + /// [`DecisionTime`]: Self::DecisionTime + /// [`TransactionTime`]: Self::TransactionTime + TemporalVersioning, + /// `metadata.temporal_versioning.decision_time` — `entity_temporal_metadata.decision_time`. + DecisionTime, + /// `metadata.temporal_versioning.transaction_time` — + /// `entity_temporal_metadata.transaction_time`. + TransactionTime, + + /// `metadata.entity_type_ids` — `entity_is_of_type` table (via JOIN). + EntityTypeIds, + /// `metadata.archived` — `entity_editions.archived`. + Archived, + /// `metadata.confidence` — `entity_editions.confidence`. + Confidence, + + /// `metadata.provenance.inferred` — JSONB in `entity_ids.provenance`. + ProvenanceInferred, + /// `metadata.provenance.edition` — JSONB in `entity_editions.provenance`. 
+ ProvenanceEdition, + /// `metadata.properties.*` — JSONB (`property_metadata`) in `entity_editions`. + PropertyMetadata, + + /// `link_data.left_entity_id.web_id` — `entity_edge.target_web_id` (via + /// `entity_has_left_entity`). + LeftEntityWebId, + /// `link_data.left_entity_id.entity_uuid` — `entity_edge.target_entity_uuid` (via + /// `entity_has_left_entity`). + LeftEntityUuid, + /// `link_data.right_entity_id.web_id` — `entity_edge.target_web_id` (via + /// `entity_has_right_entity`). + RightEntityWebId, + /// `link_data.right_entity_id.entity_uuid` — `entity_edge.target_entity_uuid` (via + /// `entity_has_right_entity`). + RightEntityUuid, + /// `link_data.left_entity_confidence` — `entity_edge.confidence` (via + /// `entity_has_left_entity`). + LeftEntityConfidence, + /// `link_data.right_entity_confidence` — `entity_edge.confidence` (via + /// `entity_has_right_entity`). + RightEntityConfidence, + /// `link_data.left_entity_provenance` — JSONB in `entity_edge.provenance` (via + /// `entity_has_left_entity`). + LeftEntityProvenance, + /// `link_data.right_entity_provenance` — JSONB in `entity_edge.provenance` (via + /// `entity_has_right_entity`). + RightEntityProvenance, +} + +type FiniteBitSetWidth = u32; +const _: () = { + assert!( + (FiniteBitSetWidth::BITS as usize) >= core::mem::variant_count::<EntityPath>(), + "entity path count exceeds finite bitset width" + ); +}; + +pub type EntityPathBitSet = FiniteBitSet<EntityPath, FiniteBitSetWidth>; + +impl EntityPath { + #[must_use] + pub fn resolve(projections: &[Projection<'_>]) -> Option<(Self, usize)> { + resolve(projections) + } + + /// Returns the backend access mode for this path. 
+ pub(crate) const fn access(self) -> Access { + match self { + Self::Vectors => Access::Embedding(AccessMode::Direct), + + Self::RecordId | Self::EntityId | Self::TemporalVersioning => { + Access::Postgres(AccessMode::Composite) + } + + Self::Properties + | Self::WebId + | Self::EntityUuid + | Self::DraftId + | Self::EditionId + | Self::DecisionTime + | Self::TransactionTime + | Self::EntityTypeIds + | Self::Archived + | Self::Confidence + | Self::ProvenanceInferred + | Self::ProvenanceEdition + | Self::PropertyMetadata + | Self::LeftEntityWebId + | Self::LeftEntityUuid + | Self::RightEntityWebId + | Self::RightEntityUuid + | Self::LeftEntityConfidence + | Self::RightEntityConfidence + | Self::LeftEntityProvenance + | Self::RightEntityProvenance => Access::Postgres(AccessMode::Direct), + } + } + + const fn is_jsonb(self) -> bool { + matches!( + self, + Self::Properties + | Self::ProvenanceInferred + | Self::ProvenanceEdition + | Self::PropertyMetadata + | Self::LeftEntityProvenance + | Self::RightEntityProvenance + ) + } +} + +#[inline] +fn project(projections: &[Projection<'_>], index: &mut usize) -> Option<ConstantSymbol> { + let projection = projections.get(*index).and_then(|projection| { + if let ProjectionKind::FieldByName(name) = projection.kind { + name.as_constant() + } else { + None + } + }); + + if projection.is_some() { + *index += 1; + } + + projection +} + +/// Resolves an entity field path to an [`EntityPath`]. +/// +/// Walks a sequence of field name projections through the entity schema and returns the resolved +/// path, or `None` if the path doesn't map to any known storage location (including synthesized +/// fields like `link_data.*.draft_id`). +#[expect(clippy::match_same_arms, clippy::allow_attributes)] +fn resolve(projections: &[Projection<'_>]) -> Option<(EntityPath, usize)> { + #[allow(clippy::enum_glob_use, reason = "clarity")] + use EntityPath::*; + + let mut index = 0; + + macro_rules! 
next { + () => { + project(projections, &mut index) + }; + + (else $cond:expr) => {{ + if index >= projections.len() { + return Some(($cond, index)); + } + + next!()? + }}; + } + + let path = match next!()? { + // entity_editions.properties (JSONB) + sym!(properties) => Properties, + sym!(encodings) => match next!()? { + sym!(vectors) => Vectors, + _ => return None, + }, + sym!(metadata) => match next!()? { + sym!(record_id) => match next!(else RecordId) { + sym!(entity_id) => match next!(else EntityId) { + sym!(web_id) => WebId, + sym!(entity_uuid) => EntityUuid, + sym!(draft_id) => DraftId, + _ => return None, + }, + sym!(edition_id) => EditionId, + _ => return None, + }, + sym!(temporal_versioning) => match next!(else TemporalVersioning) { + sym!(decision_time) => DecisionTime, + sym!(transaction_time) => TransactionTime, + _ => return None, + }, + sym!(entity_type_ids) => EntityTypeIds, + sym!(archived) => Archived, + sym!(confidence) => Confidence, + sym!(provenance) => match next!()? { + sym!(inferred) => ProvenanceInferred, + sym!(edition) => ProvenanceEdition, + _ => return None, + }, + sym!(properties) => PropertyMetadata, + _ => return None, + }, + sym!(link_data) => match next!()? { + sym!(left_entity_id) => match next!()? { + sym!(web_id) => LeftEntityWebId, + sym!(entity_uuid) => LeftEntityUuid, + // draft_id is synthesized (always None), not stored + sym!(draft_id) => return None, + _ => return None, + }, + sym!(right_entity_id) => match next!()? 
{ + sym!(web_id) => RightEntityWebId, + sym!(entity_uuid) => RightEntityUuid, + // draft_id is synthesized (always None), not stored + sym!(draft_id) => return None, + _ => return None, + }, + sym!(left_entity_confidence) => LeftEntityConfidence, + sym!(right_entity_confidence) => RightEntityConfidence, + sym!(left_entity_provenance) => LeftEntityProvenance, + sym!(right_entity_provenance) => RightEntityProvenance, + _ => return None, + }, + + _ => return None, + }; + + // JSONB paths allow arbitrary sub-paths; all others must be fully resolved + if !path.is_jsonb() && projections.get(index).is_some() { + return None; + } + + Some((path, index)) +} diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/mod.rs b/libs/@local/hashql/mir/src/pass/execution/storage/mod.rs new file mode 100644 index 00000000000..b7caca00e8a --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/storage/mod.rs @@ -0,0 +1,9 @@ +mod access; +mod entity; + +#[cfg(test)] +mod tests; + +pub use entity::{EntityPath, EntityPathBitSet}; + +pub(crate) use self::access::Access; diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/tests.rs b/libs/@local/hashql/mir/src/pass/execution/storage/tests.rs new file mode 100644 index 00000000000..5dda38abefd --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/storage/tests.rs @@ -0,0 +1,238 @@ +//! Unit tests for entity projection path lookup. + +use hashql_core::{symbol::sym, r#type::TypeId}; + +use super::access::{Access, AccessMode}; +use crate::{ + body::{ + local::Local, + place::{Projection, ProjectionKind}, + }, + pass::execution::storage::EntityPath, +}; + +/// Helper to create a `FieldByName` projection. +fn proj(name: impl Into<hashql_core::symbol::Symbol<'static>>) -> Projection<'static> { + Projection { + kind: ProjectionKind::FieldByName(name.into()), + r#type: TypeId::PLACEHOLDER, + } +} + +/// `[.properties]` → `Access::Postgres(Direct)` (JSONB column). 
+#[test] +fn properties_is_postgres() { + let projections = &[proj(sym::properties)]; + let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); + + assert_eq!(access, Some(Access::Postgres(AccessMode::Direct))); +} + +/// `[.properties.foo.bar]` → Postgres (JSONB otherwise). +/// +/// JSONB nodes have `otherwise` set, so any sub-path is also Postgres-accessible. +#[test] +fn properties_subpath_is_postgres() { + let projections = &[proj(sym::properties), proj(sym::foo), proj(sym::bar)]; + let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); + + assert_eq!(access, Some(Access::Postgres(AccessMode::Direct))); +} + +/// `[.encodings.vectors]` → `Access::Embedding(Direct)`. +#[test] +fn vectors_is_embedding() { + let projections = &[proj(sym::encodings), proj(sym::vectors)]; + let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); + + assert_eq!(access, Some(Access::Embedding(AccessMode::Direct))); +} + +/// Various metadata paths map to Postgres columns. 
+#[test] +fn metadata_columns_are_postgres() { + // metadata.archived -> Direct + let projections = &[proj(sym::metadata), proj(sym::archived)]; + assert_eq!( + EntityPath::resolve(projections).map(|(path, _)| path.access()), + Some(Access::Postgres(AccessMode::Direct)) + ); + + // metadata.record_id -> Composite + let projections = &[proj(sym::metadata), proj(sym::record_id)]; + assert_eq!( + EntityPath::resolve(projections).map(|(path, _)| path.access()), + Some(Access::Postgres(AccessMode::Composite)) + ); + + // metadata.record_id.entity_id.web_id -> Direct + let projections = &[ + proj(sym::metadata), + proj(sym::record_id), + proj(sym::entity_id), + proj(sym::web_id), + ]; + assert_eq!( + EntityPath::resolve(projections).map(|(path, _)| path.access()), + Some(Access::Postgres(AccessMode::Direct)) + ); + + // metadata.temporal_versioning.decision_time -> Direct + let projections = &[ + proj(sym::metadata), + proj(sym::temporal_versioning), + proj(sym::decision_time), + ]; + assert_eq!( + EntityPath::resolve(projections).map(|(path, _)| path.access()), + Some(Access::Postgres(AccessMode::Direct)) + ); +} + +/// `link_data.left_entity_id.draft_id` → `None` (synthesized, not stored). +#[test] +fn link_data_synthesized_is_none() { + let projections = &[ + proj(sym::link_data), + proj(sym::left_entity_id), + proj(sym::draft_id), + ]; + let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); + + assert_eq!(access, None); +} + +/// Invalid path like `[.unknown]` → `None`. +#[test] +fn unknown_path_returns_none() { + let projections = &[proj(sym::unknown)]; + let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); + + assert_eq!(access, None); +} + +/// The returned index reflects how many projections were consumed during resolution. 
+#[test]
+fn index_counts_consumed_projections() {
+    // One segment: `.properties` consumes 1 projection.
+    let properties = &[proj(sym::properties)];
+    let resolved = EntityPath::resolve(properties);
+    let expected = Some((EntityPath::Properties, 1));
+
+    assert_eq!(resolved, expected);
+
+    // Two segments: `.encodings.vectors` consumes 2 projections.
+    let vectors = &[proj(sym::encodings), proj(sym::vectors)];
+    let resolved = EntityPath::resolve(vectors);
+    let expected = Some((EntityPath::Vectors, 2));
+
+    assert_eq!(resolved, expected);
+
+    // Three segments: `.metadata.provenance.inferred` consumes 3 projections.
+    let provenance_inferred = &[
+        proj(sym::metadata),
+        proj(sym::provenance),
+        proj(sym::inferred),
+    ];
+    let resolved = EntityPath::resolve(provenance_inferred);
+    let expected = Some((EntityPath::ProvenanceInferred, 3));
+
+    assert_eq!(resolved, expected);
+
+    // Four segments: `.metadata.record_id.entity_id.web_id` consumes 4 projections.
+    let web_id = &[
+        proj(sym::metadata),
+        proj(sym::record_id),
+        proj(sym::entity_id),
+        proj(sym::web_id),
+    ];
+    let resolved = EntityPath::resolve(web_id);
+    let expected = Some((EntityPath::WebId, 4));
+
+    assert_eq!(resolved, expected);
+}
+
+/// Composite paths that stop early via `next!(else ...)` still report the correct index.
+#[test]
+fn index_for_composite_early_exit() {
+    // `.metadata.record_id` with nothing after it → RecordId at index 2
+    let record_id = &[proj(sym::metadata), proj(sym::record_id)];
+    let resolved = EntityPath::resolve(record_id);
+    let expected = Some((EntityPath::RecordId, 2));
+
+    assert_eq!(resolved, expected);
+
+    // `.metadata.record_id.entity_id` with no leaf → EntityId at index 3
+    let entity_id = &[
+        proj(sym::metadata),
+        proj(sym::record_id),
+        proj(sym::entity_id),
+    ];
+    let resolved = EntityPath::resolve(entity_id);
+    let expected = Some((EntityPath::EntityId, 3));
+
+    assert_eq!(resolved, expected);
+
+    // `.metadata.temporal_versioning` with no leaf → TemporalVersioning at index 2
+    let temporal_versioning = &[proj(sym::metadata), proj(sym::temporal_versioning)];
+    let resolved = EntityPath::resolve(temporal_versioning);
+    let expected = Some((EntityPath::TemporalVersioning, 2));
+
+    assert_eq!(resolved, expected);
+}
+
+/// A projection that is not `FieldByName` (e.g. `Index`) following a composite node must yield
+/// `None` rather than the composite path. Previously the `next!(else ...)` macro conflated "no
+/// more projections" with "non-FieldByName projection", bypassing the exhaustion guard.
+#[test]
+fn non_field_projection_after_composite_returns_none() {
+    let indexed = Projection {
+        r#type: TypeId::PLACEHOLDER,
+        kind: ProjectionKind::Index(Local::new(0)),
+    };
+
+    // An index projection right after `.metadata.record_id` is not a valid entity path.
+    let after_record_id = &[proj(sym::metadata), proj(sym::record_id), indexed];
+    assert_eq!(EntityPath::resolve(after_record_id), None);
+
+    // The same holds after `.metadata.record_id.entity_id`.
+    let after_entity_id = &[
+        proj(sym::metadata),
+        proj(sym::record_id),
+        proj(sym::entity_id),
+        indexed,
+    ];
+    assert_eq!(EntityPath::resolve(after_entity_id), None);
+
+    // The same holds after `.metadata.temporal_versioning`.
+    let after_temporal_versioning = &[
+        proj(sym::metadata),
+        proj(sym::temporal_versioning),
+        indexed,
+    ];
+    assert_eq!(EntityPath::resolve(after_temporal_versioning), None);
+}
+
+/// JSONB paths stop consuming at the storage boundary; sub-path projections are excess.
+#[test]
+fn jsonb_index_excludes_subpath() {
+    // `.properties.foo.bar`: resolution stops at Properties (index 1); 2 projections remain.
+    let properties_subpath = &[proj(sym::properties), proj(sym::foo), proj(sym::bar)];
+    let resolved = EntityPath::resolve(properties_subpath);
+    let expected = Some((EntityPath::Properties, 1));
+
+    assert_eq!(resolved, expected);
+
+    // `.metadata.provenance.inferred.foo.bar`: resolution stops at ProvenanceInferred (index 3).
+    let provenance_subpath = &[
+        proj(sym::metadata),
+        proj(sym::provenance),
+        proj(sym::inferred),
+        proj(sym::foo),
+        proj(sym::bar),
+    ];
+    let resolved = EntityPath::resolve(provenance_subpath);
+    let expected = Some((EntityPath::ProvenanceInferred, 3));
+
+    assert_eq!(resolved, expected);
+}
diff --git a/libs/@local/hashql/mir/src/pass/execution/vertex.rs b/libs/@local/hashql/mir/src/pass/execution/vertex.rs
new file mode 100644
index 00000000000..b0d8441da76
--- /dev/null
+++ b/libs/@local/hashql/mir/src/pass/execution/vertex.rs
@@ -0,0 +1,83 @@
+use hashql_core::{
+    debug_panic,
+    symbol::sym,
+    r#type::{
+        TypeId,
+        environment::Environment,
+        kind::{OpaqueType, TypeKind},
+    },
+};
+
+use crate::body::local::LocalDecl;
+
+/// Upper bound on how many `Apply`/`Generic` wrappers [`peel`] follows before giving up.
+const MAX_PEEL_DEPTH: usize = 32;
+
+/// Follows the `base` of `Apply` and `Generic` types, starting at `id`, until an opaque type is
+/// found.
+///
+/// `depth` counts the wrappers already followed; [`VertexType::from_local`] starts at `0`.
+///
+/// Returns `None` if the chain ends in any other kind of type. Once `depth` exceeds
+/// [`MAX_PEEL_DEPTH`], `debug_panic!` is invoked and `None` is the fallback result.
+fn peel<'heap>(
+    env: &Environment<'heap>,
+    id: TypeId,
+    depth: usize,
+) -> Option<&'heap OpaqueType<'heap>> {
+    let r#type = env.r#type(id);
+
+    // A depth cap suffices here; no sophisticated cycle detection is needed. A cycle can only
+    // arise when apply and generic substitutions are its sole members and have not been
+    // resolved and simplified away, which should have produced a type error earlier anyway.
+    if depth > MAX_PEEL_DEPTH {
+        debug_panic!("maximum opaque type recursion depth exceeded");
+
+        return None;
+    }
+
+    match r#type.kind {
+        TypeKind::Opaque(opaque_type) => Some(opaque_type),
+        TypeKind::Apply(apply) => peel(env, apply.base, depth + 1),
+        TypeKind::Generic(generic) => peel(env, generic.base, depth + 1),
+        // No wildcard arm: a newly added `TypeKind` variant fails to compile until it is
+        // handled here.
+        TypeKind::Primitive(_)
+        | TypeKind::Intrinsic(_)
+        | TypeKind::Struct(_)
+        | TypeKind::Tuple(_)
+        | TypeKind::Union(_)
+        | TypeKind::Intersection(_)
+        | TypeKind::Closure(_)
+        | TypeKind::Never
+        | TypeKind::Unknown
+        | TypeKind::Param(_)
+        | TypeKind::Infer(_) => None,
+    }
+}
+
+/// The vertex type of a [`GraphReadFilter`] body's vertex argument.
+///
+/// [`GraphReadFilter`]: crate::body::Source::GraphReadFilter
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+pub enum VertexType {
+    /// The vertex's opaque type is named `sym::path::Entity`.
+    Entity,
+}
+
+impl VertexType {
+    /// Classifies a local declaration as a vertex type based on its opaque type name.
+    ///
+    /// The declaration's type is first peeled through `Apply` and `Generic` wrappers.
+    ///
+    /// Returns `None` if no opaque type is found, if `as_constant()` yields `None` for the
+    /// opaque type's name, or if that name is not a recognized vertex type.
+    pub fn from_local(env: &Environment<'_>, decl: &LocalDecl<'_>) -> Option<Self> {
+        let opaque = peel(env, decl.r#type, 0)?;
+
+        match opaque.name.as_constant()? {
+            sym::path::Entity::CONST => Some(Self::Entity),
+            _ => None,
+        }
+    }
+}