From 9dcc6d866a2208fa6847fdd38b2197476c35b84b Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sat, 28 Feb 2026 17:15:32 +0100 Subject: [PATCH 01/32] feat: checkpoint feat: checkpoint (II) feat: checkpoint (III) feat: snapshot vec feat: add dedicated filter feat: checkpoint feat: filter implementation feat: filter implementation (mostly) done chore: environment capture note chore: always postgres bigint feat: target clone feat: simplify lookup feat: move storage up feat: eval entity path chore: checkpoint chore: checkpoint chore: find entrypoint feat: eval context feat: eval cleanup chore: cleanup feat: track index feat: wire up filter feat: add error reporting chore: checkpoint --- libs/@local/hashql/core/src/symbol/sym.rs | 2 + libs/@local/hashql/mir/src/body/place.rs | 6 + .../hashql/mir/src/pass/execution/mod.rs | 3 + .../execution/statement_placement/common.rs | 16 ++ .../statement_placement/embedding/mod.rs | 37 ++- .../statement_placement/lookup/entity.rs | 135 ---------- .../statement_placement/lookup/mod.rs | 41 --- .../statement_placement/lookup/tests.rs | 109 -------- .../statement_placement/lookup/trie.rs | 82 ------ .../pass/execution/statement_placement/mod.rs | 1 - .../statement_placement/postgres/mod.rs | 26 +- .../mir/src/pass/execution/storage/access.rs | 11 + .../mir/src/pass/execution/storage/entity.rs | 251 ++++++++++++++++++ .../mir/src/pass/execution/storage/mod.rs | 9 + .../mir/src/pass/execution/storage/tests.rs | 203 ++++++++++++++ .../hashql/mir/src/pass/execution/vertex.rs | 67 +++++ 16 files changed, 595 insertions(+), 404 deletions(-) delete mode 100644 libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/entity.rs delete mode 100644 libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/mod.rs delete mode 100644 libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/tests.rs delete mode 100644 libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/trie.rs create mode 100644 
libs/@local/hashql/mir/src/pass/execution/storage/access.rs create mode 100644 libs/@local/hashql/mir/src/pass/execution/storage/entity.rs create mode 100644 libs/@local/hashql/mir/src/pass/execution/storage/mod.rs create mode 100644 libs/@local/hashql/mir/src/pass/execution/storage/tests.rs create mode 100644 libs/@local/hashql/mir/src/pass/execution/vertex.rs diff --git a/libs/@local/hashql/core/src/symbol/sym.rs b/libs/@local/hashql/core/src/symbol/sym.rs index 23a675bb88b..0492cc759c5 100644 --- a/libs/@local/hashql/core/src/symbol/sym.rs +++ b/libs/@local/hashql/core/src/symbol/sym.rs @@ -23,6 +23,7 @@ hashql_macros::define_symbols! { created_at_decision_time, created_at_transaction_time, created_by_id, + base_url, decision_time, Dict, div, @@ -110,6 +111,7 @@ hashql_macros::define_symbols! { unknown, Url, vectors, + version, web_id, // [tidy] sort alphabetically end diff --git a/libs/@local/hashql/mir/src/body/place.rs b/libs/@local/hashql/mir/src/body/place.rs index 32062a9098a..35ab48adb2e 100644 --- a/libs/@local/hashql/mir/src/body/place.rs +++ b/libs/@local/hashql/mir/src/body/place.rs @@ -447,6 +447,12 @@ pub struct Projection<'heap> { pub kind: ProjectionKind<'heap>, } +impl AsRef for Projection<'_> { + fn as_ref(&self) -> &Self { + self + } +} + /// A projection operation that navigates within structured data. /// /// Projections allow places to reference nested data within structured types. 
diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index 8bd3f25080a..52991df5547 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -15,6 +15,8 @@ mod splitting; mod statement_placement; mod target; mod terminator_placement; +mod vertex; +pub mod storage; use core::{alloc::Allocator, assert_matches}; @@ -24,6 +26,7 @@ pub use self::{ cost::{ApproxCost, Cost}, island::{Island, IslandId, IslandVec}, placement::error::PlacementDiagnosticCategory, +vertex::VertexType, target::TargetId, }; use self::{ diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs index a99d024999a..f287e931fe1 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs @@ -16,6 +16,7 @@ use crate::{ local::Local, location::Location, operand::Operand, + place::Projection, rvalue::RValue, statement::{Assign, Statement, StatementKind}, terminator::TerminatorKind, @@ -29,6 +30,7 @@ use crate::{ execution::{ Cost, cost::{StatementCostVec, TraversalCostVec}, + storage::{Access, EntityPath}, }, }, visit::Visitor, @@ -319,3 +321,17 @@ where Ok(()) } } + +/// Determines which backend can access an entity field projection. +/// +/// Walks the projection path through the entity schema trie to determine whether the field is +/// stored in Postgres (as a column or JSONB path) or in the embedding store. Returns `None` if +/// the path doesn't map to any supported backend storage. 
+/// +/// For example: +/// - `entity.properties.foo` → `Some(Access::Postgres(Direct))` (JSONB) +/// - `entity.encodings.vectors` → `Some(Access::Embedding(Direct))` +/// - `entity.metadata.record_id.entity_id.web_id` → `Some(Access::Postgres(Direct))` +pub(crate) fn entity_projection_access(projections: &[Projection<'_>]) -> Option { + EntityPath::resolve(projections).map(|(path, _)| path.access()) +} diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs index 047cd59cfa5..2a5e91af656 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs @@ -1,9 +1,6 @@ use core::alloc::Allocator; -use hashql_core::{ - id::{Id as _, bit_vec::DenseBitSet}, - symbol::sym, -}; +use hashql_core::id::{Id as _, bit_vec::DenseBitSet}; use super::{ StatementPlacement, @@ -14,9 +11,10 @@ use crate::{ context::MirContext, pass::{ execution::{ - Cost, + Cost, VertexType, cost::{StatementCostVec, TraversalCostVec}, - statement_placement::lookup::{Access, entity_projection_access}, + statement_placement::common::entity_projection_access, + storage::Access, }, transform::Traversals, }, @@ -35,22 +33,19 @@ fn is_supported_place<'heap>( // For GraphReadFilter bodies, local 1 is the filter argument (vertex). Check if the // projection path maps to an Embedding-accessible field. 
if matches!(body.source, Source::GraphReadFilter(_)) && place.local.as_usize() == 1 { - let local_type = body.local_decls[place.local].r#type; - let type_name = context - .env - .r#type(local_type) - .kind - .opaque() - .map_or_else(|| unreachable!(), |opaque| opaque.name); - - if type_name == sym::path::Entity { - return matches!( - entity_projection_access(&place.projections), - Some(Access::Embedding(_)) - ); - } + let decl = &body.local_decls[place.local]; + let Some(vertex_type) = VertexType::from_local(context.env, decl) else { + unimplemented!("lookup for declared type") + }; - unimplemented!("unimplemented lookup for declared type") + match vertex_type { + VertexType::Entity => { + return matches!( + entity_projection_access(&place.projections), + Some(Access::Embedding(_)) + ); + } + } } domain.contains(place.local) diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/entity.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/entity.rs deleted file mode 100644 index 1d194e9d27f..00000000000 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/entity.rs +++ /dev/null @@ -1,135 +0,0 @@ -use hashql_core::symbol::sym; - -use super::trie::{Access, AccessMode, PathNode}; - -/// Entity path access trie mapping field paths to backend access types. -/// -/// The trie structure mirrors the entity schema, with paths mapping to their storage location: -/// -/// - `properties` → JSONB column in `entity_editions` -/// - `encodings.vectors` → Embedding backend -/// - `metadata.*` → Various columns in `entity_temporal_metadata`, `entity_editions`, etc. -/// - `link_data.*` → `entity_edge` table via joins -/// -/// Entry point is the `entity_temporal_metadata` table which joins to `entity_ids`, -/// `entity_editions`, `entity_is_of_type`, and `entity_edge`. -// The static ref here is required, so that the symbols are not duplicated across crates and have -// the same interned string. 
-pub(super) static ENTITY_PATHS: PathNode = PathNode::root(&[ - // entity_editions.properties (JSONB) - PathNode::jsonb(sym::properties), - // (tbd) encodings - PathNode::branch( - sym::encodings, - None, - &[ - // Vectors are stored outside the entity inside of an embeddings database - PathNode::branch(sym::vectors, Access::Embedding(AccessMode::Direct), &[]), - ], - ), - PathNode::branch( - sym::metadata, - None, - &[ - // entity_temporal_metadata: web_id, entity_uuid, draft_id, entity_edition_id - PathNode::branch( - sym::record_id, - Access::Postgres(AccessMode::Composite), - &[ - // entity_temporal_metadata: web_id, entity_uuid, draft_id - PathNode::branch( - sym::entity_id, - Access::Postgres(AccessMode::Composite), - &[ - // entity_temporal_metadata.web_id - PathNode::leaf(sym::web_id, Access::Postgres(AccessMode::Direct)), - // entity_temporal_metadata.entity_uuid - PathNode::leaf(sym::entity_uuid, Access::Postgres(AccessMode::Direct)), - // entity_temporal_metadata.draft_id - PathNode::leaf(sym::draft_id, Access::Postgres(AccessMode::Direct)), - ], - ), - // entity_temporal_metadata.entity_edition_id - PathNode::leaf(sym::edition_id, Access::Postgres(AccessMode::Direct)), - ], - ), - // entity_temporal_metadata: decision_time, transaction_time - PathNode::branch( - sym::temporal_versioning, - Access::Postgres(AccessMode::Composite), - &[ - // entity_temporal_metadata.decision_time - PathNode::leaf(sym::decision_time, Access::Postgres(AccessMode::Direct)), - // entity_temporal_metadata.transaction_time - PathNode::leaf(sym::transaction_time, Access::Postgres(AccessMode::Direct)), - ], - ), - // entity_is_of_type (via JOIN) - PathNode::leaf(sym::entity_type_ids, Access::Postgres(AccessMode::Direct)), - // entity_editions.archived - PathNode::leaf(sym::archived, Access::Postgres(AccessMode::Direct)), - // entity_editions.confidence - PathNode::leaf(sym::confidence, Access::Postgres(AccessMode::Direct)), - // spans entity_ids.provenance + 
entity_editions.provenance - PathNode::branch( - sym::provenance, - None, - &[ - // entity_ids.provenance (JSONB) - PathNode::jsonb(sym::inferred), - // entity_editions.provenance (JSONB) - PathNode::jsonb(sym::edition), - ], - ), - // entity_editions.property_metadata (JSONB) - PathNode::jsonb(sym::properties), - ], - ), - // contains synthesized draft_id fields - PathNode::branch( - sym::link_data, - None, - &[ - // draft_id is synthesized (always None), not stored - PathNode::branch( - sym::left_entity_id, - None, - &[ - // entity_has_left_entity -> entity_edge.target_web_id - PathNode::leaf(sym::web_id, Access::Postgres(AccessMode::Direct)), - // entity_has_left_entity -> entity_edge.target_entity_uuid - PathNode::leaf(sym::entity_uuid, Access::Postgres(AccessMode::Direct)), - // synthesized, not in entity_edge - PathNode::leaf(sym::draft_id, None), - ], - ), - // draft_id is synthesized (always None), not stored - PathNode::branch( - sym::right_entity_id, - None, - &[ - // entity_has_right_entity -> entity_edge.target_web_id - PathNode::leaf(sym::web_id, Access::Postgres(AccessMode::Direct)), - // entity_has_right_entity -> entity_edge.target_entity_uuid - PathNode::leaf(sym::entity_uuid, Access::Postgres(AccessMode::Direct)), - // synthesized, not in entity_edge - PathNode::leaf(sym::draft_id, None), - ], - ), - // entity_edge.confidence (via entity_has_left_entity) - PathNode::leaf( - sym::left_entity_confidence, - Access::Postgres(AccessMode::Direct), - ), - // entity_edge.provenance (JSONB, via entity_has_left_entity) - PathNode::jsonb(sym::left_entity_provenance), - // entity_edge.confidence (via entity_has_right_entity) - PathNode::leaf( - sym::right_entity_confidence, - Access::Postgres(AccessMode::Direct), - ), - // entity_edge.provenance (JSONB, via entity_has_right_entity) - PathNode::jsonb(sym::right_entity_provenance), - ], - ), -]); diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/mod.rs 
b/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/mod.rs deleted file mode 100644 index e7a24e6b0a7..00000000000 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/mod.rs +++ /dev/null @@ -1,41 +0,0 @@ -mod entity; -mod trie; - -#[cfg(test)] -mod tests; - -use self::entity::ENTITY_PATHS; -pub(crate) use self::trie::Access; -use crate::body::place::{Projection, ProjectionKind}; - -/// Determines which backend can access an entity field projection. -/// -/// Walks the projection path through the entity schema trie to determine whether the field is -/// stored in Postgres (as a column or JSONB path) or in the embedding store. Returns `None` if -/// the path doesn't map to any supported backend storage. -/// -/// For example: -/// - `entity.properties.foo` → `Some(Access::Postgres(Direct))` (JSONB) -/// - `entity.encodings.vectors` → `Some(Access::Embedding(Direct))` -/// - `entity.metadata.record_id.entity_id.web_id` → `Some(Access::Postgres(Direct))` -pub(crate) fn entity_projection_access(projections: &[Projection<'_>]) -> Option { - let mut node = &ENTITY_PATHS; - - for projection in projections { - if node.children.is_empty() { - return node.otherwise; - } - - let ProjectionKind::FieldByName(name) = projection.kind else { - return node.otherwise; - }; - - let Some(next_node) = node.lookup(name) else { - return node.otherwise; - }; - - node = next_node; - } - - node.access -} diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/tests.rs deleted file mode 100644 index e3a338d5cc5..00000000000 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/tests.rs +++ /dev/null @@ -1,109 +0,0 @@ -//! Unit tests for entity projection path lookup. 
- -use hashql_core::{symbol::sym, r#type::TypeId}; - -use super::{ - entity_projection_access, - trie::{Access, AccessMode}, -}; -use crate::body::place::{Projection, ProjectionKind}; - -/// Helper to create a `FieldByName` projection. -fn proj(name: impl Into>) -> Projection<'static> { - Projection { - kind: ProjectionKind::FieldByName(name.into()), - r#type: TypeId::PLACEHOLDER, - } -} - -/// `[.properties]` → `Access::Postgres(Direct)` (JSONB column). -#[test] -fn properties_is_postgres() { - let projections = &[proj(sym::properties)]; - let access = entity_projection_access(projections); - - assert_eq!(access, Some(Access::Postgres(AccessMode::Direct))); -} - -/// `[.properties.foo.bar]` → Postgres (JSONB otherwise). -/// -/// JSONB nodes have `otherwise` set, so any sub-path is also Postgres-accessible. -#[test] -fn properties_subpath_is_postgres() { - let projections = &[proj(sym::properties), proj(sym::foo), proj(sym::bar)]; - let access = entity_projection_access(projections); - - assert_eq!(access, Some(Access::Postgres(AccessMode::Direct))); -} - -/// `[.encodings.vectors]` → `Access::Embedding(Direct)`. -#[test] -fn vectors_is_embedding() { - let projections = &[proj(sym::encodings), proj(sym::vectors)]; - let access = entity_projection_access(projections); - - assert_eq!(access, Some(Access::Embedding(AccessMode::Direct))); -} - -/// Various metadata paths map to Postgres columns. 
-#[test] -fn metadata_columns_are_postgres() { - // metadata.archived -> Direct - let projections = &[proj(sym::metadata), proj(sym::archived)]; - assert_eq!( - entity_projection_access(projections), - Some(Access::Postgres(AccessMode::Direct)) - ); - - // metadata.record_id -> Composite - let projections = &[proj(sym::metadata), proj(sym::record_id)]; - assert_eq!( - entity_projection_access(projections), - Some(Access::Postgres(AccessMode::Composite)) - ); - - // metadata.record_id.entity_id.web_id -> Direct - let projections = &[ - proj(sym::metadata), - proj(sym::record_id), - proj(sym::entity_id), - proj(sym::web_id), - ]; - assert_eq!( - entity_projection_access(projections), - Some(Access::Postgres(AccessMode::Direct)) - ); - - // metadata.temporal_versioning.decision_time -> Direct - let projections = &[ - proj(sym::metadata), - proj(sym::temporal_versioning), - proj(sym::decision_time), - ]; - assert_eq!( - entity_projection_access(projections), - Some(Access::Postgres(AccessMode::Direct)) - ); -} - -/// `link_data.left_entity_id.draft_id` → `None` (synthesized, not stored). -#[test] -fn link_data_synthesized_is_none() { - let projections = &[ - proj(sym::link_data), - proj(sym::left_entity_id), - proj(sym::draft_id), - ]; - let access = entity_projection_access(projections); - - assert_eq!(access, None); -} - -/// Invalid path like `[.unknown]` → `None`. 
-#[test] -fn unknown_path_returns_none() { - let projections = &[proj(sym::unknown)]; - let access = entity_projection_access(projections); - - assert_eq!(access, None); -} diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/trie.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/trie.rs deleted file mode 100644 index f7f56e2f2f5..00000000000 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/lookup/trie.rs +++ /dev/null @@ -1,82 +0,0 @@ -use hashql_core::symbol::{Symbol, sym}; - -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub(crate) enum AccessMode { - Direct, - Composite, -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub(crate) enum Access { - Postgres(AccessMode), - Embedding(AccessMode), -} - -/// A node in the path access trie. -/// -/// Each node represents a field in a path hierarchy and defines: -/// - The field name this node matches (`name`). -/// - What access applies when the path ends at this node (`access`). -/// - What access applies for unknown/deeper paths (`otherwise`). -/// - What children exist for further path traversal. -#[derive(Debug, Copy, Clone)] -pub(crate) struct PathNode { - /// Field name this node matches (empty string for root). - pub name: Symbol<'static>, - /// Access level when the path ends at this node (no more projections). - pub access: Option, - /// Access level for paths beyond known children (e.g., JSONB allows any sub-path). - pub otherwise: Option, - /// Child nodes. - pub children: &'static [Self], -} - -impl PathNode { - pub(crate) const fn root(children: &'static [Self]) -> Self { - Self { - name: sym::entity, - access: None, - otherwise: None, - children, - } - } - - pub(crate) const fn leaf( - name: Symbol<'static>, - access: impl [const] Into>, - ) -> Self { - Self { - name, - access: access.into(), - otherwise: None, - children: &[], - } - } - - /// Creates a JSONB node where any sub-path is also Postgres-accessible. 
- pub(crate) const fn jsonb(name: Symbol<'static>) -> Self { - Self { - name, - access: Some(Access::Postgres(AccessMode::Direct)), - otherwise: Some(Access::Postgres(AccessMode::Direct)), - children: &[], - } - } - - pub(crate) const fn branch( - name: Symbol<'static>, - access: impl [const] Into>, - children: &'static [Self], - ) -> Self { - Self { - name, - access: access.into(), - otherwise: None, - children, - } - } - - pub(crate) fn lookup(&self, name: Symbol<'_>) -> Option<&Self> { - self.children.iter().find(|node| node.name == name) - } -} diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs index 326e3d5d396..d39c0c77200 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs @@ -17,7 +17,6 @@ mod tests; mod common; mod embedding; mod interpret; -mod lookup; mod postgres; pub(crate) use self::{ diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs index ac99b437435..2a34f13341e 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs @@ -4,7 +4,6 @@ use core::{alloc::Allocator, ops::ControlFlow}; use hashql_core::{ debug_panic, id::{Id as _, bit_vec::DenseBitSet}, - symbol::sym, sync::lock::LocalLock, r#type::{ self, RecursionBoundary, Type, TypeId, @@ -30,8 +29,10 @@ use crate::{ context::MirContext, pass::{ execution::{ + VertexType, cost::{Cost, StatementCostVec, TraversalCostVec}, - statement_placement::lookup::{Access, entity_projection_access}, + statement_placement::common::entity_projection_access, + storage::Access, }, transform::Traversals, }, @@ -372,22 +373,17 @@ impl<'heap, A: Allocator> PostgresSupported<'_, 
'heap, A> { Some(self.env_domain.contains(field)) } Local::VERTEX => { - let local_type = body.local_decls[place.local].r#type; - let type_name = context - .env - .r#type(local_type) - .kind - .opaque() - .map_or_else(|| unreachable!(), |opaque| opaque.name); - - if type_name == sym::path::Entity { - return Some(matches!( + let decl = &body.local_decls[place.local]; + let Some(vertex_type) = VertexType::from_local(context.env, decl) else { + unimplemented!("lookup for declared type") + }; + + match vertex_type { + VertexType::Entity => Some(matches!( entity_projection_access(&place.projections), Some(Access::Postgres(_)) - )); + )), } - - unimplemented!("unimplemented lookup for declared type") } _ => None, } diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/access.rs b/libs/@local/hashql/mir/src/pass/execution/storage/access.rs new file mode 100644 index 00000000000..581408bf766 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/storage/access.rs @@ -0,0 +1,11 @@ +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub(crate) enum AccessMode { + Direct, + Composite, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub(crate) enum Access { + Postgres(AccessMode), + Embedding(AccessMode), +} diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs b/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs new file mode 100644 index 00000000000..e3e08bf6e8d --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs @@ -0,0 +1,251 @@ +use hashql_core::symbol::{ConstantSymbol, sym}; + +use super::access::{Access, AccessMode}; +use crate::body::place::{Projection, ProjectionKind}; + +macro_rules! sym { + ($($sym:tt)::*) => { + sym::$($sym)::*::CONST + }; +} + +/// Resolved entity field path. +/// +/// Each variant identifies a specific storage location in the entity schema. 
Consumers can +/// exhaustively match on this to generate backend-specific access (SQL expressions, placement +/// decisions, etc.) without duplicating path resolution logic. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum EntityPath { + /// `properties.*` — JSONB column in `entity_editions`. + Properties, + /// `encodings.vectors` — embedding backend. + Vectors, + + /// `metadata.record_id` — composite of [`EntityId`] + [`EditionId`]. + /// + /// [`EntityId`]: Self::EntityId + /// [`EditionId`]: Self::EditionId + RecordId, + /// `metadata.record_id.entity_id` — composite of `web_id` + `entity_uuid` + `draft_id`. + EntityId, + /// `metadata.record_id.entity_id.web_id` — `entity_temporal_metadata.web_id`. + WebId, + /// `metadata.record_id.entity_id.entity_uuid` — `entity_temporal_metadata.entity_uuid`. + EntityUuid, + /// `metadata.record_id.entity_id.draft_id` — `entity_temporal_metadata.draft_id`. + DraftId, + /// `metadata.record_id.edition_id` — `entity_temporal_metadata.entity_edition_id`. + EditionId, + + /// `metadata.temporal_versioning` — composite of [`DecisionTime`] + [`TransactionTime`]. + /// + /// [`DecisionTime`]: Self::DecisionTime + /// [`TransactionTime`]: Self::TransactionTime + TemporalVersioning, + /// `metadata.temporal_versioning.decision_time` — `entity_temporal_metadata.decision_time`. + DecisionTime, + /// `metadata.temporal_versioning.transaction_time` — + /// `entity_temporal_metadata.transaction_time`. + TransactionTime, + + /// `metadata.entity_type_ids` — `entity_is_of_type` table (via JOIN). + EntityTypeIds, + /// `metadata.archived` — `entity_editions.archived`. + Archived, + /// `metadata.confidence` — `entity_editions.confidence`. + Confidence, + + /// `metadata.provenance.inferred` — JSONB in `entity_ids.provenance`. + ProvenanceInferred, + /// `metadata.provenance.edition` — JSONB in `entity_editions.provenance`. + ProvenanceEdition, + /// `metadata.properties.*` — JSONB (`property_metadata`) in `entity_editions`. 
+ PropertyMetadata, + + /// `link_data.left_entity_id.web_id` — `entity_edge.target_web_id` (via + /// `entity_has_left_entity`). + LeftEntityWebId, + /// `link_data.left_entity_id.entity_uuid` — `entity_edge.target_entity_uuid` (via + /// `entity_has_left_entity`). + LeftEntityUuid, + /// `link_data.right_entity_id.web_id` — `entity_edge.target_web_id` (via + /// `entity_has_right_entity`). + RightEntityWebId, + /// `link_data.right_entity_id.entity_uuid` — `entity_edge.target_entity_uuid` (via + /// `entity_has_right_entity`). + RightEntityUuid, + /// `link_data.left_entity_confidence` — `entity_edge.confidence` (via + /// `entity_has_left_entity`). + LeftEntityConfidence, + /// `link_data.right_entity_confidence` — `entity_edge.confidence` (via + /// `entity_has_right_entity`). + RightEntityConfidence, + /// `link_data.left_entity_provenance` — JSONB in `entity_edge.provenance` (via + /// `entity_has_left_entity`). + LeftEntityProvenance, + /// `link_data.right_entity_provenance` — JSONB in `entity_edge.provenance` (via + /// `entity_has_right_entity`). + RightEntityProvenance, +} + +impl EntityPath { + #[must_use] + pub fn resolve(projections: &[Projection<'_>]) -> Option<(Self, usize)> { + resolve(projections) + } + + /// Returns the backend access mode for this path. 
+ pub(crate) const fn access(self) -> Access { + match self { + Self::Vectors => Access::Embedding(AccessMode::Direct), + + Self::RecordId | Self::EntityId | Self::TemporalVersioning => { + Access::Postgres(AccessMode::Composite) + } + + Self::Properties + | Self::WebId + | Self::EntityUuid + | Self::DraftId + | Self::EditionId + | Self::DecisionTime + | Self::TransactionTime + | Self::EntityTypeIds + | Self::Archived + | Self::Confidence + | Self::ProvenanceInferred + | Self::ProvenanceEdition + | Self::PropertyMetadata + | Self::LeftEntityWebId + | Self::LeftEntityUuid + | Self::RightEntityWebId + | Self::RightEntityUuid + | Self::LeftEntityConfidence + | Self::RightEntityConfidence + | Self::LeftEntityProvenance + | Self::RightEntityProvenance => Access::Postgres(AccessMode::Direct), + } + } + + const fn is_jsonb(self) -> bool { + matches!( + self, + Self::Properties + | Self::ProvenanceInferred + | Self::ProvenanceEdition + | Self::PropertyMetadata + | Self::LeftEntityProvenance + | Self::RightEntityProvenance + ) + } +} + +#[inline] +fn project(projections: &[Projection<'_>], index: &mut usize) -> Option { + let projection = projections.get(*index).and_then(|projection| { + if let ProjectionKind::FieldByName(name) = projection.kind { + name.as_constant() + } else { + None + } + }); + + if projection.is_some() { + *index += 1; + } + + projection +} + +/// Resolves an entity field path to an [`EntityPath`]. +/// +/// Walks a sequence of field name projections through the entity schema and returns the resolved +/// path, or `None` if the path doesn't map to any known storage location (including synthesized +/// fields like `link_data.*.draft_id`). +#[expect(clippy::match_same_arms, clippy::allow_attributes)] +fn resolve(projections: &[Projection<'_>]) -> Option<(EntityPath, usize)> { + #[allow(clippy::enum_glob_use, reason = "clarity")] + use EntityPath::*; + + let mut index = 0; + + macro_rules! 
next { + () => { + project(projections, &mut index) + }; + + (else $cond:expr) => {{ + let Some(value) = next!() else { + return Some(($cond, index)); + }; + + value + }}; + } + + let path = match next!()? { + // entity_editions.properties (JSONB) + sym!(properties) => Properties, + sym!(encodings) => match next!()? { + sym!(vectors) => Vectors, + _ => return None, + }, + sym!(metadata) => match next!()? { + sym!(record_id) => match next!(else RecordId) { + sym!(entity_id) => match next!(else EntityId) { + sym!(web_id) => WebId, + sym!(entity_uuid) => EntityUuid, + sym!(draft_id) => DraftId, + _ => return None, + }, + sym!(edition_id) => EditionId, + _ => return None, + }, + sym!(temporal_versioning) => match next!(else TemporalVersioning) { + sym!(decision_time) => DecisionTime, + sym!(transaction_time) => TransactionTime, + _ => return None, + }, + sym!(entity_type_ids) => EntityTypeIds, + sym!(archived) => Archived, + sym!(confidence) => Confidence, + sym!(provenance) => match next!()? { + sym!(inferred) => ProvenanceInferred, + sym!(edition) => ProvenanceEdition, + _ => return None, + }, + sym!(properties) => PropertyMetadata, + _ => return None, + }, + sym!(link_data) => match next!()? { + sym!(left_entity_id) => match next!()? { + sym!(web_id) => LeftEntityWebId, + sym!(entity_uuid) => LeftEntityUuid, + // draft_id is synthesized (always None), not stored + sym!(draft_id) => return None, + _ => return None, + }, + sym!(right_entity_id) => match next!()? 
{ + sym!(web_id) => RightEntityWebId, + sym!(entity_uuid) => RightEntityUuid, + // draft_id is synthesized (always None), not stored + sym!(draft_id) => return None, + _ => return None, + }, + sym!(left_entity_confidence) => LeftEntityConfidence, + sym!(right_entity_confidence) => RightEntityConfidence, + sym!(left_entity_provenance) => LeftEntityProvenance, + sym!(right_entity_provenance) => RightEntityProvenance, + _ => return None, + }, + + _ => return None, + }; + + // JSONB paths allow arbitrary sub-paths; all others must be fully resolved + if !path.is_jsonb() && projections.get(index).is_some() { + return None; + } + + Some((path, index)) +} diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/mod.rs b/libs/@local/hashql/mir/src/pass/execution/storage/mod.rs new file mode 100644 index 00000000000..0c9dce61340 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/storage/mod.rs @@ -0,0 +1,9 @@ +mod access; +mod entity; + +#[cfg(test)] +mod tests; + +pub use entity::EntityPath; + +pub(crate) use self::access::Access; diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/tests.rs b/libs/@local/hashql/mir/src/pass/execution/storage/tests.rs new file mode 100644 index 00000000000..e696c41ae49 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/storage/tests.rs @@ -0,0 +1,203 @@ +//! Unit tests for entity projection path lookup. + +use hashql_core::{symbol::sym, r#type::TypeId}; + +use super::access::{Access, AccessMode}; +use crate::{ + body::place::{Projection, ProjectionKind}, + pass::execution::storage::EntityPath, +}; + +/// Helper to create a `FieldByName` projection. +fn proj(name: impl Into>) -> Projection<'static> { + Projection { + kind: ProjectionKind::FieldByName(name.into()), + r#type: TypeId::PLACEHOLDER, + } +} + +/// `[.properties]` → `Access::Postgres(Direct)` (JSONB column). 
+#[test] +fn properties_is_postgres() { + let projections = &[proj(sym::properties)]; + let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); + + assert_eq!(access, Some(Access::Postgres(AccessMode::Direct))); +} + +/// `[.properties.foo.bar]` → Postgres (JSONB otherwise). +/// +/// JSONB nodes have `otherwise` set, so any sub-path is also Postgres-accessible. +#[test] +fn properties_subpath_is_postgres() { + let projections = &[proj(sym::properties), proj(sym::foo), proj(sym::bar)]; + let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); + + assert_eq!(access, Some(Access::Postgres(AccessMode::Direct))); +} + +/// `[.encodings.vectors]` → `Access::Embedding(Direct)`. +#[test] +fn vectors_is_embedding() { + let projections = &[proj(sym::encodings), proj(sym::vectors)]; + let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); + + assert_eq!(access, Some(Access::Embedding(AccessMode::Direct))); +} + +/// Various metadata paths map to Postgres columns. 
+#[test] +fn metadata_columns_are_postgres() { + // metadata.archived -> Direct + let projections = &[proj(sym::metadata), proj(sym::archived)]; + assert_eq!( + EntityPath::resolve(projections).map(|(path, _)| path.access()), + Some(Access::Postgres(AccessMode::Direct)) + ); + + // metadata.record_id -> Composite + let projections = &[proj(sym::metadata), proj(sym::record_id)]; + assert_eq!( + EntityPath::resolve(projections).map(|(path, _)| path.access()), + Some(Access::Postgres(AccessMode::Composite)) + ); + + // metadata.record_id.entity_id.web_id -> Direct + let projections = &[ + proj(sym::metadata), + proj(sym::record_id), + proj(sym::entity_id), + proj(sym::web_id), + ]; + assert_eq!( + EntityPath::resolve(projections).map(|(path, _)| path.access()), + Some(Access::Postgres(AccessMode::Direct)) + ); + + // metadata.temporal_versioning.decision_time -> Direct + let projections = &[ + proj(sym::metadata), + proj(sym::temporal_versioning), + proj(sym::decision_time), + ]; + assert_eq!( + EntityPath::resolve(projections).map(|(path, _)| path.access()), + Some(Access::Postgres(AccessMode::Direct)) + ); +} + +/// `link_data.left_entity_id.draft_id` → `None` (synthesized, not stored). +#[test] +fn link_data_synthesized_is_none() { + let projections = &[ + proj(sym::link_data), + proj(sym::left_entity_id), + proj(sym::draft_id), + ]; + let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); + + assert_eq!(access, None); +} + +/// Invalid path like `[.unknown]` → `None`. +#[test] +fn unknown_path_returns_none() { + let projections = &[proj(sym::unknown)]; + let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); + + assert_eq!(access, None); +} + +/// The returned index reflects how many projections were consumed during resolution. 
+#[test] +fn index_counts_consumed_projections() { + // Single-segment: `.properties` consumes 1 + let projections = &[proj(sym::properties)]; + assert_eq!( + EntityPath::resolve(projections), + Some((EntityPath::Properties, 1)) + ); + + // Two segments: `.encodings.vectors` consumes 2 + let projections = &[proj(sym::encodings), proj(sym::vectors)]; + assert_eq!( + EntityPath::resolve(projections), + Some((EntityPath::Vectors, 2)) + ); + + // Three segments: `.metadata.provenance.inferred` consumes 3 + let projections = &[ + proj(sym::metadata), + proj(sym::provenance), + proj(sym::inferred), + ]; + assert_eq!( + EntityPath::resolve(projections), + Some((EntityPath::ProvenanceInferred, 3)) + ); + + // Four segments: `.metadata.record_id.entity_id.web_id` consumes 4 + let projections = &[ + proj(sym::metadata), + proj(sym::record_id), + proj(sym::entity_id), + proj(sym::web_id), + ]; + assert_eq!( + EntityPath::resolve(projections), + Some((EntityPath::WebId, 4)) + ); +} + +/// Composite paths that stop early via `next!(else ...)` return the correct index. +#[test] +fn index_for_composite_early_exit() { + // `.metadata.record_id` with no further projections → RecordId at index 2 + let projections = &[proj(sym::metadata), proj(sym::record_id)]; + assert_eq!( + EntityPath::resolve(projections), + Some((EntityPath::RecordId, 2)) + ); + + // `.metadata.record_id.entity_id` without a leaf → EntityId at index 3 + let projections = &[ + proj(sym::metadata), + proj(sym::record_id), + proj(sym::entity_id), + ]; + assert_eq!( + EntityPath::resolve(projections), + Some((EntityPath::EntityId, 3)) + ); + + // `.metadata.temporal_versioning` without a leaf → TemporalVersioning at index 2 + let projections = &[proj(sym::metadata), proj(sym::temporal_versioning)]; + assert_eq!( + EntityPath::resolve(projections), + Some((EntityPath::TemporalVersioning, 2)) + ); +} + +/// JSONB paths stop consuming at the storage boundary; sub-path projections are excess. 
+#[test] +fn jsonb_index_excludes_subpath() { + // `.properties.foo.bar` → Properties at index 1, leaving 2 excess projections + let projections = &[proj(sym::properties), proj(sym::foo), proj(sym::bar)]; + assert_eq!( + EntityPath::resolve(projections), + Some((EntityPath::Properties, 1)) + ); + + // `.metadata.provenance.inferred.foo.bar` → ProvenanceInferred at index 3 + let projections = &[ + proj(sym::metadata), + proj(sym::provenance), + proj(sym::inferred), + proj(sym::foo), + proj(sym::bar), + ]; + assert_eq!( + EntityPath::resolve(projections), + Some((EntityPath::ProvenanceInferred, 3)) + ); +} diff --git a/libs/@local/hashql/mir/src/pass/execution/vertex.rs b/libs/@local/hashql/mir/src/pass/execution/vertex.rs new file mode 100644 index 00000000000..cb5315f8683 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/vertex.rs @@ -0,0 +1,67 @@ +use hashql_core::{ + symbol::sym, + r#type::{ + TypeId, + environment::Environment, + kind::{OpaqueType, TypeKind}, + }, +}; + +use crate::body::local::LocalDecl; + +fn peel<'heap>( + env: &Environment<'heap>, + id: TypeId, + depth: usize, +) -> Option<&'heap OpaqueType<'heap>> { + let r#type = env.r#type(id); + + // We don't need a sophisticated cycle detection algorithm here, the only reason a cycle could + // occur here is if apply and generic substitutions are the only members in a cycle, haven't + // been resolved and simplified away. Which should've created a type error earlier anyway. 
+ if depth > 32 { + // debug_panic!("maximum opaque type recursion depth exceeded"); + + return None; + } + + match r#type.kind { + TypeKind::Opaque(opaque_type) => Some(opaque_type), + TypeKind::Apply(apply) => peel(env, apply.base, depth + 1), + TypeKind::Generic(generic) => peel(env, generic.base, depth + 1), + TypeKind::Primitive(_) + | TypeKind::Intrinsic(_) + | TypeKind::Struct(_) + | TypeKind::Tuple(_) + | TypeKind::Union(_) + | TypeKind::Intersection(_) + | TypeKind::Closure(_) + | TypeKind::Never + | TypeKind::Unknown + | TypeKind::Param(_) + | TypeKind::Infer(_) => None, + } +} + +/// The vertex type of a [`GraphReadFilter`] body's vertex argument. +/// +/// [`GraphReadFilter`]: crate::body::Source::GraphReadFilter +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum VertexType { + Entity, +} + +impl VertexType { + /// Classifies a local declaration as a vertex type based on its opaque type name. + /// + /// Returns `None` if the declaration's type is not an opaque type or not a recognized vertex + /// type. + pub fn from_local(env: &Environment<'_>, decl: &LocalDecl<'_>) -> Option { + let opaque = peel(env, decl.r#type, 0)?; + + match opaque.name.as_constant()? 
{ + sym::path::Entity::CONST => Some(Self::Entity), + _ => None, + } + } +} From 73482a0b60c464654d2701713d32a6b02bfbef91 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sat, 28 Feb 2026 17:35:30 +0100 Subject: [PATCH 02/32] feat: convert EntityPath to bitset --- .../mir/src/pass/execution/storage/entity.rs | 17 +++++++++++++++-- .../mir/src/pass/execution/storage/mod.rs | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs b/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs index e3e08bf6e8d..a8392b52927 100644 --- a/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs +++ b/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs @@ -1,4 +1,7 @@ -use hashql_core::symbol::{ConstantSymbol, sym}; +use hashql_core::{ + id::{Id, bit_vec::FiniteBitSet}, + symbol::{ConstantSymbol, sym}, +}; use super::access::{Access, AccessMode}; use crate::body::place::{Projection, ProjectionKind}; @@ -14,7 +17,7 @@ macro_rules! sym { /// Each variant identifies a specific storage location in the entity schema. Consumers can /// exhaustively match on this to generate backend-specific access (SQL expressions, placement /// decisions, etc.) without duplicating path resolution logic. -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Id)] pub enum EntityPath { /// `properties.*` — JSONB column in `entity_editions`. 
Properties, @@ -88,6 +91,16 @@ pub enum EntityPath { RightEntityProvenance, } +type FiniteBitSetWidth = u32; +const _: () = { + assert!( + (FiniteBitSetWidth::BITS as usize) >= core::mem::variant_count::(), + "entity path count exceeds finite bitset width" + ); +}; + +pub type EntityPathBitSet = FiniteBitSet; + impl EntityPath { #[must_use] pub fn resolve(projections: &[Projection<'_>]) -> Option<(Self, usize)> { diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/mod.rs b/libs/@local/hashql/mir/src/pass/execution/storage/mod.rs index 0c9dce61340..b7caca00e8a 100644 --- a/libs/@local/hashql/mir/src/pass/execution/storage/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/storage/mod.rs @@ -4,6 +4,6 @@ mod entity; #[cfg(test)] mod tests; -pub use entity::EntityPath; +pub use entity::{EntityPath, EntityPathBitSet}; pub(crate) use self::access::Access; From 3362463efde0300cf29881039373d115bdf4f62f Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sat, 28 Feb 2026 18:01:02 +0100 Subject: [PATCH 03/32] fix: suggestions from code review --- libs/@local/hashql/core/src/symbol/sym.rs | 2 +- .../hashql/mir/src/pass/execution/mod.rs | 4 +- .../execution/statement_placement/common.rs | 6 +-- .../mir/src/pass/execution/storage/entity.rs | 6 ++- .../mir/src/pass/execution/storage/tests.rs | 37 ++++++++++++++++++- 5 files changed, 47 insertions(+), 8 deletions(-) diff --git a/libs/@local/hashql/core/src/symbol/sym.rs b/libs/@local/hashql/core/src/symbol/sym.rs index 0492cc759c5..388b5a4c65e 100644 --- a/libs/@local/hashql/core/src/symbol/sym.rs +++ b/libs/@local/hashql/core/src/symbol/sym.rs @@ -9,6 +9,7 @@ hashql_macros::define_symbols! { archived, archived_by_id, bar, + base_url, BaseUrl, bit_and, bit_not, @@ -23,7 +24,6 @@ hashql_macros::define_symbols! 
{ created_at_decision_time, created_at_transaction_time, created_by_id, - base_url, decision_time, Dict, div, diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index 52991df5547..7d0472a7dbd 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -13,10 +13,10 @@ mod island; mod placement; mod splitting; mod statement_placement; +pub mod storage; mod target; mod terminator_placement; mod vertex; -pub mod storage; use core::{alloc::Allocator, assert_matches}; @@ -26,8 +26,8 @@ pub use self::{ cost::{ApproxCost, Cost}, island::{Island, IslandId, IslandVec}, placement::error::PlacementDiagnosticCategory, -vertex::VertexType, target::TargetId, + vertex::VertexType, }; use self::{ fusion::BasicBlockFusion, diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs index f287e931fe1..06fb609d65e 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs @@ -324,9 +324,9 @@ where /// Determines which backend can access an entity field projection. /// -/// Walks the projection path through the entity schema trie to determine whether the field is -/// stored in Postgres (as a column or JSONB path) or in the embedding store. Returns `None` if -/// the path doesn't map to any supported backend storage. +/// Walks the projection path through the entity schema to determine whether the field is stored in +/// Postgres (as a column or JSONB path) or in the embedding store. Returns `None` if the path +/// doesn't map to any supported backend storage. 
/// /// For example: /// - `entity.properties.foo` → `Some(Access::Postgres(Direct))` (JSONB) diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs b/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs index a8392b52927..b1cab2ef6cb 100644 --- a/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs +++ b/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs @@ -188,8 +188,12 @@ fn resolve(projections: &[Projection<'_>]) -> Option<(EntityPath, usize)> { }; (else $cond:expr) => {{ - let Some(value) = next!() else { + if index >= projections.len() { return Some(($cond, index)); + } + + let Some(value) = next!() else { + return None; }; value diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/tests.rs b/libs/@local/hashql/mir/src/pass/execution/storage/tests.rs index e696c41ae49..5dda38abefd 100644 --- a/libs/@local/hashql/mir/src/pass/execution/storage/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/storage/tests.rs @@ -4,7 +4,10 @@ use hashql_core::{symbol::sym, r#type::TypeId}; use super::access::{Access, AccessMode}; use crate::{ - body::place::{Projection, ProjectionKind}, + body::{ + local::Local, + place::{Projection, ProjectionKind}, + }, pass::execution::storage::EntityPath, }; @@ -178,6 +181,38 @@ fn index_for_composite_early_exit() { ); } +/// A non-FieldByName projection (e.g. `Index`) after a composite node must return `None`, not +/// the composite path. Previously the `next!(else ...)` macro conflated "no more projections" with +/// "non-FieldByName projection", bypassing the exhaustion guard. 
+#[test] +fn non_field_projection_after_composite_returns_none() { + let index_projection = Projection { + kind: ProjectionKind::Index(Local::new(0)), + r#type: TypeId::PLACEHOLDER, + }; + + // `.metadata.record_id` followed by an index projection: not a valid entity path + let projections = &[proj(sym::metadata), proj(sym::record_id), index_projection]; + assert_eq!(EntityPath::resolve(projections), None); + + // `.metadata.record_id.entity_id` followed by an index projection + let projections = &[ + proj(sym::metadata), + proj(sym::record_id), + proj(sym::entity_id), + index_projection, + ]; + assert_eq!(EntityPath::resolve(projections), None); + + // `.metadata.temporal_versioning` followed by an index projection + let projections = &[ + proj(sym::metadata), + proj(sym::temporal_versioning), + index_projection, + ]; + assert_eq!(EntityPath::resolve(projections), None); +} + /// JSONB paths stop consuming at the storage boundary; sub-path projections are excess. #[test] fn jsonb_index_excludes_subpath() { From 885a006638c71dc8434378549b7b8a08516f22b6 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sat, 28 Feb 2026 18:02:41 +0100 Subject: [PATCH 04/32] fix: suggestions from code review --- libs/@local/hashql/mir/src/pass/execution/storage/entity.rs | 6 +----- libs/@local/hashql/mir/src/pass/execution/vertex.rs | 3 ++- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs b/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs index b1cab2ef6cb..c227f170a40 100644 --- a/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs +++ b/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs @@ -192,11 +192,7 @@ fn resolve(projections: &[Projection<'_>]) -> Option<(EntityPath, usize)> { return Some(($cond, index)); } - let Some(value) = next!() else { - return None; - }; - - value + next!()? 
}}; } diff --git a/libs/@local/hashql/mir/src/pass/execution/vertex.rs b/libs/@local/hashql/mir/src/pass/execution/vertex.rs index cb5315f8683..b0d8441da76 100644 --- a/libs/@local/hashql/mir/src/pass/execution/vertex.rs +++ b/libs/@local/hashql/mir/src/pass/execution/vertex.rs @@ -1,4 +1,5 @@ use hashql_core::{ + debug_panic, symbol::sym, r#type::{ TypeId, @@ -20,7 +21,7 @@ fn peel<'heap>( // occur here is if apply and generic substitutions are the only members in a cycle, haven't // been resolved and simplified away. Which should've created a type error earlier anyway. if depth > 32 { - // debug_panic!("maximum opaque type recursion depth exceeded"); + debug_panic!("maximum opaque type recursion depth exceeded"); return None; } From 2400df59b9961b48d5166fa9dc6e4252d51447a8 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sat, 28 Feb 2026 18:48:39 +0100 Subject: [PATCH 05/32] feat: move to shared infrastructure --- libs/@local/hashql/mir/package.json | 2 +- .../pass/execution/block_partitioned_vec.rs | 217 ++++++++++++++++++ .../hashql/mir/src/pass/execution/cost.rs | 189 +++------------ .../hashql/mir/src/pass/execution/mod.rs | 3 +- .../execution/statement_placement/common.rs | 2 +- .../statement_placement/embedding/mod.rs | 2 +- .../statement_placement/postgres/mod.rs | 2 +- .../mir/src/pass/execution/storage/mod.rs | 9 - .../execution/terminator_placement/mod.rs | 76 ++---- .../execution/terminator_placement/tests.rs | 2 +- .../{storage => traversal}/access.rs | 0 .../{storage => traversal}/entity.rs | 0 .../mir/src/pass/execution/traversal/mod.rs | 12 + .../mir/src/pass/execution/traversal/path.rs | 14 ++ .../execution/{storage => traversal}/tests.rs | 2 +- 15 files changed, 300 insertions(+), 232 deletions(-) create mode 100644 libs/@local/hashql/mir/src/pass/execution/block_partitioned_vec.rs delete mode 100644 libs/@local/hashql/mir/src/pass/execution/storage/mod.rs rename libs/@local/hashql/mir/src/pass/execution/{storage => traversal}/access.rs (100%) 
rename libs/@local/hashql/mir/src/pass/execution/{storage => traversal}/entity.rs (100%) create mode 100644 libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs create mode 100644 libs/@local/hashql/mir/src/pass/execution/traversal/path.rs rename libs/@local/hashql/mir/src/pass/execution/{storage => traversal}/tests.rs (99%) diff --git a/libs/@local/hashql/mir/package.json b/libs/@local/hashql/mir/package.json index 575098ee79d..c4513eecab0 100644 --- a/libs/@local/hashql/mir/package.json +++ b/libs/@local/hashql/mir/package.json @@ -9,7 +9,7 @@ "fix:clippy": "just clippy --fix", "lint:clippy": "just clippy", "test:codspeed": "cargo codspeed run -p hashql-mir", - "test:miri": "cargo miri nextest run -- changed_bitor interpret::locals::tests pass::analysis::execution::cost", + "test:miri": "cargo miri nextest run -- changed_bitor interpret::locals::tests pass::execution::block_partitioned_vec::tests pass::execution::cost::tests", "test:unit": "mise run test:unit @rust/hashql-mir" }, "dependencies": { diff --git a/libs/@local/hashql/mir/src/pass/execution/block_partitioned_vec.rs b/libs/@local/hashql/mir/src/pass/execution/block_partitioned_vec.rs new file mode 100644 index 00000000000..98222cb0d81 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/block_partitioned_vec.rs @@ -0,0 +1,217 @@ +//! Generic block-partitioned storage. +//! +//! Provides a flat data array with a block-offset table, so that per-element values can be +//! accessed by block ID. Each block owns a contiguous slice of the array, sized by the count +//! provided at construction time. +//! +//! Used as the backing store for both per-statement cost maps and per-edge terminator matrices. + +use core::{alloc::Allocator, iter}; + +use hashql_core::id::Id as _; + +use crate::body::basic_block::{BasicBlockId, BasicBlockSlice}; + +/// Dense block-partitioned storage. +/// +/// Stores a flat array of `T` values, partitioned into per-block slices via an offset table. 
+/// Each [`BasicBlockId`] maps to a contiguous range within the data array. The per-block count +/// is determined at construction time and can be rebuilt via [`remap`](Self::remap). +#[derive(Debug)] +pub(crate) struct BlockPartitionedVec { + offsets: Box, A>, + data: Vec, +} + +impl BlockPartitionedVec { + #[expect(unsafe_code)] + fn build_offsets( + mut iter: impl ExactSizeIterator, + alloc: A, + ) -> (Box, A>, usize) { + let mut offsets = Box::new_uninit_slice_in(iter.len() + 1, alloc); + + let mut offset = 0_u32; + + offsets[0].write(0); + + let (_, rest) = offsets[1..].write_iter(iter::from_fn(|| { + let next = iter.next()?; + + offset += next; + + Some(offset) + })); + + debug_assert!(rest.is_empty()); + debug_assert_eq!(iter.len(), 0); + + // SAFETY: We have initialized all elements of the slice. + let offsets = unsafe { offsets.assume_init() }; + let offsets = BasicBlockSlice::from_boxed_slice(offsets); + + (offsets, offset as usize) + } + + #[inline] + fn range(&self, block: BasicBlockId) -> core::ops::Range { + (self.offsets[block] as usize)..(self.offsets[block.plus(1)] as usize) + } + + /// Returns the slice of values for `block`. + #[inline] + pub(crate) fn of(&self, block: BasicBlockId) -> &[T] { + let range = self.range(block); + &self.data[range] + } + + /// Returns a mutable slice of values for `block`. + #[inline] + pub(crate) fn of_mut(&mut self, block: BasicBlockId) -> &mut [T] { + let range = self.range(block); + &mut self.data[range] + } + + /// Returns an iterator over all values in the flat data array. + #[cfg(test)] + pub(crate) fn iter(&self) -> impl Iterator { + self.data.iter() + } + + /// Returns the total number of elements across all blocks. + pub(crate) const fn len(&self) -> usize { + self.data.len() + } + + /// Returns the number of blocks in the partition. + #[cfg(test)] + pub(crate) fn block_count(&self) -> usize { + self.offsets.len() - 1 + } + + /// Rebuilds the offset table for a new partitioning. 
+ /// + /// Call after transforms that change element counts per block. Does not resize or clear + /// the data array; callers must ensure the total element count remains unchanged. + pub(crate) fn remap(&mut self, counts: impl ExactSizeIterator) + where + A: Clone, + { + let alloc = Box::allocator(&self.offsets).clone(); + + let (offsets, _) = Self::build_offsets(counts, alloc); + self.offsets = offsets; + } +} + +impl BlockPartitionedVec { + /// Creates a new `BlockPartitionedVec` from per-block counts, with all values initialized + /// to `value`. + pub(crate) fn new(counts: impl ExactSizeIterator, value: T, alloc: A) -> Self { + let (offsets, length) = Self::build_offsets(counts, alloc.clone()); + let data = alloc::vec::from_elem_in(value, length, alloc); + + Self { offsets, data } + } +} + +#[cfg(test)] +mod tests { + #![expect(clippy::cast_possible_truncation)] + use alloc::alloc::Global; + + use super::BlockPartitionedVec; + use crate::body::basic_block::BasicBlockId; + + /// Single block with 5 elements: all accessible via `of()`/`of_mut()`. + #[test] + fn single_block() { + let mut vec = BlockPartitionedVec::new([5].into_iter(), 0_u32, Global); + + assert_eq!(vec.len(), 5); + assert_eq!(vec.block_count(), 1); + + let slice = vec.of_mut(BasicBlockId::new(0)); + for (index, value) in slice.iter_mut().enumerate() { + *value = index as u32; + } + + let slice = vec.of(BasicBlockId::new(0)); + assert_eq!(slice, &[0, 1, 2, 3, 4]); + } + + /// Multiple blocks with varying sizes: elements are correctly partitioned. 
+ #[test] + fn multiple_blocks() { + let mut vec = BlockPartitionedVec::new([2, 3, 1].into_iter(), 0_u32, Global); + + assert_eq!(vec.len(), 6); + assert_eq!(vec.block_count(), 3); + + vec.of_mut(BasicBlockId::new(0))[0] = 10; + vec.of_mut(BasicBlockId::new(0))[1] = 20; + vec.of_mut(BasicBlockId::new(1))[0] = 30; + vec.of_mut(BasicBlockId::new(1))[1] = 40; + vec.of_mut(BasicBlockId::new(1))[2] = 50; + vec.of_mut(BasicBlockId::new(2))[0] = 60; + + assert_eq!(vec.of(BasicBlockId::new(0)), &[10, 20]); + assert_eq!(vec.of(BasicBlockId::new(1)), &[30, 40, 50]); + assert_eq!(vec.of(BasicBlockId::new(2)), &[60]); + } + + /// Blocks with zero elements produce empty slices. + #[test] + fn empty_blocks() { + let vec = BlockPartitionedVec::new([0, 3, 0].into_iter(), 0_u32, Global); + + assert_eq!(vec.len(), 3); + assert_eq!(vec.block_count(), 3); + assert!(vec.of(BasicBlockId::new(0)).is_empty()); + assert_eq!(vec.of(BasicBlockId::new(1)).len(), 3); + assert!(vec.of(BasicBlockId::new(2)).is_empty()); + } + + /// Zero blocks is valid. + #[test] + fn no_blocks() { + let vec = BlockPartitionedVec::new(core::iter::empty::(), 0_u32, Global); + + assert_eq!(vec.len(), 0); + assert_eq!(vec.block_count(), 0); + } + + /// `iter()` yields all elements in flat order. + #[test] + fn iter_all_elements() { + let mut vec = BlockPartitionedVec::new([2, 1].into_iter(), 0_u32, Global); + + vec.of_mut(BasicBlockId::new(0))[0] = 1; + vec.of_mut(BasicBlockId::new(0))[1] = 2; + vec.of_mut(BasicBlockId::new(1))[0] = 3; + + let collected: Vec = vec.iter().copied().collect(); + assert_eq!(collected, vec![1, 2, 3]); + } + + /// `remap()` rebuilds the offset table without changing data. 
+ #[test] + fn remap_preserves_data() { + let mut vec = BlockPartitionedVec::new([3, 3].into_iter(), 0_u32, Global); + + // Write sequential values + for (index, value) in vec.of_mut(BasicBlockId::new(0)).iter_mut().enumerate() { + *value = index as u32; + } + for (index, value) in vec.of_mut(BasicBlockId::new(1)).iter_mut().enumerate() { + *value = (index + 3) as u32; + } + + // Remap to a different partitioning with the same total count + vec.remap([2, 4].into_iter()); + + assert_eq!(vec.block_count(), 2); + assert_eq!(vec.of(BasicBlockId::new(0)), &[0, 1]); + assert_eq!(vec.of(BasicBlockId::new(1)), &[2, 3, 4, 5]); + } +} diff --git a/libs/@local/hashql/mir/src/pass/execution/cost.rs b/libs/@local/hashql/mir/src/pass/execution/cost.rs index f587fe0b026..5ad56b55ef0 100644 --- a/libs/@local/hashql/mir/src/pass/execution/cost.rs +++ b/libs/@local/hashql/mir/src/pass/execution/cost.rs @@ -7,17 +7,18 @@ use alloc::alloc::Global; use core::{ alloc::Allocator, fmt, - iter::{self, Sum}, + iter::Sum, ops::{Add, AddAssign, Index, IndexMut, Mul, MulAssign}, }; use std::f32; -use hashql_core::id::{Id as _, bit_vec::DenseBitSet}; +use hashql_core::id::bit_vec::DenseBitSet; +use super::block_partitioned_vec::BlockPartitionedVec; use crate::{ body::{ Body, - basic_block::{BasicBlockId, BasicBlockSlice}, + basic_block::BasicBlockId, basic_blocks::BasicBlocks, local::{Local, LocalVec}, location::Location, @@ -369,96 +370,52 @@ impl IntoIterator for &TraversalCostVec { /// indicates the target cannot execute that statement. The execution planner compares costs /// across targets to determine the optimal execution strategy. 
#[derive(Debug)] -pub(crate) struct StatementCostVec { - offsets: Box, A>, - costs: Vec, A>, -} - -impl StatementCostVec { - #[expect(unsafe_code)] - fn offsets( - mut iter: impl ExactSizeIterator, - alloc: A, - ) -> (Box, A>, usize) { - let mut offsets = Box::new_uninit_slice_in(iter.len() + 1, alloc); - - let mut offset = 0_u32; - - offsets[0].write(0); - - let (_, rest) = offsets[1..].write_iter(iter::from_fn(|| { - let next = iter.next()?; - - offset += next; - - Some(offset) - })); - - debug_assert!(rest.is_empty()); - debug_assert_eq!(iter.len(), 0); - - // SAFETY: We have initialized all elements of the slice. - let offsets = unsafe { offsets.assume_init() }; - let offsets = BasicBlockSlice::from_boxed_slice(offsets); - - (offsets, offset as usize) - } - - fn from_iter(iter: impl ExactSizeIterator, alloc: A) -> Self - where - A: Clone, - { - let (offsets, length) = Self::offsets(iter, alloc.clone()); - let costs = alloc::vec::from_elem_in(None, length, alloc); +pub(crate) struct StatementCostVec(BlockPartitionedVec, A>); - Self { offsets, costs } +impl StatementCostVec { + #[cfg(test)] + pub(crate) fn from_iter(iter: impl ExactSizeIterator, alloc: A) -> Self { + Self(BlockPartitionedVec::new(iter, None, alloc)) } /// Creates a cost map with space for all statements in the given blocks. /// /// All costs are initialized to `None` (unsupported). Use indexing to assign costs. #[expect(clippy::cast_possible_truncation)] - pub(crate) fn new_in(blocks: &BasicBlocks, alloc: A) -> Self - where - A: Clone, - { - Self::from_iter( + pub(crate) fn new_in(blocks: &BasicBlocks, alloc: A) -> Self { + Self(BlockPartitionedVec::new( blocks.iter().map(|block| block.statements.len() as u32), + None, alloc, - ) + )) } +} +impl StatementCostVec { /// Rebuilds the offset table for a new block layout. /// /// Call after transforms that change statement counts per block. Does not resize or clear - /// the cost data — callers must ensure the total statement count remains unchanged. 
+ /// the cost data; callers must ensure the total statement count remains unchanged. #[expect(clippy::cast_possible_truncation)] pub(crate) fn remap(&mut self, blocks: &BasicBlocks) where A: Clone, { - let alloc = Box::allocator(&self.offsets).clone(); - - let (offsets, _) = Self::offsets( - blocks.iter().map(|block| block.statements.len() as u32), - alloc, - ); - self.offsets = offsets; + self.0 + .remap(blocks.iter().map(|block| block.statements.len() as u32)); } /// Returns `true` if no statements have assigned costs. #[cfg(test)] pub(crate) fn all_unassigned(&self) -> bool { - self.costs.iter().all(Option::is_none) + self.0.iter().all(Option::is_none) } /// Returns the cost slice for all statements in `block`. /// /// The returned slice is indexed by statement position (0-based within the block). pub(crate) fn of(&self, block: BasicBlockId) -> &[Option] { - let range = (self.offsets[block] as usize)..(self.offsets[block.plus(1)] as usize); - - &self.costs[range] + self.0.of(block) } pub(crate) fn sum_approx(&self, block: BasicBlockId) -> ApproxCost { @@ -468,11 +425,8 @@ impl StatementCostVec { /// Returns the cost at `location`, or `None` if out of bounds or unassigned. 
#[cfg(test)] pub(crate) fn get(&self, location: Location) -> Option { - let range = (self.offsets[location.block] as usize) - ..(self.offsets[location.block.plus(1)] as usize); - - // statement_index is 1-based - self.costs[range] + self.0 + .of(location.block) .get(location.statement_index - 1) .copied() .flatten() @@ -483,21 +437,15 @@ impl Index for StatementCostVec { type Output = Option; fn index(&self, index: Location) -> &Self::Output { - let range = - (self.offsets[index.block] as usize)..(self.offsets[index.block.plus(1)] as usize); - // statement_index is 1-based - &self.costs[range][index.statement_index - 1] + &self.0.of(index.block)[index.statement_index - 1] } } impl IndexMut for StatementCostVec { fn index_mut(&mut self, index: Location) -> &mut Self::Output { - let range = - (self.offsets[index.block] as usize)..(self.offsets[index.block.plus(1)] as usize); - // statement_index is 1-based - &mut self.costs[range][index.statement_index - 1] + &mut self.0.of_mut(index.block)[index.statement_index - 1] } } @@ -547,13 +495,12 @@ mod tests { assert_eq!(Cost::new(100), Some(hundred)); } - /// `StatementCostVec` correctly indexes by `Location` across multiple blocks. + /// `StatementCostVec` uses 1-based `Location` indexing to address the underlying + /// 0-based `BlockPartitionedVec`. 
#[test] - fn statement_cost_vec_indexing() { - // bb0: 2 statements, bb1: 3 statements, bb2: 1 statement - let mut costs = StatementCostVec::from_iter([2, 3, 1].into_iter(), Global); + fn statement_cost_vec_location_indexing() { + let mut costs = StatementCostVec::from_iter([2, 3].into_iter(), Global); - // Assign costs at various locations let loc_0_1 = Location { block: BasicBlockId::new(0), statement_index: 1, @@ -566,98 +513,20 @@ mod tests { block: BasicBlockId::new(1), statement_index: 2, }; - let loc_2_1 = Location { - block: BasicBlockId::new(2), - statement_index: 1, - }; costs[loc_0_1] = Some(cost!(10)); costs[loc_0_2] = Some(cost!(20)); costs[loc_1_2] = Some(cost!(30)); - costs[loc_2_1] = Some(cost!(40)); - // Verify retrieval assert_eq!(costs.get(loc_0_1), Some(cost!(10))); assert_eq!(costs.get(loc_0_2), Some(cost!(20))); assert_eq!(costs.get(loc_1_2), Some(cost!(30))); - assert_eq!(costs.get(loc_2_1), Some(cost!(40))); - // Unassigned locations return None + // Unassigned location returns None let loc_1_1 = Location { block: BasicBlockId::new(1), statement_index: 1, }; assert_eq!(costs.get(loc_1_1), None); } - - /// `StatementCostVec` initialization with a single block. - /// - /// This test exercises unsafe code and should be run under Miri. - #[test] - fn statement_cost_vec_init_single_block() { - // Single block with 5 statements - let mut costs = StatementCostVec::from_iter([5].into_iter(), Global); - - // All 5 statements should be accessible - for index in 1..=5_u32 { - let location = Location { - block: BasicBlockId::new(0), - statement_index: index as usize, - }; - - costs[location] = Some(Cost::new(index).expect("should be non-zero")); - } - - for index in 1..=5 { - let location = Location { - block: BasicBlockId::new(0), - statement_index: index as usize, - }; - - assert_eq!(costs.get(location), Cost::new(index)); - } - } - - /// `StatementCostVec` initialization with multiple blocks of varying sizes. 
- /// - /// This test exercises unsafe code and should be run under Miri. - #[test] - fn statement_cost_vec_init_multiple_blocks() { - // 0 statements, 1 statement, 5 statements - let mut costs = StatementCostVec::from_iter([0, 1, 5].into_iter(), Global); - - // bb1 has 1 statement - let loc_1_1 = Location { - block: BasicBlockId::new(1), - statement_index: 1, - }; - costs[loc_1_1] = Some(cost!(100)); - assert_eq!(costs.get(loc_1_1), Some(cost!(100))); - - // bb2 has 5 statements - for index in 1..=5 { - let location = Location { - block: BasicBlockId::new(2), - statement_index: index as usize, - }; - - costs[location] = Some(Cost::new(index).expect("non-zero")); - } - for index in 1..=5 { - let location = Location { - block: BasicBlockId::new(2), - statement_index: index as usize, - }; - assert_eq!(costs.get(location), Cost::new(index)); - } - } - - /// `StatementCostVec` initialization with zero blocks. - /// - /// This test exercises unsafe code and should be run under Miri. - #[test] - fn statement_cost_vec_init_empty() { - // Should not panic - let _costs = StatementCostVec::from_iter(core::iter::empty::(), Global); - } } diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index 7d0472a7dbd..f7b0bc0a474 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -7,15 +7,16 @@ macro_rules! 
cost { #[cfg(test)] mod tests; +mod block_partitioned_vec; mod cost; mod fusion; mod island; mod placement; mod splitting; mod statement_placement; -pub mod storage; mod target; mod terminator_placement; +pub mod traversal; mod vertex; use core::{alloc::Allocator, assert_matches}; diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs index 06fb609d65e..da9c84cb245 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs @@ -30,7 +30,7 @@ use crate::{ execution::{ Cost, cost::{StatementCostVec, TraversalCostVec}, - storage::{Access, EntityPath}, + traversal::{Access, EntityPath}, }, }, visit::Visitor, diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs index 2a5e91af656..cdb2205cd82 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs @@ -14,7 +14,7 @@ use crate::{ Cost, VertexType, cost::{StatementCostVec, TraversalCostVec}, statement_placement::common::entity_projection_access, - storage::Access, + traversal::Access, }, transform::Traversals, }, diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs index 2a34f13341e..92d2a82ef25 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs @@ -32,7 +32,7 @@ use crate::{ VertexType, cost::{Cost, StatementCostVec, TraversalCostVec}, statement_placement::common::entity_projection_access, - storage::Access, + traversal::Access, }, 
transform::Traversals, }, diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/mod.rs b/libs/@local/hashql/mir/src/pass/execution/storage/mod.rs deleted file mode 100644 index b7caca00e8a..00000000000 --- a/libs/@local/hashql/mir/src/pass/execution/storage/mod.rs +++ /dev/null @@ -1,9 +0,0 @@ -mod access; -mod entity; - -#[cfg(test)] -mod tests; - -pub use entity::{EntityPath, EntityPathBitSet}; - -pub(crate) use self::access::Access; diff --git a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs index 8937633aff8..801f9ddcbf3 100644 --- a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs @@ -34,7 +34,6 @@ use alloc::alloc::Global; use core::{ alloc::Allocator, - iter, ops::{Index, IndexMut}, }; @@ -52,6 +51,7 @@ use hashql_core::{ use super::{ Cost, + block_partitioned_vec::BlockPartitionedVec, target::{TargetBitSet, TargetId}, }; use crate::{ @@ -238,54 +238,16 @@ impl IndexMut<(TargetId, TargetId)> for TransMatrix { /// [`Return`]: TerminatorKind::Return /// [`Unreachable`]: TerminatorKind::Unreachable #[derive(Debug)] -pub(crate) struct TerminatorCostVec { - offsets: Box, A>, - matrices: Vec, -} - -impl TerminatorCostVec { - #[expect(unsafe_code)] - fn compute_offsets( - mut iter: impl ExactSizeIterator, - alloc: A, - ) -> (Box, A>, usize) { - let mut offsets = Box::new_uninit_slice_in(iter.len() + 1, alloc); - let mut running_offset = 0_u32; - - offsets[0].write(0); - - let (_, rest) = offsets[1..].write_iter(iter::from_fn(|| { - let successor_count = iter.next()?; - running_offset += successor_count; - Some(running_offset) - })); - - debug_assert!(rest.is_empty()); - debug_assert_eq!(iter.len(), 0); - - // SAFETY: All elements initialized by write_iter loop. 
- let offsets = unsafe { offsets.assume_init() }; - let offsets = BasicBlockSlice::from_boxed_slice(offsets); - - (offsets, running_offset as usize) - } - - fn from_successor_counts(iter: impl ExactSizeIterator, alloc: A) -> Self - where - A: Clone, - { - let (offsets, total_edges) = Self::compute_offsets(iter, alloc.clone()); - let matrices = alloc::vec::from_elem_in(TransMatrix::new(), total_edges, alloc); - - Self { offsets, matrices } - } +pub(crate) struct TerminatorCostVec(BlockPartitionedVec); +impl TerminatorCostVec { /// Creates a cost vector sized for `blocks`, with all transitions initially disallowed. - pub(crate) fn new(blocks: &BasicBlocks, alloc: A) -> Self - where - A: Clone, - { - Self::from_successor_counts(blocks.iter().map(Self::successor_count), alloc) + pub(crate) fn new(blocks: &BasicBlocks, alloc: A) -> Self { + Self(BlockPartitionedVec::new( + blocks.iter().map(|block| Self::successor_count(block)), + TransMatrix::new(), + alloc, + )) } #[expect(clippy::cast_possible_truncation)] @@ -296,25 +258,27 @@ impl TerminatorCostVec { TerminatorKind::Return(_) | TerminatorKind::Unreachable => 0, } } +} +impl TerminatorCostVec { pub(crate) const fn len(&self) -> usize { - self.matrices.len() + self.0.len() + } + + /// Returns the number of blocks in the partition. + #[cfg(test)] + pub(crate) fn block_count(&self) -> usize { + self.0.block_count() } /// Returns the transition matrices for all successor edges of `block`. pub(crate) fn of(&self, block: BasicBlockId) -> &[TransMatrix] { - let start = self.offsets[block] as usize; - let end = self.offsets[block.plus(1)] as usize; - - &self.matrices[start..end] + self.0.of(block) } /// Returns mutable transition matrices for all successor edges of `block`. 
pub(crate) fn of_mut(&mut self, block: BasicBlockId) -> &mut [TransMatrix] { - let start = self.offsets[block] as usize; - let end = self.offsets[block.plus(1)] as usize; - - &mut self.matrices[start..end] + self.0.of_mut(block) } } diff --git a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs index 28b8cd8d898..a76cf25eb48 100644 --- a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs @@ -127,7 +127,7 @@ fn format_edge_summary( edges: &TerminatorCostVec, ) -> impl Display + '_ { fmt::from_fn(move |fmt| { - for block in 0..(edges.offsets.len() - 1) { + for block in 0..edges.block_count() { let block_id = BasicBlockId::from_usize(block); let matrices = edges.of(block_id); writeln!(fmt, "{block_id}:")?; diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/access.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/access.rs similarity index 100% rename from libs/@local/hashql/mir/src/pass/execution/storage/access.rs rename to libs/@local/hashql/mir/src/pass/execution/traversal/access.rs diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs similarity index 100% rename from libs/@local/hashql/mir/src/pass/execution/storage/entity.rs rename to libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs new file mode 100644 index 00000000000..b931a29487b --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs @@ -0,0 +1,12 @@ +mod access; +mod entity; +mod path; + +#[cfg(test)] +mod tests; + +pub(crate) use self::access::Access; +pub use self::{ + entity::{EntityPath, EntityPathBitSet}, + path::TraversalPaths, +}; diff --git 
a/libs/@local/hashql/mir/src/pass/execution/traversal/path.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/path.rs new file mode 100644 index 00000000000..51b4db8037f --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/path.rs @@ -0,0 +1,14 @@ +use super::EntityPathBitSet; + +/// Resolved traversal paths for a single vertex access. +/// +/// Each variant corresponds to a vertex type in the graph schema. A `GraphReadFilter` body +/// operates over exactly one vertex type, so all traversal locals within a body share the same +/// variant. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum TraversalPaths { + /// Paths into the entity schema. + /// + /// An all-bits-set bitset indicates full entity access is required. + Entity(EntityPathBitSet), +} diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/tests.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs similarity index 99% rename from libs/@local/hashql/mir/src/pass/execution/storage/tests.rs rename to libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs index 5dda38abefd..57679562512 100644 --- a/libs/@local/hashql/mir/src/pass/execution/storage/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs @@ -8,7 +8,7 @@ use crate::{ local::Local, place::{Projection, ProjectionKind}, }, - pass::execution::storage::EntityPath, + pass::execution::traversal::EntityPath, }; /// Helper to create a `FieldByName` projection. 
From 8d766643f04d995e7eb43c452dae0d816c8b8c05 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sat, 28 Feb 2026 19:06:59 +0100 Subject: [PATCH 06/32] feat: traversal path --- .../mir/src/pass/execution/traversal/mod.rs | 37 ++++++++++++++++--- .../mir/src/pass/execution/traversal/path.rs | 14 ------- 2 files changed, 32 insertions(+), 19 deletions(-) delete mode 100644 libs/@local/hashql/mir/src/pass/execution/traversal/path.rs diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs index b931a29487b..567e2abbf0d 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs @@ -1,12 +1,39 @@ +//! Traversal path resolution and storage mapping. +//! +//! Maps property access projections on graph vertices to their backend storage locations. +//! Each vertex type has its own path enum ([`EntityPath`] for entities) that resolves +//! dot-notation field accesses to specific columns, JSONB paths, or embedding stores. +//! +//! [`TraversalPathBitSet`] and [`TraversalPath`] wrap the per-vertex-type path types so that +//! the execution pipeline can handle different vertex types uniformly. + mod access; mod entity; -mod path; #[cfg(test)] mod tests; pub(crate) use self::access::Access; -pub use self::{ - entity::{EntityPath, EntityPathBitSet}, - path::TraversalPaths, -}; +pub use self::entity::{EntityPath, EntityPathBitSet}; + +/// Set of resolved traversal paths for a single vertex type. +/// +/// Each variant wraps the bitset for a specific vertex type. A [`GraphReadFilter`] body operates +/// over exactly one vertex type, so all traversal locals within a body share the same variant. +/// +/// An all-bits-set bitset indicates full vertex access is required (the path could not be +/// resolved to a specific field). 
+/// +/// [`GraphReadFilter`]: crate::body::Source::GraphReadFilter +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum TraversalPathBitSet { + /// Paths into the entity schema. + Entity(EntityPathBitSet), +} + +/// A single resolved traversal path for a specific vertex type. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum TraversalPath { + /// A path into the entity schema. + Entity(EntityPath), +} diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/path.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/path.rs deleted file mode 100644 index 51b4db8037f..00000000000 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/path.rs +++ /dev/null @@ -1,14 +0,0 @@ -use super::EntityPathBitSet; - -/// Resolved traversal paths for a single vertex access. -/// -/// Each variant corresponds to a vertex type in the graph schema. A `GraphReadFilter` body -/// operates over exactly one vertex type, so all traversal locals within a body share the same -/// variant. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum TraversalPaths { - /// Paths into the entity schema. - /// - /// An all-bits-set bitset indicates full entity access is required. 
- Entity(EntityPathBitSet), -} From d9208e6b36fa4cbad77ecefaa480946b252e06b0 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sat, 28 Feb 2026 19:51:28 +0100 Subject: [PATCH 07/32] feat: move analysis traversal pass (WIP) --- .../src/pass/execution/traversal/analysis.rs | 137 ++++++++++++++++++ .../mir/src/pass/execution/traversal/mod.rs | 36 +++++ 2 files changed, 173 insertions(+) create mode 100644 libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs new file mode 100644 index 00000000000..b858c4431bc --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs @@ -0,0 +1,137 @@ +use core::{ + alloc::Allocator, + ops::{Index, IndexMut}, +}; + +use hashql_core::r#type::environment::Environment; + +use super::TraversalPathBitSet; +use crate::{ + body::{ + basic_block::BasicBlockId, + basic_blocks::BasicBlocks, + local::{Local, LocalDecl, LocalSlice}, + location::Location, + place::{DefUse, Place, PlaceContext}, + }, + pass::execution::{ + VertexType, block_partitioned_vec::BlockPartitionedVec, traversal::EntityPath, + }, + visit::{self, Visitor}, +}; +/// Per-statement resolved traversal paths for a graph read filter body. +/// +/// Stores a [`TraversalPathBitSet`] for every statement position, recording which vertex +/// fields each statement accesses. Indexed by [`Location`] (1-based statement index). +pub struct Traversals { + inner: BlockPartitionedVec, +} + +impl Traversals { + /// Creates a traversal map with space for all statements in the given blocks. + /// + /// All positions are initialized to an empty bitset for the given vertex type. 
+ #[expect(clippy::cast_possible_truncation)] + pub(crate) fn new_in(blocks: &BasicBlocks, vertex: VertexType, alloc: A) -> Self { + Self { + inner: BlockPartitionedVec::new( + blocks.iter().map(|block| block.statements.len() as u32), + TraversalPathBitSet::empty(vertex), + alloc, + ), + } + } +} + +impl Traversals { + /// Returns the traversal path sets for all statements in `block`. + /// + /// The returned slice is indexed by statement position (0-based within the block). + pub(crate) fn of(&self, block: BasicBlockId) -> &[TraversalPathBitSet] { + self.inner.of(block) + } + + /// Returns a mutable slice of traversal path sets for all statements in `block`. + pub(crate) fn of_mut(&mut self, block: BasicBlockId) -> &mut [TraversalPathBitSet] { + self.inner.of_mut(block) + } + + /// Rebuilds the offset table for a new block layout. + /// + /// Call after transforms that change statement counts per block. Does not resize or clear + /// the data; callers must ensure the total statement count remains unchanged. 
+ #[expect(clippy::cast_possible_truncation)] + pub(crate) fn remap(&mut self, blocks: &BasicBlocks) + where + A: Clone, + { + self.inner + .remap(blocks.iter().map(|block| block.statements.len() as u32)); + } +} + +impl Index for Traversals { + type Output = TraversalPathBitSet; + + fn index(&self, index: Location) -> &Self::Output { + &self.inner.of(index.block)[index.statement_index - 1] + } +} + +impl IndexMut for Traversals { + fn index_mut(&mut self, index: Location) -> &mut Self::Output { + &mut self.inner.of_mut(index.block)[index.statement_index - 1] + } +} + +struct TraversalAnalysisVisitor<'env, 'heap, A: Allocator> { + env: &'env Environment<'heap>, + vertex: VertexType, + traversals: Traversals, + locals: &'env LocalSlice>, +} + +impl<'heap, A: Allocator> Visitor<'heap> for TraversalAnalysisVisitor<'_, 'heap, A> { + type Result = Result<(), !>; + + fn visit_place( + &mut self, + location: Location, + context: PlaceContext, + place: &Place<'heap>, + ) -> Self::Result { + if place.local != Local::VERTEX { + // We do not target the vertex itself, so no traversals need to be recorded. + return Ok(()); + } + + if context.into_def_use() != Some(DefUse::Use) { + // We're only interested in `DefUse::Use` + return Ok(()); + } + + match self.vertex { + VertexType::Entity => { + let current = self.traversals[location] + .as_entity_mut() + .unwrap_or_else(|| { + unreachable!("a graph body cannot traverse over multiple types") + }); + + let path = EntityPath::resolve(&place.projections); + + if let Some((path, _)) = path { + current.insert(path); + } else { + // The path leads to "nothing", indicating that we must hydrate the entire + // entity. 
+ current.insert_range(..); + } + } + } + + visit::r#ref::walk_place(self, location, context, place) + } +} + +// TODO: proper pass that goes over the basic blocks, and does all the required stuff diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs index 567e2abbf0d..b57fa0190ba 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs @@ -10,11 +10,13 @@ mod access; mod entity; +mod analysis; #[cfg(test)] mod tests; pub(crate) use self::access::Access; pub use self::entity::{EntityPath, EntityPathBitSet}; +use super::VertexType; /// Set of resolved traversal paths for a single vertex type. /// @@ -31,6 +33,40 @@ pub enum TraversalPathBitSet { Entity(EntityPathBitSet), } +#[expect( + clippy::unnecessary_wraps, + reason = "currently only entities are supported, this will change in the future" +)] +impl TraversalPathBitSet { + /// Creates an empty bitset for the given vertex type. + #[must_use] + pub const fn empty(vertex: VertexType) -> Self { + match vertex { + #[expect(clippy::cast_possible_truncation)] + VertexType::Entity => Self::Entity(EntityPathBitSet::new_empty( + core::mem::variant_count::() as u32, + )), + } + } + + /// Returns the inner [`EntityPathBitSet`] if this is the [`Entity`](Self::Entity) variant. + #[must_use] + pub const fn as_entity(&self) -> Option<&EntityPathBitSet> { + match self { + Self::Entity(bitset) => Some(bitset), + } + } + + /// Returns a mutable reference to the inner [`EntityPathBitSet`] if this is the + /// [`Entity`](Self::Entity) variant. + #[must_use] + pub const fn as_entity_mut(&mut self) -> Option<&mut EntityPathBitSet> { + match self { + Self::Entity(bitset) => Some(bitset), + } + } +} + /// A single resolved traversal path for a specific vertex type. 
#[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum TraversalPath { From 87b184fb3bed6d5f8ae59edcfc4494d6c09ca0d9 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sat, 28 Feb 2026 21:33:38 +0100 Subject: [PATCH 08/32] feat: checkpoint --- .../pass/analysis/dataflow/liveness/mod.rs | 37 ++++++++++++++++-- .../hashql/mir/src/pass/execution/mod.rs | 13 ++++--- .../statement_placement/embedding/mod.rs | 15 +++----- .../statement_placement/interpret/mod.rs | 12 +++--- .../pass/execution/statement_placement/mod.rs | 14 +++---- .../statement_placement/postgres/mod.rs | 15 +++----- .../src/pass/execution/traversal/analysis.rs | 38 ++++++++++++++++++- .../mir/src/pass/execution/traversal/mod.rs | 2 + 8 files changed, 105 insertions(+), 41 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs b/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs index df60f07649f..1deed3f8fc1 100644 --- a/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs +++ b/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs @@ -68,12 +68,15 @@ use crate::{ Body, local::Local, location::Location, - place::{DefUse, PlaceContext}, + place::{DefUse, Place, PlaceContext}, statement::{Assign, Statement, StatementKind}, terminator::Terminator, }, - pass::transform::Traversals, - visit::Visitor, + pass::{ + execution::{VertexType, traversal::EntityPath}, + transform::Traversals, + }, + visit::{self, Visitor}, }; /// Traversal-aware liveness analysis. 
@@ -164,6 +167,34 @@ impl Visitor<'_> for TraversalTransferFunction<'_> { Ok(()) } + + fn visit_place( + &mut self, + location: Location, + context: PlaceContext, + place: &Place<'_>, + ) -> Self::Result { + let Some(def_use) = context.into_def_use() else { + return Ok(()); + }; + + let vertex = VertexType::Entity; // TODO: actually do this properly + + // Check if the place is a vertex, and the vertex type results in a partial result, in that + // case we do *not* continue, because it is considered a partial traversal and does not + // contribute to the liveness analysis of the partially hydrated entity. + if def_use == DefUse::Use + && place.local == Local::VERTEX + && EntityPath::resolve(&place.projections).is_some() + { + // This is a *valid* partial traversal, and does therefore not contribute to the full + // liveness of the entity. (This is required to ensure that we're not evaluating the + // full size of the entity on transition if we don't need it.) + return Ok(()); + } + + visit::r#ref::walk_place(self, location, context, place) + } } /// Computes liveness information for all locals in a MIR body. 
diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index f7b0bc0a474..69b85864147 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -38,6 +38,7 @@ use self::{ statement_placement::{StatementPlacement as _, TargetPlacementStatement}, target::TargetArray, terminator_placement::TerminatorPlacement, + traversal::TraversalAnalysis, }; use super::{analysis::size_estimation::BodyFootprint, transform::Traversals}; use crate::{ @@ -64,11 +65,10 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { ) { assert_matches!(body.source, Source::GraphReadFilter(_)); - let traversals = self - .traversals - .lookup(body.id) - .unwrap_or_else(|| unreachable!()); + let mut traversals = TraversalAnalysis::traversal_analysis_in(context, body, &self.scratch); + // TODO: This is no longer fully needed, instead each target array should create a cost + // estimation, based on retrieval cost, not(!) size, for each item in the id. 
let mut traversal_costs: TargetArray<_> = TargetArray::from_fn(|_| None); let mut statement_costs: TargetArray<_> = TargetArray::from_fn(|_| None); @@ -79,7 +79,7 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { let mut statement = TargetPlacementStatement::new_in(target, &traversal_costs, &self.scratch); let (traversal_cost, statement_cost) = - statement.statement_placement_in(context, body, traversals, &self.scratch); + statement.statement_placement_in(context, body, &traversals, &self.scratch); traversal_costs[target] = Some(traversal_cost); statement_costs[target] = Some(statement_cost); @@ -95,6 +95,9 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { &self.scratch, ); + // The body has been split (sequentially) and like the statement costs needs to be remapped + traversals.remap(&body.basic_blocks); + let terminators = TerminatorPlacement::new_in(InformationRange::full(), &self.scratch); let mut terminator_costs = terminators.terminator_placement_in( body, diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs index cdb2205cd82..4f644bff34f 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs @@ -9,14 +9,11 @@ use super::{ use crate::{ body::{Body, Source, local::Local, operand::Operand, place::Place, rvalue::RValue}, context::MirContext, - pass::{ - execution::{ - Cost, VertexType, - cost::{StatementCostVec, TraversalCostVec}, - statement_placement::common::entity_projection_access, - traversal::Access, - }, - transform::Traversals, + pass::execution::{ + Cost, VertexType, + cost::{StatementCostVec, TraversalCostVec}, + statement_placement::common::entity_projection_access, + traversal::{Access, Traversals}, }, visit::Visitor as _, }; @@ -110,7 +107,7 @@ impl<'heap, A: Allocator + Clone, 
S: Allocator> StatementPlacement<'heap, A> &mut self, context: &MirContext<'_, 'heap>, body: &Body<'heap>, - traversals: &Traversals<'heap>, + traversals: &Traversals, alloc: A, ) -> (TraversalCostVec, StatementCostVec) { let statement_costs = StatementCostVec::new_in(&body.basic_blocks, alloc.clone()); diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs index bd2acf00b89..450bb3abba7 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs @@ -8,12 +8,10 @@ use crate::{ statement::{Assign, Statement, StatementKind}, }, context::MirContext, - pass::{ - execution::{ - cost::{Cost, StatementCostVec, TraversalCostVec}, - target::TargetArray, - }, - transform::Traversals, + pass::execution::{ + cost::{Cost, StatementCostVec, TraversalCostVec}, + target::TargetArray, + traversal::Traversals, }, visit::Visitor, }; @@ -92,7 +90,7 @@ impl<'heap, A: Allocator + Clone, B: Allocator> StatementPlacement<'heap, A> &mut self, _: &MirContext<'_, 'heap>, body: &Body<'heap>, - traversals: &Traversals<'heap>, + traversals: &Traversals, alloc: A, ) -> (TraversalCostVec, StatementCostVec) { let statement_costs = StatementCostVec::new_in(&body.basic_blocks, alloc.clone()); diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs index d39c0c77200..a9fc3e0dd0d 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs @@ -23,14 +23,14 @@ pub(crate) use self::{ embedding::EmbeddingStatementPlacement, interpret::InterpreterStatementPlacement, postgres::PostgresStatementPlacement, }; -use super::target::{TargetArray, TargetId}; +use super::{ + 
target::{TargetArray, TargetId}, + traversal::Traversals, +}; use crate::{ body::Body, context::MirContext, - pass::{ - execution::cost::{StatementCostVec, TraversalCostVec}, - transform::Traversals, - }, + pass::execution::cost::{StatementCostVec, TraversalCostVec}, }; /// Computes statement placement costs for a specific execution target. @@ -55,7 +55,7 @@ pub(crate) trait StatementPlacement<'heap, A: Allocator> { &mut self, context: &MirContext<'_, 'heap>, body: &Body<'heap>, - traversals: &Traversals<'heap>, + traversals: &Traversals, alloc: A, ) -> (TraversalCostVec, StatementCostVec); } @@ -91,7 +91,7 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> &mut self, context: &MirContext<'_, 'heap>, body: &Body<'heap>, - traversals: &Traversals<'heap>, + traversals: &Traversals, alloc: A, ) -> (TraversalCostVec, StatementCostVec) { match self { diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs index 92d2a82ef25..d12f06f8141 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs @@ -27,14 +27,11 @@ use crate::{ rvalue::{Aggregate, AggregateKind, BinOp, Binary, RValue, Unary}, }, context::MirContext, - pass::{ - execution::{ - VertexType, - cost::{Cost, StatementCostVec, TraversalCostVec}, - statement_placement::common::entity_projection_access, - traversal::Access, - }, - transform::Traversals, + pass::execution::{ + VertexType, + cost::{Cost, StatementCostVec, TraversalCostVec}, + statement_placement::common::entity_projection_access, + traversal::{Access, Traversals}, }, visit::Visitor as _, }; @@ -711,7 +708,7 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> &mut self, context: &MirContext<'_, 'heap>, body: &Body<'heap>, - traversals: &Traversals<'heap>, + 
traversals: &Traversals, alloc: A, ) -> (TraversalCostVec, StatementCostVec) { let traversal_costs = TraversalCostVec::new_in(body, traversals, alloc.clone()); diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs index b858c4431bc..5fe6f81b3da 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs @@ -8,12 +8,14 @@ use hashql_core::r#type::environment::Environment; use super::TraversalPathBitSet; use crate::{ body::{ + Body, Source, basic_block::BasicBlockId, basic_blocks::BasicBlocks, local::{Local, LocalDecl, LocalSlice}, location::Location, place::{DefUse, Place, PlaceContext}, }, + context::MirContext, pass::execution::{ VertexType, block_partitioned_vec::BlockPartitionedVec, traversal::EntityPath, }, @@ -23,7 +25,7 @@ use crate::{ /// /// Stores a [`TraversalPathBitSet`] for every statement position, recording which vertex /// fields each statement accesses. Indexed by [`Location`] (1-based statement index). -pub struct Traversals { +pub(crate) struct Traversals { inner: BlockPartitionedVec, } @@ -134,4 +136,38 @@ impl<'heap, A: Allocator> Visitor<'heap> for TraversalAnalysisVisitor<'_, 'heap, } } +pub(crate) struct TraversalAnalysis; + +impl TraversalAnalysis { + pub(crate) fn traversal_analysis_in<'heap, A: Allocator + Clone>( + context: &MirContext<'_, 'heap>, + body: &Body<'heap>, + alloc: A, + ) -> Traversals { + match body.source { + Source::GraphReadFilter(_) => {} + Source::Ctor(_) | Source::Closure(..) | Source::Thunk(..) 
| Source::Intrinsic(_) => { + panic!("traversal analysis may only be called on graph related operations") + } + }; + + let Some(vertex) = VertexType::from_local(context.env, &body.local_decls[Local::VERTEX]) + else { + unimplemented!("lookup for declared type") + }; + + let traversals = Traversals::new_in(&body.basic_blocks, vertex, alloc); + + let mut visitor = TraversalAnalysisVisitor { + env: context.env, + vertex, + traversals, + locals: &body.local_decls, + }; + Ok(()) = visitor.visit_body(body); + + visitor.traversals + } +} + // TODO: proper pass that goes over the basic blocks, and does all the required stuff diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs index b57fa0190ba..7ac83174bc8 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs @@ -14,6 +14,8 @@ mod analysis; #[cfg(test)] mod tests; +pub(crate) use analysis::{TraversalAnalysis, Traversals}; + pub(crate) use self::access::Access; pub use self::entity::{EntityPath, EntityPathBitSet}; use super::VertexType; From 4fdcf6396db1de7d30cbc2a77a5bb36f8481e540 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sun, 1 Mar 2026 13:03:40 +0100 Subject: [PATCH 09/32] feat: checkpoint --- libs/@local/hashql/mir/src/lib.rs | 1 + .../pass/analysis/dataflow/lattice/impls.rs | 13 ++ .../pass/analysis/dataflow/liveness/mod.rs | 2 +- .../hashql/mir/src/pass/execution/cost.rs | 74 +------- .../hashql/mir/src/pass/execution/mod.rs | 67 ++++---- .../execution/statement_placement/common.rs | 9 +- .../statement_placement/embedding/mod.rs | 14 +- .../statement_placement/interpret/mod.rs | 56 +++---- .../pass/execution/statement_placement/mod.rs | 33 ++-- .../statement_placement/postgres/mod.rs | 12 +- .../src/pass/execution/traversal/analysis.rs | 30 ++-- .../src/pass/execution/traversal/entity.rs | 158 +++++++++++++++++- 
.../mir/src/pass/execution/traversal/mod.rs | 35 +++- .../mir/src/pass/execution/traversal/tests.rs | 132 ++++++++++++++- 14 files changed, 432 insertions(+), 204 deletions(-) diff --git a/libs/@local/hashql/mir/src/lib.rs b/libs/@local/hashql/mir/src/lib.rs index f3b4006cd96..926e4807bab 100644 --- a/libs/@local/hashql/mir/src/lib.rs +++ b/libs/@local/hashql/mir/src/lib.rs @@ -2,6 +2,7 @@ //! //! ## Workspace dependencies #![cfg_attr(doc, doc = simple_mermaid::mermaid!("../docs/dependency-diagram.mmd"))] +#![allow(unused)] #![feature( // Language Features associated_type_defaults, diff --git a/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs b/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs index df40960b7d3..e072b375ff6 100644 --- a/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs +++ b/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs @@ -195,6 +195,19 @@ macro_rules! impl_bitset { impl_bitset!(DenseBitSet, ChunkedBitSet, MixedBitSet); +impl HasBottom<(T, U)> for V +where + V: HasBottom + HasBottom, +{ + fn bottom(&self) -> (T, U) { + (self.bottom(), self.bottom()) + } + + fn is_bottom(&self, value: &(T, U)) -> bool { + self.is_bottom(&value.0) && self.is_bottom(&value.1) + } +} + impl MeetSemiLattice for Reverse where U: JoinSemiLattice, diff --git a/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs b/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs index 1deed3f8fc1..0b1c7cdf725 100644 --- a/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs +++ b/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs @@ -178,7 +178,7 @@ impl Visitor<'_> for TraversalTransferFunction<'_> { return Ok(()); }; - let vertex = VertexType::Entity; // TODO: actually do this properly + let _vertex = VertexType::Entity; // TODO: actually do this properly // Check if the place is a vertex, and the vertex type results in a partial result, in that // case we 
do *not* continue, because it is considered a partial traversal and does not diff --git a/libs/@local/hashql/mir/src/pass/execution/cost.rs b/libs/@local/hashql/mir/src/pass/execution/cost.rs index 5ad56b55ef0..ec3e5ab350d 100644 --- a/libs/@local/hashql/mir/src/pass/execution/cost.rs +++ b/libs/@local/hashql/mir/src/pass/execution/cost.rs @@ -12,19 +12,10 @@ use core::{ }; use std::f32; -use hashql_core::id::bit_vec::DenseBitSet; - use super::block_partitioned_vec::BlockPartitionedVec; use crate::{ - body::{ - Body, - basic_block::BasicBlockId, - basic_blocks::BasicBlocks, - local::{Local, LocalVec}, - location::Location, - }, + body::{basic_block::BasicBlockId, basic_blocks::BasicBlocks, location::Location}, macros::{forward_ref_binop, forward_ref_op_assign}, - pass::transform::Traversals, }; /// Execution cost for a statement on a particular target. @@ -117,6 +108,14 @@ impl Cost { Self::new_saturating(raw.saturating_add(other.0.as_inner())) } + #[inline] + #[must_use] + pub const fn saturating_mul(self, other: u32) -> Self { + let raw = self.0.as_inner(); + + Self::new_saturating(raw.saturating_mul(other)) + } + #[expect(clippy::cast_precision_loss)] #[inline] #[must_use] @@ -309,61 +308,6 @@ impl Sum for ApproxCost { } } -/// Sparse cost map for traversal locals. -/// -/// Traversals are locals that require data fetching from a backend (e.g., entity field access). -/// This map only stores costs for locals marked as traversals; insertions for non-traversal -/// locals are ignored. This allows the execution planner to focus on the operations that actually -/// require backend coordination. -pub struct TraversalCostVec { - traversals: DenseBitSet, - costs: LocalVec, A>, -} - -impl TraversalCostVec { - /// Creates an empty traversal cost map for the given body. - /// - /// Only locals that are enabled traversals (per [`Traversals::enabled`]) will accept cost - /// insertions; other locals are silently ignored. 
- pub fn new_in<'heap>(body: &Body<'heap>, traversals: &Traversals<'heap>, alloc: A) -> Self { - Self { - traversals: traversals.enabled(body), - costs: LocalVec::new_in(alloc), - } - } - - /// Returns the cost assigned to `local`, or `None` if unassigned or not a traversal. - pub fn get(&self, local: Local) -> Option { - self.costs.lookup(local).copied() - } - - /// Records a cost for a traversal local. - /// - /// If `local` is not a traversal, the insertion is silently ignored. - pub fn insert(&mut self, local: Local, cost: Cost) { - if self.traversals.contains(local) { - self.costs.insert(local, cost); - } - } - - /// Iterates over all (local, cost) pairs that have assigned costs. - pub fn iter(&self) -> impl Iterator { - self.costs - .iter_enumerated() - .filter_map(|(local, cost)| cost.map(|cost| (local, cost))) - } -} - -impl IntoIterator for &TraversalCostVec { - type Item = (Local, Cost); - - type IntoIter = impl Iterator; - - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} - /// Dense cost map for all statements in a body. /// /// Stores the execution cost for every statement, indexed by [`Location`]. 
A `None` cost diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index 69b85864147..5a162e99b9d 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -31,13 +31,9 @@ pub use self::{ vertex::VertexType, }; use self::{ - fusion::BasicBlockFusion, - island::IslandPlacement, - placement::{ArcConsistency, PlacementSolverContext}, splitting::BasicBlockSplitting, statement_placement::{StatementPlacement as _, TargetPlacementStatement}, target::TargetArray, - terminator_placement::TerminatorPlacement, traversal::TraversalAnalysis, }; use super::{analysis::size_estimation::BodyFootprint, transform::Traversals}; @@ -45,7 +41,6 @@ use crate::{ body::{Body, Source, basic_block::BasicBlockVec}, context::MirContext, def::DefIdSlice, - pass::analysis::size_estimation::InformationRange, }; pub struct ExecutionAnalysis<'ctx, 'heap, S: Allocator> { @@ -67,28 +62,23 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { let mut traversals = TraversalAnalysis::traversal_analysis_in(context, body, &self.scratch); - // TODO: This is no longer fully needed, instead each target array should create a cost - // estimation, based on retrieval cost, not(!) size, for each item in the id. 
- let mut traversal_costs: TargetArray<_> = TargetArray::from_fn(|_| None); let mut statement_costs: TargetArray<_> = TargetArray::from_fn(|_| None); let mut targets = TargetId::all(); targets.reverse(); // We reverse the order, so that earlier targets (aka the interpreter) can have access to traversal costs for target in targets { - let mut statement = - TargetPlacementStatement::new_in(target, &traversal_costs, &self.scratch); - let (traversal_cost, statement_cost) = + let mut statement = TargetPlacementStatement::new_in(target, &self.scratch); + let statement_cost = statement.statement_placement_in(context, body, &traversals, &self.scratch); - traversal_costs[target] = Some(traversal_cost); statement_costs[target] = Some(statement_cost); } let mut statement_costs = statement_costs.map(|cost| cost.unwrap_or_else(|| unreachable!())); - let mut possibilities = BasicBlockSplitting::new_in(&self.scratch).split_in( + let _possibilities = BasicBlockSplitting::new_in(&self.scratch).split_in( context, body, &mut statement_costs, @@ -98,35 +88,38 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { // The body has been split (sequentially) and like the statement costs needs to be remapped traversals.remap(&body.basic_blocks); - let terminators = TerminatorPlacement::new_in(InformationRange::full(), &self.scratch); - let mut terminator_costs = terminators.terminator_placement_in( - body, - &self.footprints[body.id], - traversals, - &possibilities, - &self.scratch, - ); + todo!() - ArcConsistency { - blocks: &mut possibilities, - terminators: &mut terminator_costs, - } - .run_in(body, &self.scratch); + // let terminators = TerminatorPlacement::new_in(InformationRange::full(), &self.scratch); + // let mut terminator_costs = terminators.terminator_placement_in( + // body, + // &self.footprints[body.id], + // traversals, + // &possibilities, + // &self.scratch, + // ); - let mut solver = PlacementSolverContext { - assignment: &possibilities, - statements: 
&statement_costs, - terminators: &terminator_costs, - } - .build_in(body, &self.scratch); + // ArcConsistency { + // blocks: &mut possibilities, + // terminators: &mut terminator_costs, + // } + // .run_in(body, &self.scratch); + + // let mut solver = PlacementSolverContext { + // assignment: &possibilities, + // statements: &statement_costs, + // terminators: &terminator_costs, + // } + // .build_in(body, &self.scratch); - let mut assignment = solver.run(context, body); + // let mut assignment = solver.run(context, body); - let fusion = BasicBlockFusion::new_in(&self.scratch); - fusion.fuse(body, &mut assignment); + // let fusion = BasicBlockFusion::new_in(&self.scratch); + // fusion.fuse(body, &mut assignment); - let islands = IslandPlacement::new_in(&self.scratch).run(body, &assignment, context.heap); + // let islands = IslandPlacement::new_in(&self.scratch).run(body, &assignment, + // context.heap); - (assignment, islands) + // (assignment, islands) } } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs index da9c84cb245..aaf098cf66b 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs @@ -29,7 +29,7 @@ use crate::{ }, execution::{ Cost, - cost::{StatementCostVec, TraversalCostVec}, + cost::StatementCostVec, traversal::{Access, EntityPath}, }, }, @@ -276,7 +276,6 @@ pub(crate) struct CostVisitor<'ctx, 'env, 'heap, S, A: Allocator> { pub cost: Cost, pub statement_costs: StatementCostVec, - pub traversal_costs: TraversalCostVec, pub supported: S, } @@ -305,12 +304,6 @@ where )) .then_some(self.cost); - if let Some(cost) = cost - && lhs.projections.is_empty() - { - self.traversal_costs.insert(lhs.local, cost); - } - self.statement_costs[location] = cost; } StatementKind::StorageDead(_) | StatementKind::StorageLive(_) | StatementKind::Nop => { diff 
--git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs index 4f644bff34f..170d18c6969 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs @@ -11,7 +11,7 @@ use crate::{ context::MirContext, pass::execution::{ Cost, VertexType, - cost::{StatementCostVec, TraversalCostVec}, + cost::StatementCostVec, statement_placement::common::entity_projection_access, traversal::{Access, Traversals}, }, @@ -107,16 +107,15 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> &mut self, context: &MirContext<'_, 'heap>, body: &Body<'heap>, - traversals: &Traversals, + _traversals: &Traversals, alloc: A, - ) -> (TraversalCostVec, StatementCostVec) { - let statement_costs = StatementCostVec::new_in(&body.basic_blocks, alloc.clone()); - let traversal_costs = TraversalCostVec::new_in(body, traversals, alloc); + ) -> StatementCostVec { + let statement_costs = StatementCostVec::new_in(&body.basic_blocks, alloc); match body.source { Source::GraphReadFilter(_) => {} Source::Ctor(_) | Source::Closure(..) | Source::Thunk(..) 
| Source::Intrinsic(_) => { - return (traversal_costs, statement_costs); + return statement_costs; } } @@ -152,12 +151,11 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> cost: self.statement_cost, statement_costs, - traversal_costs, supported: &EmbeddingSupported, }; visitor.visit_body(body); - (visitor.traversal_costs, visitor.statement_costs) + visitor.statement_costs } } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs index 450bb3abba7..28270754328 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs @@ -9,8 +9,7 @@ use crate::{ }, context::MirContext, pass::execution::{ - cost::{Cost, StatementCostVec, TraversalCostVec}, - target::TargetArray, + cost::{Cost, StatementCostVec}, traversal::Traversals, }, visit::Visitor, @@ -19,14 +18,15 @@ use crate::{ #[cfg(test)] mod tests; -struct CostVisitor<'ctx, A: Allocator, B: Allocator> { +struct CostVisitor<'ctx, A: Allocator> { cost: Cost, + traversal_overhead: Cost, statement_costs: StatementCostVec, - traversal_costs: &'ctx TargetArray>>, + traversals: &'ctx Traversals, } -impl<'heap, A: Allocator, B: Allocator> Visitor<'heap> for CostVisitor<'_, A, B> { +impl<'heap, A: Allocator> Visitor<'heap> for CostVisitor<'_, A> { type Result = Result<(), !>; fn visit_statement( @@ -39,15 +39,15 @@ impl<'heap, A: Allocator, B: Allocator> Visitor<'heap> for CostVisitor<'_, A, B> StatementKind::Assign(Assign { lhs, rhs: _ }) => { // If it's a traversal load (aka we add the interpreter cost, as well as the cost to // load the statement). We assume worst case for the traversal. 
- let cost = if lhs.projections.is_empty() - && let Some(cost) = self - .traversal_costs - .iter() - .filter_map(|costs| costs.as_ref()) - .filter_map(|costs| costs.get(lhs.local)) - .max() - { - self.cost.saturating_add(cost) + #[expect( + clippy::cast_possible_truncation, + reason = "variant count is under u32::MAX" + )] + let cost = if lhs.projections.is_empty() { + self.cost.saturating_add( + self.traversal_overhead + .saturating_mul(self.traversals.path_count(location) as u32), + ) } else { self.cost }; @@ -67,49 +67,45 @@ impl<'heap, A: Allocator, B: Allocator> Visitor<'heap> for CostVisitor<'_, A, B> /// target. /// /// Supports all statements unconditionally, serving as the universal fallback. -pub(crate) struct InterpreterStatementPlacement<'ctx, A: Allocator> { - traversal_costs: &'ctx TargetArray>>, +pub(crate) struct InterpreterStatementPlacement { + traversal_overhead: Cost, statement_cost: Cost, } -impl<'ctx, A: Allocator> InterpreterStatementPlacement<'ctx, A> { - pub(crate) const fn new( - traversal_costs: &'ctx TargetArray>>, - ) -> Self { +impl InterpreterStatementPlacement { + pub(crate) const fn new() -> Self { Self { - traversal_costs, + traversal_overhead: cost!(4), statement_cost: cost!(8), } } } -impl<'heap, A: Allocator + Clone, B: Allocator> StatementPlacement<'heap, A> - for InterpreterStatementPlacement<'_, B> -{ +impl<'heap, A: Allocator + Clone> StatementPlacement<'heap, A> for InterpreterStatementPlacement { fn statement_placement_in( &mut self, _: &MirContext<'_, 'heap>, body: &Body<'heap>, traversals: &Traversals, alloc: A, - ) -> (TraversalCostVec, StatementCostVec) { - let statement_costs = StatementCostVec::new_in(&body.basic_blocks, alloc.clone()); - let traversal_costs = TraversalCostVec::new_in(body, traversals, alloc); + ) -> StatementCostVec { + let statement_costs = StatementCostVec::new_in(&body.basic_blocks, alloc); match body.source { Source::GraphReadFilter(_) => {} Source::Ctor(_) | Source::Closure(..) 
| Source::Thunk(..) | Source::Intrinsic(_) => { - return (traversal_costs, statement_costs); + return statement_costs; } } let mut visitor = CostVisitor { cost: self.statement_cost, statement_costs, - traversal_costs: self.traversal_costs, + traversal_overhead: self.traversal_overhead, + traversals, }; visitor.visit_body(body); - (traversal_costs, visitor.statement_costs) + visitor.statement_costs } } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs index a9fc3e0dd0d..e7f7d8e7af7 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs @@ -23,15 +23,8 @@ pub(crate) use self::{ embedding::EmbeddingStatementPlacement, interpret::InterpreterStatementPlacement, postgres::PostgresStatementPlacement, }; -use super::{ - target::{TargetArray, TargetId}, - traversal::Traversals, -}; -use crate::{ - body::Body, - context::MirContext, - pass::execution::cost::{StatementCostVec, TraversalCostVec}, -}; +use super::{target::TargetId, traversal::Traversals}; +use crate::{body::Body, context::MirContext, pass::execution::cost::StatementCostVec}; /// Computes statement placement costs for a specific execution target. 
/// @@ -57,26 +50,20 @@ pub(crate) trait StatementPlacement<'heap, A: Allocator> { body: &Body<'heap>, traversals: &Traversals, alloc: A, - ) -> (TraversalCostVec, StatementCostVec); + ) -> StatementCostVec; } -pub(crate) enum TargetPlacementStatement<'ctx, 'heap, S: Allocator> { - Interpreter(InterpreterStatementPlacement<'ctx, S>), +pub(crate) enum TargetPlacementStatement<'heap, S: Allocator> { + Interpreter(InterpreterStatementPlacement), Postgres(PostgresStatementPlacement<'heap, S>), Embedding(EmbeddingStatementPlacement), } -impl<'ctx, S: Allocator + Clone> TargetPlacementStatement<'ctx, '_, S> { +impl TargetPlacementStatement<'_, S> { #[must_use] - pub(crate) fn new_in( - target: TargetId, - traversals: &'ctx TargetArray>>, - scratch: S, - ) -> Self { + pub(crate) fn new_in(target: TargetId, scratch: S) -> Self { match target { - TargetId::Interpreter => { - Self::Interpreter(InterpreterStatementPlacement::new(traversals)) - } + TargetId::Interpreter => Self::Interpreter(InterpreterStatementPlacement::new()), TargetId::Postgres => Self::Postgres(PostgresStatementPlacement::new_in(scratch)), TargetId::Embedding => Self::Embedding(EmbeddingStatementPlacement::new_in(scratch)), } @@ -84,7 +71,7 @@ impl<'ctx, S: Allocator + Clone> TargetPlacementStatement<'ctx, '_, S> { } impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> - for TargetPlacementStatement<'_, 'heap, S> + for TargetPlacementStatement<'heap, S> { #[inline] fn statement_placement_in( @@ -93,7 +80,7 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> body: &Body<'heap>, traversals: &Traversals, alloc: A, - ) -> (TraversalCostVec, StatementCostVec) { + ) -> StatementCostVec { match self { TargetPlacementStatement::Interpreter(placement) => { placement.statement_placement_in(context, body, traversals, alloc) diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs 
b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs index d12f06f8141..e67ba18cf38 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs @@ -29,7 +29,7 @@ use crate::{ context::MirContext, pass::execution::{ VertexType, - cost::{Cost, StatementCostVec, TraversalCostVec}, + cost::{Cost, StatementCostVec}, statement_placement::common::entity_projection_access, traversal::{Access, Traversals}, }, @@ -708,16 +708,15 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> &mut self, context: &MirContext<'_, 'heap>, body: &Body<'heap>, - traversals: &Traversals, + _: &Traversals, alloc: A, - ) -> (TraversalCostVec, StatementCostVec) { - let traversal_costs = TraversalCostVec::new_in(body, traversals, alloc.clone()); + ) -> StatementCostVec { let statement_costs = StatementCostVec::new_in(&body.basic_blocks, alloc); match body.source { Source::GraphReadFilter(_) => {} Source::Ctor(_) | Source::Closure(..) | Source::Thunk(..) 
| Source::Intrinsic(_) => { - return (traversal_costs, statement_costs); + return statement_costs; } } @@ -752,12 +751,11 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> cost: self.statement_cost, statement_costs, - traversal_costs, supported: &supported, }; visitor.visit_body(body); - (visitor.traversal_costs, visitor.statement_costs) + visitor.statement_costs } } diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs index 5fe6f81b3da..8a405676ddd 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs @@ -3,15 +3,13 @@ use core::{ ops::{Index, IndexMut}, }; -use hashql_core::r#type::environment::Environment; - use super::TraversalPathBitSet; use crate::{ body::{ Body, Source, basic_block::BasicBlockId, basic_blocks::BasicBlocks, - local::{Local, LocalDecl, LocalSlice}, + local::Local, location::Location, place::{DefUse, Place, PlaceContext}, }, @@ -58,6 +56,13 @@ impl Traversals { self.inner.of_mut(block) } + /// Returns the number of vertex paths accessed by the statement at `location`. + #[inline] + #[must_use] + pub(crate) fn path_count(&self, location: Location) -> usize { + self[location].len() + } + /// Rebuilds the offset table for a new block layout. /// /// Call after transforms that change statement counts per block. 
Does not resize or clear @@ -86,14 +91,12 @@ impl IndexMut for Traversals { } } -struct TraversalAnalysisVisitor<'env, 'heap, A: Allocator> { - env: &'env Environment<'heap>, +struct TraversalAnalysisVisitor { vertex: VertexType, traversals: Traversals, - locals: &'env LocalSlice>, } -impl<'heap, A: Allocator> Visitor<'heap> for TraversalAnalysisVisitor<'_, 'heap, A> { +impl<'heap, A: Allocator> Visitor<'heap> for TraversalAnalysisVisitor { type Result = Result<(), !>; fn visit_place( @@ -127,7 +130,7 @@ impl<'heap, A: Allocator> Visitor<'heap> for TraversalAnalysisVisitor<'_, 'heap, } else { // The path leads to "nothing", indicating that we must hydrate the entire // entity. - current.insert_range(..); + current.insert_all(); } } } @@ -149,7 +152,7 @@ impl TraversalAnalysis { Source::Ctor(_) | Source::Closure(..) | Source::Thunk(..) | Source::Intrinsic(_) => { panic!("traversal analysis may only be called on graph related operations") } - }; + } let Some(vertex) = VertexType::from_local(context.env, &body.local_decls[Local::VERTEX]) else { @@ -158,16 +161,9 @@ impl TraversalAnalysis { let traversals = Traversals::new_in(&body.basic_blocks, vertex, alloc); - let mut visitor = TraversalAnalysisVisitor { - env: context.env, - vertex, - traversals, - locals: &body.local_decls, - }; + let mut visitor = TraversalAnalysisVisitor { vertex, traversals }; Ok(()) = visitor.visit_body(body); visitor.traversals } } - -// TODO: proper pass that goes over the basic blocks, and does all the required stuff diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs index c227f170a40..e46902ce8b4 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs @@ -99,8 +99,6 @@ const _: () = { ); }; -pub type EntityPathBitSet = FiniteBitSet; - impl EntityPath { #[must_use] pub fn resolve(projections: &[Projection<'_>]) -> 
Option<(Self, usize)> { @@ -140,6 +138,78 @@ impl EntityPath { } } + /// Returns the transitive children of this path in the composite hierarchy. + /// + /// Composites cover their children: [`RecordId`](Self::RecordId) covers + /// [`EntityId`](Self::EntityId) and all of its children, plus [`EditionId`](Self::EditionId). + /// Leaf paths return an empty slice. + const fn children(self) -> &'static [Self] { + match self { + Self::RecordId => &[ + Self::EntityId, + Self::WebId, + Self::EntityUuid, + Self::DraftId, + Self::EditionId, + ], + Self::EntityId => &[Self::WebId, Self::EntityUuid, Self::DraftId], + Self::TemporalVersioning => &[Self::DecisionTime, Self::TransactionTime], + Self::Properties + | Self::Vectors + | Self::WebId + | Self::EntityUuid + | Self::DraftId + | Self::EditionId + | Self::DecisionTime + | Self::TransactionTime + | Self::EntityTypeIds + | Self::Archived + | Self::Confidence + | Self::ProvenanceInferred + | Self::ProvenanceEdition + | Self::PropertyMetadata + | Self::LeftEntityWebId + | Self::LeftEntityUuid + | Self::RightEntityWebId + | Self::RightEntityUuid + | Self::LeftEntityConfidence + | Self::RightEntityConfidence + | Self::LeftEntityProvenance + | Self::RightEntityProvenance => &[], + } + } + + /// Returns the ancestor composites of this path, nearest first. + /// + /// For example, [`WebId`](Self::WebId) has ancestors + /// [`EntityId`](Self::EntityId) and [`RecordId`](Self::RecordId). + /// Top-level paths return an empty slice. 
+ const fn ancestors(self) -> &'static [Self] { + match self { + Self::WebId | Self::EntityUuid | Self::DraftId => &[Self::EntityId, Self::RecordId], + Self::EntityId | Self::EditionId => &[Self::RecordId], + Self::DecisionTime | Self::TransactionTime => &[Self::TemporalVersioning], + Self::Properties + | Self::Vectors + | Self::RecordId + | Self::TemporalVersioning + | Self::EntityTypeIds + | Self::Archived + | Self::Confidence + | Self::ProvenanceInferred + | Self::ProvenanceEdition + | Self::PropertyMetadata + | Self::LeftEntityWebId + | Self::LeftEntityUuid + | Self::RightEntityWebId + | Self::RightEntityUuid + | Self::LeftEntityConfidence + | Self::RightEntityConfidence + | Self::LeftEntityProvenance + | Self::RightEntityProvenance => &[], + } + } + const fn is_jsonb(self) -> bool { matches!( self, @@ -153,6 +223,90 @@ impl EntityPath { } } +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct EntityPathBitSet(FiniteBitSet); + +impl EntityPathBitSet { + #[expect(clippy::cast_possible_truncation)] + #[must_use] + pub const fn new_empty() -> Self { + Self(FiniteBitSet::new_empty( + core::mem::variant_count::() as u32, + )) + } + + /// Inserts this path into `bitset` with composite swallowing. + /// + /// If an ancestor composite is already present, the insertion is a no-op (the ancestor + /// already implies this path). If this path is a composite, any children already in the + /// set are removed (the composite subsumes them). 
+ pub(crate) fn insert(&mut self, path: EntityPath) { + for &ancestor in path.ancestors() { + if self.0.contains(ancestor) { + return; + } + } + + self.0.insert(path); + + for &child in path.children() { + self.0.remove(child); + } + } + + pub(crate) fn insert_all(&mut self) { + const HAS_ANCESTOR_COUNT: usize = { + let mut count = 0; + let mut index = 0; + let paths = EntityPath::all(); + + while index < paths.len() { + if !paths[index].ancestors().is_empty() { + count += 1; + } + + index += 1; + } + + count + }; + + const HAS_ANCESTORS: [EntityPath; HAS_ANCESTOR_COUNT] = { + let mut out = [EntityPath::Archived; HAS_ANCESTOR_COUNT]; + + let mut index = 0; + let mut ptr = 0; + let paths = EntityPath::all(); + + while ptr < paths.len() { + if !paths[ptr].ancestors().is_empty() { + out[index] = paths[ptr]; + index += 1; + } + + ptr += 1; + } + + out + }; + + self.0.insert_range(..); + + for path in HAS_ANCESTORS { + self.0.remove(path); + } + } +} + +impl const core::ops::Deref for EntityPathBitSet { + type Target = FiniteBitSet; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.0 + } +} + #[inline] fn project(projections: &[Projection<'_>], index: &mut usize) -> Option { let projection = projections.get(*index).and_then(|projection| { diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs index 7ac83174bc8..fe88abe84fa 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs @@ -44,14 +44,12 @@ impl TraversalPathBitSet { #[must_use] pub const fn empty(vertex: VertexType) -> Self { match vertex { - #[expect(clippy::cast_possible_truncation)] - VertexType::Entity => Self::Entity(EntityPathBitSet::new_empty( - core::mem::variant_count::() as u32, - )), + VertexType::Entity => Self::Entity(EntityPathBitSet::new_empty()), } } /// Returns the inner [`EntityPathBitSet`] if this is the [`Entity`](Self::Entity) 
variant. + #[inline] #[must_use] pub const fn as_entity(&self) -> Option<&EntityPathBitSet> { match self { @@ -61,12 +59,41 @@ impl TraversalPathBitSet { /// Returns a mutable reference to the inner [`EntityPathBitSet`] if this is the /// [`Entity`](Self::Entity) variant. + #[inline] #[must_use] pub const fn as_entity_mut(&mut self) -> Option<&mut EntityPathBitSet> { match self { Self::Entity(bitset) => Some(bitset), } } + + /// Returns `true` if no paths are set. + #[inline] + #[must_use] + pub const fn is_empty(self) -> bool { + match self { + Self::Entity(bitset) => bitset.is_empty(), + } + } + + /// Returns the number of paths set. + #[inline] + #[must_use] + pub fn len(self) -> usize { + match self { + Self::Entity(bitset) => bitset.len(), + } + } + + /// Inserts a resolved path with composite swallowing. + /// + /// If an ancestor composite is already present in the set, the insertion is a no-op. + /// If the path is a composite, any children already in the set are removed. + pub fn insert(&mut self, path: TraversalPath) { + match (self, path) { + (Self::Entity(bitset), TraversalPath::Entity(path)) => bitset.insert(path), + } + } } /// A single resolved traversal path for a specific vertex type. diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs index 57679562512..a1d2f0daa54 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs @@ -1,4 +1,4 @@ -//! Unit tests for entity projection path lookup. +//! Unit tests for entity projection path lookup and composite swallowing. use hashql_core::{symbol::sym, r#type::TypeId}; @@ -8,7 +8,7 @@ use crate::{ local::Local, place::{Projection, ProjectionKind}, }, - pass::execution::traversal::EntityPath, + pass::execution::traversal::{EntityPath, EntityPathBitSet}, }; /// Helper to create a `FieldByName` projection. 
@@ -236,3 +236,131 @@ fn jsonb_index_excludes_subpath() { Some((EntityPath::ProvenanceInferred, 3)) ); } + +// --- Composite swallowing tests --- + +fn empty_bitset() -> EntityPathBitSet { + EntityPathBitSet::new_empty() +} + +/// Inserting a leaf path into an empty set adds that path. +#[test] +fn insert_leaf_into_empty() { + let mut bitset = empty_bitset(); + bitset.insert(EntityPath::WebId); + + assert!(bitset.contains(EntityPath::WebId)); + assert!(!bitset.contains(EntityPath::EntityId)); + assert!(!bitset.contains(EntityPath::RecordId)); +} + +/// Inserting a composite removes any children already in the set. +#[test] +fn composite_swallows_children() { + let mut bitset = empty_bitset(); + bitset.insert(EntityPath::WebId); + bitset.insert(EntityPath::EntityUuid); + bitset.insert(EntityPath::DraftId); + + assert!(bitset.contains(EntityPath::WebId)); + assert!(bitset.contains(EntityPath::EntityUuid)); + assert!(bitset.contains(EntityPath::DraftId)); + + bitset.insert(EntityPath::EntityId); + + assert!(bitset.contains(EntityPath::EntityId)); + assert!(!bitset.contains(EntityPath::WebId)); + assert!(!bitset.contains(EntityPath::EntityUuid)); + assert!(!bitset.contains(EntityPath::DraftId)); +} + +/// Inserting a child when its ancestor composite is already present is a no-op. +#[test] +fn child_suppressed_by_ancestor() { + let mut bitset = empty_bitset(); + bitset.insert(EntityPath::RecordId); + + bitset.insert(EntityPath::WebId); + bitset.insert(EntityPath::EntityId); + bitset.insert(EntityPath::EditionId); + + assert!(bitset.contains(EntityPath::RecordId)); + assert!(!bitset.contains(EntityPath::WebId)); + assert!(!bitset.contains(EntityPath::EntityId)); + assert!(!bitset.contains(EntityPath::EditionId)); +} + +/// A grandparent composite suppresses grandchildren. 
+#[test] +fn grandparent_suppresses_grandchild() { + let mut bitset = empty_bitset(); + bitset.insert(EntityPath::RecordId); + + // WebId is a grandchild of RecordId (through EntityId) + bitset.insert(EntityPath::WebId); + + assert!(bitset.contains(EntityPath::RecordId)); + assert!(!bitset.contains(EntityPath::WebId)); +} + +/// Inserting a top-level composite swallows the entire subtree. +#[test] +fn record_id_swallows_entire_subtree() { + let mut bitset = empty_bitset(); + bitset.insert(EntityPath::WebId); + bitset.insert(EntityPath::EntityUuid); + bitset.insert(EntityPath::EditionId); + + bitset.insert(EntityPath::RecordId); + + assert!(bitset.contains(EntityPath::RecordId)); + assert!(!bitset.contains(EntityPath::EntityId)); + assert!(!bitset.contains(EntityPath::WebId)); + assert!(!bitset.contains(EntityPath::EntityUuid)); + assert!(!bitset.contains(EntityPath::DraftId)); + assert!(!bitset.contains(EntityPath::EditionId)); +} + +/// `TemporalVersioning` swallows `DecisionTime` and `TransactionTime`. +#[test] +fn temporal_versioning_swallows_children() { + let mut bitset = empty_bitset(); + bitset.insert(EntityPath::DecisionTime); + bitset.insert(EntityPath::TransactionTime); + + bitset.insert(EntityPath::TemporalVersioning); + + assert!(bitset.contains(EntityPath::TemporalVersioning)); + assert!(!bitset.contains(EntityPath::DecisionTime)); + assert!(!bitset.contains(EntityPath::TransactionTime)); +} + +/// Non-composite paths are unaffected by each other. +#[test] +fn independent_leaves_coexist() { + let mut bitset = empty_bitset(); + bitset.insert(EntityPath::Properties); + bitset.insert(EntityPath::Archived); + bitset.insert(EntityPath::Vectors); + + assert!(bitset.contains(EntityPath::Properties)); + assert!(bitset.contains(EntityPath::Archived)); + assert!(bitset.contains(EntityPath::Vectors)); +} + +/// Inserting `EntityId` into a set with `WebId` swallows `WebId`, but unrelated paths remain. 
+#[test] +fn swallow_selective() { + let mut bitset = empty_bitset(); + bitset.insert(EntityPath::WebId); + bitset.insert(EntityPath::Properties); + bitset.insert(EntityPath::DecisionTime); + + bitset.insert(EntityPath::EntityId); + + assert!(bitset.contains(EntityPath::EntityId)); + assert!(!bitset.contains(EntityPath::WebId)); + // Unrelated paths untouched + assert!(bitset.contains(EntityPath::Properties)); + assert!(bitset.contains(EntityPath::DecisionTime)); +} From efb0b855b6917f79c8b5eac17356e3e86236c004 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sun, 1 Mar 2026 13:13:56 +0100 Subject: [PATCH 10/32] feat: checkpoint --- .../pass/analysis/dataflow/lattice/impls.rs | 26 ++- .../statement_placement/embedding/tests.rs | 20 +- .../statement_placement/interpret/tests.rs | 221 +----------------- .../statement_placement/postgres/tests.rs | 111 +++------ .../execution/statement_placement/tests.rs | 85 ++----- .../src/pass/execution/traversal/analysis.rs | 20 +- .../embedding/all_args_excluded.snap | 6 +- ...on_vectors_entity_projection_rejected.snap | 6 +- .../only_vectors_projection_supported.snap | 7 +- .../embedding/other_operations_rejected.snap | 6 +- .../storage_statements_zero_cost.snap | 7 +- .../interpret/all_statements_supported.snap | 6 +- .../storage_statements_zero_cost.snap | 6 +- .../postgres/aggregate_closure_rejected.snap | 4 - .../postgres/aggregate_tuple_supported.snap | 6 +- .../postgres/apply_rejected.snap | 4 - .../postgres/binary_unary_ops_supported.snap | 6 +- .../postgres/diamond_must_analysis.snap | 4 - .../postgres/entity_projection_column.snap | 13 +- .../postgres/entity_projection_jsonb.snap | 7 +- ...closure_field_rejected_other_accepted.snap | 4 - .../env_dict_non_string_key_rejected.snap | 4 - .../env_dict_opaque_string_key_accepted.snap | 4 - .../env_dict_string_key_accepted.snap | 4 - .../env_with_closure_type_rejected.snap | 4 - .../env_without_closure_accepted.snap | 6 +- .../postgres/eq_dict_vs_struct_rejected.snap | 4 - 
.../postgres/eq_list_vs_tuple_rejected.snap | 4 - .../eq_place_vs_constant_accepted.snap | 4 - .../postgres/eq_same_type_accepted.snap | 4 - .../postgres/eq_unknown_type_rejected.snap | 4 - .../postgres/fnptr_constant_rejected.snap | 4 - .../postgres/graph_read_edge_unsupported.snap | 6 +- .../postgres/input_supported.snap | 6 +- .../postgres/ne_dict_vs_struct_rejected.snap | 4 - .../serialization_unsafe_edge_propagates.snap | 4 - ...erialization_unsafe_statement_no_cost.snap | 4 - .../storage_statements_zero_cost.snap | 6 +- 38 files changed, 116 insertions(+), 535 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs b/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs index e072b375ff6..d04649781e8 100644 --- a/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs +++ b/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs @@ -195,19 +195,41 @@ macro_rules! impl_bitset { impl_bitset!(DenseBitSet, ChunkedBitSet, MixedBitSet); -impl HasBottom<(T, U)> for V +impl HasBottom<(T, U)> for PowersetLattice where - V: HasBottom + HasBottom, + Self: HasBottom + HasBottom, { + #[inline] fn bottom(&self) -> (T, U) { (self.bottom(), self.bottom()) } + #[inline] fn is_bottom(&self, value: &(T, U)) -> bool { self.is_bottom(&value.0) && self.is_bottom(&value.1) } } +impl JoinSemiLattice<(T, U)> for PowersetLattice +where + Self: JoinSemiLattice + JoinSemiLattice, +{ + #[inline] + fn join_owned(&self, mut lhs: (T, U), rhs: &(T, U)) -> (T, U) + where + (T, U): Sized, + { + self.join(&mut lhs.0, &rhs.0); + self.join(&mut lhs.1, &rhs.1); + lhs + } + + #[inline] + fn join(&self, lhs: &mut (T, U), rhs: &(T, U)) -> bool { + self.join(&mut lhs.0, &rhs.0) || self.join(&mut lhs.1, &rhs.1) + } +} + impl MeetSemiLattice for Reverse where U: JoinSemiLattice, diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs 
b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs index df2105b7b25..e533541eae2 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs @@ -45,8 +45,7 @@ fn only_vectors_projection_supported() { }; let mut placement = EmbeddingStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "only_vectors_projection_supported", @@ -54,7 +53,6 @@ fn only_vectors_projection_supported() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -88,8 +86,7 @@ fn all_args_excluded() { }; let mut placement = EmbeddingStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "all_args_excluded", @@ -97,7 +94,6 @@ fn all_args_excluded() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -129,8 +125,7 @@ fn non_vectors_entity_projection_rejected() { }; let mut placement = EmbeddingStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "non_vectors_entity_projection_rejected", @@ -138,7 +133,6 @@ fn non_vectors_entity_projection_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -172,8 +166,7 @@ fn storage_statements_zero_cost() { }; let mut placement = EmbeddingStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = 
run_placement(&mut context, &mut placement, body); assert_placement( "storage_statements_zero_cost", @@ -181,7 +174,6 @@ fn storage_statements_zero_cost() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -227,8 +219,7 @@ fn other_operations_rejected() { }; let mut placement = EmbeddingStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "other_operations_rejected", @@ -236,6 +227,5 @@ fn other_operations_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs index 43fc247081b..06cfdf71c0b 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs @@ -1,8 +1,6 @@ //! Tests for [`InterpreterStatementPlacement`]. 
#![expect(clippy::min_ident_chars)] -use alloc::alloc::Global; - use hashql_core::{heap::Heap, symbol::sym, r#type::environment::Environment}; use hashql_diagnostics::DiagnosticIssues; @@ -11,17 +9,9 @@ use crate::{ context::MirContext, def::DefId, intern::Interner, - pass::{ - Changed, TransformPass as _, - execution::{ - cost::TraversalCostVec, - statement_placement::{ - InterpreterStatementPlacement, StatementPlacement as _, - tests::{assert_placement, run_placement}, - }, - target::{TargetArray, TargetId}, - }, - transform::TraversalExtraction, + pass::execution::statement_placement::{ + InterpreterStatementPlacement, StatementPlacement as _, + tests::{assert_placement, run_placement}, }, }; @@ -68,11 +58,8 @@ fn all_statements_supported() { diagnostics: DiagnosticIssues::new(), }; - let traversal_costs = TargetArray::from_fn(|_| None); - let mut placement: InterpreterStatementPlacement<'_, Global> = - InterpreterStatementPlacement::new(&traversal_costs); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let mut placement = InterpreterStatementPlacement::new(); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "all_statements_supported", @@ -80,7 +67,6 @@ fn all_statements_supported() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -116,11 +102,8 @@ fn storage_statements_zero_cost() { diagnostics: DiagnosticIssues::new(), }; - let traversal_costs = TargetArray::from_fn(|_| None); - let mut placement: InterpreterStatementPlacement<'_, Global> = - InterpreterStatementPlacement::new(&traversal_costs); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let mut placement = InterpreterStatementPlacement::new(); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "storage_statements_zero_cost", @@ -128,195 +111,5 @@ fn 
storage_statements_zero_cost() { &body, &context, &statement_costs, - &traversal_costs, - ); -} - -/// Traversal locals receive the backend cost added to the base interpreter cost. -/// -/// When Postgres assigns a traversal cost of 4, the interpreter adds it to the base cost (8) -/// via `saturating_add`, yielding 12 for the traversal assignment. Non-traversal assignments -/// remain at the base cost. -#[test] -fn traversal_single_backend_cost() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - let mut body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (), vertex: [Opaque sym::path::Entity; ?], result: Bool; - @proj metadata = vertex.metadata: ?, archived = metadata.archived: Bool; - - bb0() { - result = un.! archived; - return result; - } - }); - - let mut context = MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let mut extraction = TraversalExtraction::new_in(Global); - let _: Changed = extraction.run(&mut context, &mut body); - let traversals = extraction - .take_traversals() - .expect("expected GraphReadFilter body"); - - let mut postgres_costs = TraversalCostVec::new_in(&body, &traversals, &heap); - for local in body.local_decls.ids() { - if traversals.contains(local) { - postgres_costs.insert(local, cost!(4)); - } - } - - let mut traversal_costs: TargetArray>> = - TargetArray::from_fn(|_| None); - traversal_costs[TargetId::Postgres] = Some(postgres_costs); - - let mut interpreter = InterpreterStatementPlacement::new(&traversal_costs); - let (traversal_cost_out, statement_costs) = - interpreter.statement_placement_in(&context, &body, &traversals, &heap); - - assert_placement( - "traversal_single_backend_cost", - "interpret", - &body, - &context, - &statement_costs, - &traversal_cost_out, - ); -} - -/// The interpreter picks the maximum traversal cost across all backends. 
-/// -/// With Postgres assigning cost 4 and Embedding assigning cost 6 to different traversal -/// locals, the interpreter adds the per-local maximum to its base cost. Each traversal -/// assignment reflects the worst-case backend cost for that specific local. -#[test] -fn traversal_worst_case_multiple_backends() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - let mut body = body!(interner, env; [graph::read::filter]@0/2 -> ? { - decl env: (), vertex: [Opaque sym::path::Entity; ?], archived: Bool, vectors: ?; - @proj metadata = vertex.metadata: ?, archived_proj = metadata.archived: Bool, - encodings = vertex.encodings: ?, vectors_proj = encodings.vectors: ?; - - bb0() { - archived = load archived_proj; - vectors = load vectors_proj; - return vectors; - } - }); - - let mut context = MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let mut extraction = TraversalExtraction::new_in(Global); - let _: Changed = extraction.run(&mut context, &mut body); - let traversals = extraction - .take_traversals() - .expect("expected GraphReadFilter body"); - - // Assign different costs per backend per local so the interpreter picks the max for each. - // First traversal local gets Postgres cost 4, second gets Embedding cost 6. 
- let mut postgres_costs = TraversalCostVec::new_in(&body, &traversals, &heap); - let mut embedding_costs = TraversalCostVec::new_in(&body, &traversals, &heap); - let traversal_locals: Vec<_> = body - .local_decls - .ids() - .filter(|local| traversals.contains(*local)) - .collect(); - postgres_costs.insert(traversal_locals[0], cost!(4)); - embedding_costs.insert(traversal_locals[1], cost!(6)); - - let mut traversal_costs: TargetArray>> = - TargetArray::from_fn(|_| None); - traversal_costs[TargetId::Postgres] = Some(postgres_costs); - traversal_costs[TargetId::Embedding] = Some(embedding_costs); - - let mut interpreter = InterpreterStatementPlacement::new(&traversal_costs); - let (traversal_cost_out, statement_costs) = - interpreter.statement_placement_in(&context, &body, &traversals, &heap); - - assert_placement( - "traversal_worst_case_multiple_backends", - "interpret", - &body, - &context, - &statement_costs, - &traversal_cost_out, - ); -} - -/// Non-traversal assignments are unaffected by traversal costs. -/// -/// Even when traversal costs are present for entity projection locals, assignments to -/// non-traversal locals (like arithmetic results) retain the base interpreter cost of 8. 
-#[test] -fn non_traversal_unaffected_by_costs() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - let mut body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (), vertex: [Opaque sym::path::Entity; ?], - archived: Bool, x: Int, y: Int, sum: Int, result: Bool; - @proj metadata = vertex.metadata: ?, archived_proj = metadata.archived: Bool; - - bb0() { - archived = load archived_proj; - x = load 10; - y = load 20; - sum = bin.+ x y; - result = bin.> sum 15; - return result; - } - }); - - let mut context = MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let mut extraction = TraversalExtraction::new_in(Global); - let _: Changed = extraction.run(&mut context, &mut body); - let traversals = extraction - .take_traversals() - .expect("expected GraphReadFilter body"); - - let mut postgres_costs = TraversalCostVec::new_in(&body, &traversals, &heap); - for local in body.local_decls.ids() { - if traversals.contains(local) { - postgres_costs.insert(local, cost!(4)); - } - } - - let mut traversal_costs: TargetArray>> = - TargetArray::from_fn(|_| None); - traversal_costs[TargetId::Postgres] = Some(postgres_costs); - - let mut interpreter = InterpreterStatementPlacement::new(&traversal_costs); - let (traversal_cost_out, statement_costs) = - interpreter.statement_placement_in(&context, &body, &traversals, &heap); - - assert_placement( - "non_traversal_unaffected_by_costs", - "interpret", - &body, - &context, - &statement_costs, - &traversal_cost_out, ); } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs index b0a982458eb..a574d727efa 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs @@ 
-26,12 +26,13 @@ use crate::{ def::DefId, intern::Interner, op, - pass::{ - execution::statement_placement::{ + pass::execution::{ + VertexType, + statement_placement::{ PostgresStatementPlacement, StatementPlacement as _, tests::{assert_placement, run_placement}, }, - transform::Traversals, + traversal::Traversals, }, }; @@ -66,8 +67,7 @@ fn binary_unary_ops_supported() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "binary_unary_ops_supported", @@ -75,7 +75,6 @@ fn binary_unary_ops_supported() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -108,8 +107,7 @@ fn aggregate_tuple_supported() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "aggregate_tuple_supported", @@ -117,7 +115,6 @@ fn aggregate_tuple_supported() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -153,8 +150,7 @@ fn aggregate_closure_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "aggregate_closure_rejected", @@ -162,7 +158,6 @@ fn aggregate_closure_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -201,8 +196,7 @@ fn apply_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut 
placement, body); assert_placement( "apply_rejected", @@ -210,7 +204,6 @@ fn apply_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -242,8 +235,7 @@ fn input_supported() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "input_supported", @@ -251,7 +243,6 @@ fn input_supported() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -285,8 +276,7 @@ fn env_with_closure_type_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "env_with_closure_type_rejected", @@ -294,7 +284,6 @@ fn env_with_closure_type_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -327,8 +316,7 @@ fn env_without_closure_accepted() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "env_without_closure_accepted", @@ -336,7 +324,6 @@ fn env_without_closure_accepted() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -368,8 +355,7 @@ fn entity_projection_column() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "entity_projection_column", @@ -377,7 +363,6 @@ fn entity_projection_column() { &body, &context, &statement_costs, - 
&traversal_costs, ); } @@ -410,8 +395,7 @@ fn entity_projection_jsonb() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "entity_projection_jsonb", @@ -419,7 +403,6 @@ fn entity_projection_jsonb() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -456,8 +439,7 @@ fn storage_statements_zero_cost() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "storage_statements_zero_cost", @@ -465,7 +447,6 @@ fn storage_statements_zero_cost() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -520,8 +501,7 @@ fn diamond_must_analysis() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "diamond_must_analysis", @@ -529,7 +509,6 @@ fn diamond_must_analysis() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -554,7 +533,7 @@ fn graph_read_edge_unsupported() { let mut builder = BodyBuilder::new(&interner); let _env_local = builder.local("env", unit_ty); - let vertex = builder.local("vertex", entity_ty); + let _vertex = builder.local("vertex", entity_ty); let axis = builder.local("axis", int_ty); let graph_result = builder.local("graph_result", int_ty); let local_val = builder.local("local_val", int_ty); @@ -596,11 +575,10 @@ fn graph_read_edge_unsupported() { diagnostics: DiagnosticIssues::new(), }; - let traversals = Traversals::with_capacity_in(vertex.local, body.local_decls.len(), 
&heap); + let traversals = Traversals::new_in(&body.basic_blocks, VertexType::Entity, &heap); let mut placement = PostgresStatementPlacement::new_in(Global); - let (traversal_costs, statement_costs) = - placement.statement_placement_in(&context, &body, &traversals, &heap); + let statement_costs = placement.statement_placement_in(&context, &body, &traversals, &heap); assert_placement( "graph_read_edge_unsupported", @@ -608,7 +586,6 @@ fn graph_read_edge_unsupported() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -642,8 +619,7 @@ fn env_closure_field_rejected_other_accepted() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "env_closure_field_rejected_other_accepted", @@ -651,7 +627,6 @@ fn env_closure_field_rejected_other_accepted() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -684,8 +659,7 @@ fn env_dict_non_string_key_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "env_dict_non_string_key_rejected", @@ -693,7 +667,6 @@ fn env_dict_non_string_key_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -725,8 +698,7 @@ fn env_dict_string_key_accepted() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "env_dict_string_key_accepted", @@ -734,7 +706,6 @@ fn env_dict_string_key_accepted() { &body, &context, &statement_costs, - 
&traversal_costs, ); } @@ -770,8 +741,7 @@ fn env_dict_opaque_string_key_accepted() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "env_dict_opaque_string_key_accepted", @@ -779,7 +749,6 @@ fn env_dict_opaque_string_key_accepted() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -841,8 +810,7 @@ fn fnptr_constant_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "fnptr_constant_rejected", @@ -850,7 +818,6 @@ fn fnptr_constant_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -888,8 +855,7 @@ fn eq_dict_vs_struct_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "eq_dict_vs_struct_rejected", @@ -897,7 +863,6 @@ fn eq_dict_vs_struct_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -935,8 +900,7 @@ fn eq_list_vs_tuple_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "eq_list_vs_tuple_rejected", @@ -944,7 +908,6 @@ fn eq_list_vs_tuple_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -982,8 +945,7 @@ fn eq_unknown_type_rejected() { }; let mut placement = 
PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "eq_unknown_type_rejected", @@ -991,7 +953,6 @@ fn eq_unknown_type_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -1029,8 +990,7 @@ fn eq_same_type_accepted() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "eq_same_type_accepted", @@ -1038,7 +998,6 @@ fn eq_same_type_accepted() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -1076,8 +1035,7 @@ fn ne_dict_vs_struct_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "ne_dict_vs_struct_rejected", @@ -1085,7 +1043,6 @@ fn ne_dict_vs_struct_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -1652,8 +1609,7 @@ fn eq_place_vs_constant_accepted() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "eq_place_vs_constant_accepted", @@ -1661,7 +1617,6 @@ fn eq_place_vs_constant_accepted() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -2189,8 +2144,7 @@ fn serialization_unsafe_statement_no_cost() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut 
context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "serialization_unsafe_statement_no_cost", @@ -2198,7 +2152,6 @@ fn serialization_unsafe_statement_no_cost() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -2245,8 +2198,7 @@ fn serialization_unsafe_edge_propagates() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&mut context, &mut placement, body); assert_placement( "serialization_unsafe_edge_propagates", @@ -2254,6 +2206,5 @@ fn serialization_unsafe_edge_propagates() { &body, &context, &statement_costs, - &traversal_costs, ); } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs index 542d7bf494b..29fc68f89f7 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs @@ -3,7 +3,7 @@ use alloc::alloc::Global; use core::{alloc::Allocator, fmt::Display}; -use std::{io::Write as _, path::PathBuf}; +use std::path::PathBuf; use hashql_core::{ heap::Heap, @@ -15,21 +15,17 @@ use insta::{Settings, assert_snapshot}; use super::StatementPlacement; use crate::{ - body::{Body, local::Local, location::Location, statement::Statement}, + body::{Body, location::Location, statement::Statement}, builder::body, context::MirContext, intern::Interner, - pass::{ - Changed, TransformPass as _, - execution::{ - cost::{StatementCostVec, TraversalCostVec}, - statement_placement::{ - EmbeddingStatementPlacement, InterpreterStatementPlacement, - PostgresStatementPlacement, - }, - target::TargetArray, + pass::execution::{ + VertexType, + cost::StatementCostVec, + statement_placement::{ + EmbeddingStatementPlacement, 
InterpreterStatementPlacement, PostgresStatementPlacement, }, - transform::{TraversalExtraction, Traversals}, + traversal::{TraversalAnalysis, Traversals}, }, pretty::{TextFormatAnnotations, TextFormatOptions}, }; @@ -56,17 +52,6 @@ impl TextFormatAnnotations for CostAnnotations<'_, A> { } } -/// Formats traversal costs as a summary section. -fn format_traversals(traversal_costs: &TraversalCostVec) -> impl Display { - core::fmt::from_fn(move |f| { - writeln!(f, "Traversals:")?; - for (local, cost) in traversal_costs { - writeln!(f, " {local}: {cost}")?; - } - Ok(()) - }) -} - /// Runs statement placement analysis and asserts the result matches a snapshot. #[track_caller] pub(crate) fn assert_placement<'heap, A: Allocator>( @@ -75,7 +60,6 @@ pub(crate) fn assert_placement<'heap, A: Allocator>( body: &Body<'heap>, context: &MirContext<'_, 'heap>, statement_costs: &StatementCostVec, - traversal_costs: &TraversalCostVec, ) { let formatter = Formatter::new(context.heap); let type_formatter = TypeFormatter::new(&formatter, context.env, TypeFormatterOptions::terse()); @@ -95,16 +79,6 @@ pub(crate) fn assert_placement<'heap, A: Allocator>( text_format.format_body(body).expect("formatting failed"); - write!( - text_format.writer, - "\n\n{:=^50}\n\n", - format!(" Traversals ") - ) - .expect("infallible"); - - write!(text_format.writer, "{}", format_traversals(traversal_costs)) - .expect("formatting failed"); - // Snapshot configuration let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); let mut settings = Settings::clone_current(); @@ -121,29 +95,19 @@ pub(crate) fn assert_placement<'heap, A: Allocator>( /// Helper to set up a test context and run placement analysis. /// -/// Returns the body, context components, and cost vectors for assertion. +/// Returns the body and statement cost vector for assertion. 
#[track_caller] pub(crate) fn run_placement<'heap>( context: &mut MirContext<'_, 'heap>, placement: &mut impl StatementPlacement<'heap, &'heap Heap>, - mut body: Body<'heap>, -) -> ( - Body<'heap>, - StatementCostVec<&'heap Heap>, - TraversalCostVec<&'heap Heap>, -) { - // Run TraversalExtraction to produce Traversals - let mut extraction = TraversalExtraction::new_in(Global); - let _: Changed = extraction.run(context, &mut body); - let traversals = extraction - .take_traversals() - .expect("expected GraphReadFilter body"); - - // Run placement analysis - let (traversal_costs, statement_costs) = + body: Body<'heap>, +) -> (Body<'heap>, StatementCostVec<&'heap Heap>) { + let traversals = TraversalAnalysis::traversal_analysis_in(context, &body, context.heap); + + let statement_costs = placement.statement_placement_in(context, &body, &traversals, context.heap); - (body, statement_costs, traversal_costs) + (body, statement_costs) } // ============================================================================= @@ -180,27 +144,18 @@ fn non_graph_read_filter_returns_empty() { diagnostics: DiagnosticIssues::new(), }; - let traversals = Traversals::with_capacity_in(Local::new(1), body.local_decls.len(), &heap); - - let traversal_costs = TargetArray::from_fn(|_| None); + let traversals = Traversals::new_in(&body.basic_blocks, VertexType::Entity, &heap); let mut postgres = PostgresStatementPlacement::new_in(Global); - let mut interpreter = InterpreterStatementPlacement::::new(&traversal_costs); + let mut interpreter = InterpreterStatementPlacement::new(); let mut embedding = EmbeddingStatementPlacement::new_in(Global); - let (postgres_traversal, postgres_statement) = - postgres.statement_placement_in(&context, &body, &traversals, &heap); - let (interpreter_traversal, interpreter_statement) = + let postgres_statement = postgres.statement_placement_in(&context, &body, &traversals, &heap); + let interpreter_statement = interpreter.statement_placement_in(&context, &body, 
&traversals, &heap); - let (embedding_traversal, embedding_statement) = - embedding.statement_placement_in(&context, &body, &traversals, &heap); + let embedding_statement = embedding.statement_placement_in(&context, &body, &traversals, &heap); - assert_eq!(postgres_traversal.iter().count(), 0); assert!(postgres_statement.all_unassigned()); - - assert_eq!(interpreter_traversal.iter().count(), 0); assert!(interpreter_statement.all_unassigned()); - - assert_eq!(embedding_traversal.iter().count(), 0); assert!(embedding_statement.all_unassigned()); } diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs index 8a405676ddd..29b3968cfd6 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs @@ -19,23 +19,26 @@ use crate::{ }, visit::{self, Visitor}, }; -/// Per-statement resolved traversal paths for a graph read filter body. +/// Per-location resolved traversal paths for a graph read filter body. /// -/// Stores a [`TraversalPathBitSet`] for every statement position, recording which vertex -/// fields each statement accesses. Indexed by [`Location`] (1-based statement index). +/// Stores a [`TraversalPathBitSet`] for every statement and terminator position, recording +/// which vertex fields each location accesses. Indexed by [`Location`] (1-based statement +/// index, with the terminator at `statements.len() + 1`). pub(crate) struct Traversals { inner: BlockPartitionedVec, } impl Traversals { - /// Creates a traversal map with space for all statements in the given blocks. + /// Creates a traversal map with space for all statements and terminators in the given blocks. /// /// All positions are initialized to an empty bitset for the given vertex type. 
#[expect(clippy::cast_possible_truncation)] pub(crate) fn new_in(blocks: &BasicBlocks, vertex: VertexType, alloc: A) -> Self { Self { inner: BlockPartitionedVec::new( - blocks.iter().map(|block| block.statements.len() as u32), + blocks + .iter() + .map(|block| (block.statements.len() + 1) as u32), TraversalPathBitSet::empty(vertex), alloc, ), @@ -72,8 +75,11 @@ impl Traversals { where A: Clone, { - self.inner - .remap(blocks.iter().map(|block| block.statements.len() as u32)); + self.inner.remap( + blocks + .iter() + .map(|block| (block.statements.len() + 1) as u32), + ); } } diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/all_args_excluded.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/all_args_excluded.snap index 283ab6f3e9f..7ce1d6b9bbe 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/all_args_excluded.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/all_args_excluded.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { @@ -13,7 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/non_vectors_entity_projection_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/non_vectors_entity_projection_rejected.snap index 981a34bc5e7..cf47eefe2f7 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/non_vectors_entity_projection_rejected.snap +++ 
b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/non_vectors_entity_projection_rejected.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { @@ -11,7 +11,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { return %2 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/only_vectors_projection_supported.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/only_vectors_projection_supported.snap index b9e86c3d371..d27a7c6e3c9 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/only_vectors_projection_supported.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/only_vectors_projection_supported.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> ? { @@ -11,8 +11,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> ? 
{ return %2 } } - -=================== Traversals =================== - -Traversals: - %2: 4 diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/other_operations_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/other_operations_rejected.snap index 14d5b00838e..bcfb665f330 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/other_operations_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/other_operations_rejected.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { @@ -29,7 +29,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { return %11 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/storage_statements_zero_cost.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/storage_statements_zero_cost.snap index eef17c67fe6..44a6d7cc273 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/storage_statements_zero_cost.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/storage_statements_zero_cost.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> ? { @@ -13,8 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> ? 
{ return %2 } } - -=================== Traversals =================== - -Traversals: - %2: 4 diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/all_statements_supported.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/all_statements_supported.snap index 07b1dd10829..49428694348 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/all_statements_supported.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/all_statements_supported.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { @@ -31,7 +31,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { return %12 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/storage_statements_zero_cost.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/storage_statements_zero_cost.snap index 826771d4f33..0086a9abef8 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/storage_statements_zero_cost.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/storage_statements_zero_cost.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Integer { @@ -19,7 +19,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Integer { return %4 } } - -=================== Traversals =================== 
- -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_closure_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_closure_rejected.snap index e243762048b..2f9c56244ef 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_closure_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_closure_rejected.snap @@ -15,7 +15,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { return %4 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_tuple_supported.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_tuple_supported.snap index 16347008ee5..5fbf3d9d5c2 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_tuple_supported.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_tuple_supported.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { @@ -15,7 +15,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { return %4 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/apply_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/apply_rejected.snap index b87811583fe..558972bc77f 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/apply_rejected.snap +++ 
b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/apply_rejected.snap @@ -17,7 +17,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { return %5 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/binary_unary_ops_supported.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/binary_unary_ops_supported.snap index b0ad1ca6f4f..3bff03d7cf2 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/binary_unary_ops_supported.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/binary_unary_ops_supported.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { @@ -19,7 +19,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { return %6 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/diamond_must_analysis.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/diamond_must_analysis.snap index c935df92984..4d5a3e85593 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/diamond_must_analysis.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/diamond_must_analysis.snap @@ -37,7 +37,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { return %7 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/entity_projection_column.snap 
b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/entity_projection_column.snap index 1a61f1151e5..a18c0b1b663 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/entity_projection_column.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/entity_projection_column.snap @@ -1,18 +1,9 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { - let %2: Boolean - bb0(): { - %2 = %1.metadata.archived // cost: 4 - - return %2 + return %1.metadata.archived } } - -=================== Traversals =================== - -Traversals: - %2: 4 diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/entity_projection_jsonb.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/entity_projection_jsonb.snap index 84f716750c5..d49c49e03cd 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/entity_projection_jsonb.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/entity_projection_jsonb.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> ? { @@ -11,8 +11,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> ? 
{ return %2 } } - -=================== Traversals =================== - -Traversals: - %2: 4 diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_closure_field_rejected_other_accepted.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_closure_field_rejected_other_accepted.snap index 2da09ce226a..c60090e59c5 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_closure_field_rejected_other_accepted.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_closure_field_rejected_other_accepted.snap @@ -15,7 +15,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer, (Integer) -> Integer), %1: Ent return %4 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_non_string_key_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_non_string_key_rejected.snap index ca890d60d98..7d7b5573690 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_non_string_key_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_non_string_key_rejected.snap @@ -13,7 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (Dict,), %1: Entity) - return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_opaque_string_key_accepted.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_opaque_string_key_accepted.snap index e3e55a90c61..e43510fd97a 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_opaque_string_key_accepted.snap +++ 
b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_opaque_string_key_accepted.snap @@ -13,7 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (Dict,), %1: Entity) -> return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_string_key_accepted.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_string_key_accepted.snap index 79bea1693f6..21b8df95642 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_string_key_accepted.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_string_key_accepted.snap @@ -13,7 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (Dict,), %1: Entity) -> return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_with_closure_type_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_with_closure_type_rejected.snap index 733eef254e8..b8a99b82d85 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_with_closure_type_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_with_closure_type_rejected.snap @@ -13,7 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer, (Integer) -> Integer), %1: Ent return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_without_closure_accepted.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_without_closure_accepted.snap index c21e2e65bbd..250a2822c48 100644 --- 
a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_without_closure_accepted.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_without_closure_accepted.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (Integer, Boolean), %1: Entity) -> Boolean { @@ -13,7 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer, Boolean), %1: Entity) -> Boole return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_dict_vs_struct_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_dict_vs_struct_rejected.snap index bff13820ad3..eafdd800313 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_dict_vs_struct_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_dict_vs_struct_rejected.snap @@ -15,7 +15,3 @@ fn {graph::read::filter@4294967040}(%0: (Dict, (a: Integer)), % return %4 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_list_vs_tuple_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_list_vs_tuple_rejected.snap index 0a227af5e40..e2e5d75207f 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_list_vs_tuple_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_list_vs_tuple_rejected.snap @@ -15,7 +15,3 @@ fn {graph::read::filter@4294967040}(%0: (List, (Integer, Integer)), %1: return %4 } } - -=================== Traversals 
=================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_place_vs_constant_accepted.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_place_vs_constant_accepted.snap index 79277ce302a..d2479908f24 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_place_vs_constant_accepted.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_place_vs_constant_accepted.snap @@ -13,7 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (Dict,), %1: Entity) -> return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_same_type_accepted.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_same_type_accepted.snap index f60814f8d25..1cf9b7ba0b5 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_same_type_accepted.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_same_type_accepted.snap @@ -15,7 +15,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer, Integer), %1: Entity) -> Boole return %4 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_unknown_type_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_unknown_type_rejected.snap index bbe64b63f3b..0e1ed8a9f05 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_unknown_type_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_unknown_type_rejected.snap @@ -15,7 +15,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer, ?), %1: Entity) -> Boolean { return %4 } } - -=================== 
Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/fnptr_constant_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/fnptr_constant_rejected.snap index b7e3d9457b7..ca5104a18ab 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/fnptr_constant_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/fnptr_constant_rejected.snap @@ -13,7 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/graph_read_edge_unsupported.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/graph_read_edge_unsupported.snap index f54367c9c82..ea8c6d79a91 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/graph_read_edge_unsupported.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/graph_read_edge_unsupported.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { @@ -24,7 +24,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { return %6 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/input_supported.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/input_supported.snap index afdd06acdc4..6494ccad9ef 100644 --- 
a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/input_supported.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/input_supported.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { @@ -13,7 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/ne_dict_vs_struct_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/ne_dict_vs_struct_rejected.snap index 3c69071337c..be924a3f22a 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/ne_dict_vs_struct_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/ne_dict_vs_struct_rejected.snap @@ -15,7 +15,3 @@ fn {graph::read::filter@4294967040}(%0: (Dict, (a: Integer)), % return %4 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/serialization_unsafe_edge_propagates.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/serialization_unsafe_edge_propagates.snap index aaa1cac6f56..1c479bb79cc 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/serialization_unsafe_edge_propagates.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/serialization_unsafe_edge_propagates.snap @@ -22,7 +22,3 @@ fn {graph::read::filter@4294967040}(%0: (Uuid | String, Integer), %1: Entity) -> return %5 } } - -=================== Traversals 
=================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/serialization_unsafe_statement_no_cost.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/serialization_unsafe_statement_no_cost.snap index 2ddf52236fa..faab5b071f2 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/serialization_unsafe_statement_no_cost.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/serialization_unsafe_statement_no_cost.snap @@ -17,7 +17,3 @@ fn {graph::read::filter@4294967040}(%0: (Uuid | String, Integer), %1: Entity) -> return %5 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/storage_statements_zero_cost.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/storage_statements_zero_cost.snap index 1eb59985945..4773d151019 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/storage_statements_zero_cost.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/storage_statements_zero_cost.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Integer { @@ -19,7 +19,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Integer { return %4 } } - -=================== Traversals =================== - -Traversals: From 8e95f62b77275e8c78a27eff70c9d95faa9600f1 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sun, 1 Mar 2026 13:37:51 +0100 Subject: [PATCH 11/32] feat: checkpoint --- .../hashql/core/src/id/bit_vec/finite.rs | 44 +- .../mir/src/pass/execution/splitting/mod.rs | 3 +- 
.../statement_placement/embedding/tests.rs | 8 +- .../statement_placement/interpret/tests.rs | 171 ++++++- .../statement_placement/postgres/tests.rs | 48 +- .../execution/statement_placement/tests.rs | 2 +- .../execution/terminator_placement/tests.rs | 2 +- .../{analysis.rs => analysis/mod.rs} | 3 + .../execution/traversal/analysis/tests.rs | 463 ++++++++++++++++++ .../src/pass/execution/traversal/entity.rs | 3 +- .../mir/src/pass/execution/traversal/tests.rs | 98 +++- .../non_traversal_unaffected_by_costs.snap | 20 +- .../traversal_multiple_paths_cost.snap | 15 + ...t.snap => traversal_single_path_cost.snap} | 10 +- .../traversal_swallowing_reduces_cost.snap | 13 + ...raversal_worst_case_multiple_backends.snap | 19 - 16 files changed, 825 insertions(+), 97 deletions(-) rename libs/@local/hashql/mir/src/pass/execution/traversal/{analysis.rs => analysis/mod.rs} (99%) create mode 100644 libs/@local/hashql/mir/src/pass/execution/traversal/analysis/tests.rs create mode 100644 libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap rename libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/{traversal_single_backend_cost.snap => traversal_single_path_cost.snap} (58%) create mode 100644 libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_worst_case_multiple_backends.snap diff --git a/libs/@local/hashql/core/src/id/bit_vec/finite.rs b/libs/@local/hashql/core/src/id/bit_vec/finite.rs index 2490383981b..cd4694a9811 100644 --- a/libs/@local/hashql/core/src/id/bit_vec/finite.rs +++ b/libs/@local/hashql/core/src/id/bit_vec/finite.rs @@ -221,8 +221,8 @@ impl FiniteBitSet { /// /// Panics if the range end exceeds the capacity of the underlying integral type. 
#[inline] - pub fn insert_range(&mut self, bounds: impl RangeBounds) { - let Some((start, end)) = inclusive_start_end(bounds, T::MAX_DOMAIN_SIZE as usize) else { + pub fn insert_range(&mut self, bounds: impl RangeBounds, domain_size: usize) { + let Some((start, end)) = inclusive_start_end(bounds, domain_size) else { return; }; @@ -416,7 +416,7 @@ mod tests { #[test] fn remove() { let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); - set.insert_range(TestId::from_usize(0)..=TestId::from_usize(7)); + set.insert_range(TestId::from_usize(0)..=TestId::from_usize(7), 8); set.remove(TestId::from_usize(0)); set.remove(TestId::from_usize(7)); @@ -442,7 +442,7 @@ mod tests { fn insert_range_basic() { let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); - set.insert_range(TestId::from_usize(2)..TestId::from_usize(5)); + set.insert_range(TestId::from_usize(2)..TestId::from_usize(5), 8); assert!(!set.contains(TestId::from_usize(0))); assert!(!set.contains(TestId::from_usize(1))); @@ -457,7 +457,7 @@ mod tests { fn insert_range_inclusive() { let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); - set.insert_range(TestId::from_usize(2)..=TestId::from_usize(5)); + set.insert_range(TestId::from_usize(2)..=TestId::from_usize(5), 8); assert!(set.contains(TestId::from_usize(2))); assert!(set.contains(TestId::from_usize(5))); @@ -468,7 +468,7 @@ mod tests { fn insert_range_full() { let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); - set.insert_range(TestId::from_usize(0)..=TestId::from_usize(7)); + set.insert_range(TestId::from_usize(0)..=TestId::from_usize(7), 8); assert_eq!(set.len(), 8); assert_eq!(set.into_inner(), u8::MAX); @@ -478,7 +478,7 @@ mod tests { fn insert_range_empty() { let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); - set.insert_range(TestId::from_usize(5)..TestId::from_usize(5)); + set.insert_range(TestId::from_usize(5)..TestId::from_usize(5), 8); assert!(set.is_empty()); } @@ -516,7 +516,7 @@ mod tests { #[test] fn clear() { let mut set: 
FiniteBitSet = FiniteBitSet::new_empty(8); - set.insert_range(TestId::from_usize(0)..=TestId::from_usize(7)); + set.insert_range(TestId::from_usize(0)..=TestId::from_usize(7), 8); set.clear(); assert!(set.is_empty()); } @@ -524,7 +524,7 @@ mod tests { #[test] fn contains_out_of_bounds_returns_false() { let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); - set.insert_range(TestId::from_usize(0)..=TestId::from_usize(7)); + set.insert_range(TestId::from_usize(0)..=TestId::from_usize(7), 8); assert!(!set.contains(TestId::from_usize(100))); } @@ -538,19 +538,19 @@ mod tests { #[test] fn different_integral_types() { let mut set8: FiniteBitSet = FiniteBitSet::new_empty(8); - set8.insert_range(TestId::from_usize(0)..=TestId::from_usize(7)); + set8.insert_range(TestId::from_usize(0)..=TestId::from_usize(7), 8); assert_eq!(set8.len(), 8); let mut set16: FiniteBitSet = FiniteBitSet::new_empty(16); - set16.insert_range(TestId::from_usize(0)..=TestId::from_usize(15)); + set16.insert_range(TestId::from_usize(0)..=TestId::from_usize(15), 16); assert_eq!(set16.len(), 16); let mut set64: FiniteBitSet = FiniteBitSet::new_empty(64); - set64.insert_range(TestId::from_usize(0)..=TestId::from_usize(63)); + set64.insert_range(TestId::from_usize(0)..=TestId::from_usize(63), 64); assert_eq!(set64.len(), 64); let mut set128: FiniteBitSet = FiniteBitSet::new_empty(128); - set128.insert_range(TestId::from_usize(0)..=TestId::from_usize(127)); + set128.insert_range(TestId::from_usize(0)..=TestId::from_usize(127), 128); assert_eq!(set128.len(), 128); } @@ -561,7 +561,7 @@ mod tests { for start in 0..bits.min(8) { for end in start..bits.min(16) { let mut set: FiniteBitSet = FiniteBitSet::new_empty(64); - set.insert_range(TestId::from_u32(start)..=TestId::from_u32(end.min(63))); + set.insert_range(TestId::from_u32(start)..=TestId::from_u32(end.min(63)), 64); for i in 0..64 { let expected = i >= start && i <= end.min(63); @@ -611,7 +611,7 @@ mod tests { #[test] fn subtract_removes_bits() { let 
mut a: FiniteBitSet = FiniteBitSet::new_empty(8); - a.insert_range(TestId::from_usize(0)..=TestId::from_usize(4)); + a.insert_range(TestId::from_usize(0)..=TestId::from_usize(4), 8); let mut b: FiniteBitSet = FiniteBitSet::new_empty(8); b.insert(TestId::from_usize(1)); @@ -632,10 +632,10 @@ mod tests { #[test] fn subtract_disjoint_sets() { let mut a: FiniteBitSet = FiniteBitSet::new_empty(8); - a.insert_range(TestId::from_usize(0)..=TestId::from_usize(3)); + a.insert_range(TestId::from_usize(0)..=TestId::from_usize(3), 8); let mut b: FiniteBitSet = FiniteBitSet::new_empty(8); - b.insert_range(TestId::from_usize(4)..=TestId::from_usize(7)); + b.insert_range(TestId::from_usize(4)..=TestId::from_usize(7), 8); assert!(!a.subtract(&b)); assert_eq!(a.len(), 4); @@ -644,10 +644,10 @@ mod tests { #[test] fn intersect_keeps_common_bits() { let mut a: FiniteBitSet = FiniteBitSet::new_empty(8); - a.insert_range(TestId::from_usize(0)..=TestId::from_usize(4)); + a.insert_range(TestId::from_usize(0)..=TestId::from_usize(4), 8); let mut b: FiniteBitSet = FiniteBitSet::new_empty(8); - b.insert_range(TestId::from_usize(2)..=TestId::from_usize(6)); + b.insert_range(TestId::from_usize(2)..=TestId::from_usize(6), 8); assert!(a.intersect(&b)); assert!(!a.contains(TestId::from_usize(0))); @@ -666,10 +666,10 @@ mod tests { #[test] fn intersect_disjoint_sets() { let mut a: FiniteBitSet = FiniteBitSet::new_empty(8); - a.insert_range(TestId::from_usize(0)..=TestId::from_usize(3)); + a.insert_range(TestId::from_usize(0)..=TestId::from_usize(3), 8); let mut b: FiniteBitSet = FiniteBitSet::new_empty(8); - b.insert_range(TestId::from_usize(4)..=TestId::from_usize(7)); + b.insert_range(TestId::from_usize(4)..=TestId::from_usize(7), 8); assert!(a.intersect(&b)); assert!(a.is_empty()); @@ -678,7 +678,7 @@ mod tests { #[test] fn intersect_with_empty() { let mut a: FiniteBitSet = FiniteBitSet::new_empty(8); - a.insert_range(TestId::from_usize(0)..=TestId::from_usize(7)); + 
a.insert_range(TestId::from_usize(0)..=TestId::from_usize(7), 8); let b: FiniteBitSet = FiniteBitSet::new_empty(8); diff --git a/libs/@local/hashql/mir/src/pass/execution/splitting/mod.rs b/libs/@local/hashql/mir/src/pass/execution/splitting/mod.rs index ca3f35b5ccc..8ba21bdb58a 100644 --- a/libs/@local/hashql/mir/src/pass/execution/splitting/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/splitting/mod.rs @@ -185,7 +185,8 @@ fn offset_basic_blocks<'heap, A: Allocator, S: Allocator + Clone>( // Unlike other regions, these may be empty. Mark empty blocks as supported everywhere. if costs[TargetId::Interpreter].is_empty() { - targets[start_id].insert_range(TargetId::MIN..=TargetId::MAX); + targets[start_id] + .insert_range(TargetId::MIN..=TargetId::MAX, TargetId::VARIANT_COUNT); } else { targets[start_id] = supported(&costs, 0); } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs index e533541eae2..93278807e17 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs @@ -45,7 +45,7 @@ fn only_vectors_projection_supported() { }; let mut placement = EmbeddingStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "only_vectors_projection_supported", @@ -125,7 +125,7 @@ fn non_vectors_entity_projection_rejected() { }; let mut placement = EmbeddingStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "non_vectors_entity_projection_rejected", @@ -166,7 +166,7 @@ fn storage_statements_zero_cost() { }; let 
mut placement = EmbeddingStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "storage_statements_zero_cost", @@ -219,7 +219,7 @@ fn other_operations_rejected() { }; let mut placement = EmbeddingStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "other_operations_rejected", diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs index 06cfdf71c0b..c0b677d1a55 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs @@ -59,7 +59,7 @@ fn all_statements_supported() { }; let mut placement = InterpreterStatementPlacement::new(); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "all_statements_supported", @@ -70,6 +70,173 @@ fn all_statements_supported() { ); } +/// A single vertex projection yields cost 12 (base 8 + overhead 4 × 1 path). +/// +/// Tests that `path_count` from `TraversalAnalysis` feeds into the interpreter cost +/// formula. A constant load at the same location has cost 8 (zero paths). 
+#[test] +fn traversal_single_path_cost() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { + decl env: (), vertex: [Opaque sym::path::Entity; ?], archived: Bool, result: Bool; + @proj metadata = vertex.metadata: ?, archived_proj = metadata.archived: Bool; + + bb0() { + archived = load archived_proj; + result = un.! archived; + return result; + } + }); + + let mut context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let mut placement = InterpreterStatementPlacement::new(); + let (body, statement_costs) = run_placement(&context, &mut placement, body); + + assert_placement( + "traversal_single_path_cost", + "interpret", + &body, + &context, + &statement_costs, + ); +} + +/// Two vertex projections in a single statement yield cost 16 (base 8 + overhead 4 × 2 paths). +/// +/// A tuple referencing both `_1.properties` and `_1.metadata.archived` has `path_count = 2`. 
+#[test] +fn traversal_multiple_paths_cost() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> (?, Bool) { + decl env: (), vertex: [Opaque sym::path::Entity; ?], + single: ?, both: (?, Bool); + @proj properties = vertex.properties: ?, + metadata = vertex.metadata: ?, + archived = metadata.archived: Bool; + + bb0() { + single = load properties; + both = tuple properties, archived; + return both; + } + }); + + let mut context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let mut placement = InterpreterStatementPlacement::new(); + let (body, statement_costs) = run_placement(&context, &mut placement, body); + + assert_placement( + "traversal_multiple_paths_cost", + "interpret", + &body, + &context, + &statement_costs, + ); +} + +/// Composite swallowing reduces `path_count` and therefore interpreter cost. +/// +/// A tuple referencing `_1.metadata.record_id.entity_id.web_id` and +/// `_1.metadata.record_id`: `RecordId` swallows `WebId`, so `path_count = 1` +/// and cost = 12, not 16. +#[test] +fn traversal_swallowing_reduces_cost() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> (?, ?) 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], result: (?, ?); + @proj metadata = vertex.metadata: ?, + record_id = metadata.record_id: ?, + entity_id = record_id.entity_id: ?, + web_id = entity_id.web_id: ?; + + bb0() { + result = tuple web_id, record_id; + return result; + } + }); + + let mut context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let mut placement = InterpreterStatementPlacement::new(); + let (body, statement_costs) = run_placement(&context, &mut placement, body); + + assert_placement( + "traversal_swallowing_reduces_cost", + "interpret", + &body, + &context, + &statement_costs, + ); +} + +/// Statements without vertex access are unaffected by traversal costing. +/// +/// A body with vertex projections in one statement and pure constants in another. +/// The constant-only statement still gets base cost 8 (`path_count = 0`). +#[test] +fn non_traversal_unaffected_by_costs() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { + decl env: (), vertex: [Opaque sym::path::Entity; ?], + props: ?, x: Int, result: Bool; + @proj properties = vertex.properties: ?; + + bb0() { + props = load properties; + x = load 42; + result = bin.> x 10; + return result; + } + }); + + let mut context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let mut placement = InterpreterStatementPlacement::new(); + let (body, statement_costs) = run_placement(&context, &mut placement, body); + + assert_placement( + "non_traversal_unaffected_by_costs", + "interpret", + &body, + &context, + &statement_costs, + ); +} + /// `StorageLive`/`StorageDead`/`Nop` get `cost!(0)`, assignments get `cost!(8)`. 
/// /// Tests the cost differentiation: storage management statements have zero cost @@ -103,7 +270,7 @@ fn storage_statements_zero_cost() { }; let mut placement = InterpreterStatementPlacement::new(); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "storage_statements_zero_cost", diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs index a574d727efa..9442d885dcc 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs @@ -67,7 +67,7 @@ fn binary_unary_ops_supported() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "binary_unary_ops_supported", @@ -107,7 +107,7 @@ fn aggregate_tuple_supported() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "aggregate_tuple_supported", @@ -150,7 +150,7 @@ fn aggregate_closure_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "aggregate_closure_rejected", @@ -196,7 +196,7 @@ fn apply_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, 
statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "apply_rejected", @@ -235,7 +235,7 @@ fn input_supported() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "input_supported", @@ -276,7 +276,7 @@ fn env_with_closure_type_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "env_with_closure_type_rejected", @@ -316,7 +316,7 @@ fn env_without_closure_accepted() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "env_without_closure_accepted", @@ -355,7 +355,7 @@ fn entity_projection_column() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "entity_projection_column", @@ -395,7 +395,7 @@ fn entity_projection_jsonb() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "entity_projection_jsonb", @@ -439,7 +439,7 @@ fn storage_statements_zero_cost() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, 
&mut placement, body); assert_placement( "storage_statements_zero_cost", @@ -501,7 +501,7 @@ fn diamond_must_analysis() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "diamond_must_analysis", @@ -619,7 +619,7 @@ fn env_closure_field_rejected_other_accepted() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "env_closure_field_rejected_other_accepted", @@ -659,7 +659,7 @@ fn env_dict_non_string_key_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "env_dict_non_string_key_rejected", @@ -698,7 +698,7 @@ fn env_dict_string_key_accepted() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "env_dict_string_key_accepted", @@ -741,7 +741,7 @@ fn env_dict_opaque_string_key_accepted() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "env_dict_opaque_string_key_accepted", @@ -810,7 +810,7 @@ fn fnptr_constant_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, 
statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "fnptr_constant_rejected", @@ -855,7 +855,7 @@ fn eq_dict_vs_struct_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "eq_dict_vs_struct_rejected", @@ -900,7 +900,7 @@ fn eq_list_vs_tuple_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "eq_list_vs_tuple_rejected", @@ -945,7 +945,7 @@ fn eq_unknown_type_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "eq_unknown_type_rejected", @@ -990,7 +990,7 @@ fn eq_same_type_accepted() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "eq_same_type_accepted", @@ -1035,7 +1035,7 @@ fn ne_dict_vs_struct_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "ne_dict_vs_struct_rejected", @@ -1609,7 +1609,7 @@ fn eq_place_vs_constant_accepted() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = 
run_placement(&context, &mut placement, body); assert_placement( "eq_place_vs_constant_accepted", @@ -2144,7 +2144,7 @@ fn serialization_unsafe_statement_no_cost() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "serialization_unsafe_statement_no_cost", @@ -2198,7 +2198,7 @@ fn serialization_unsafe_edge_propagates() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "serialization_unsafe_edge_propagates", diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs index 29fc68f89f7..db3e0b68eef 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs @@ -98,7 +98,7 @@ pub(crate) fn assert_placement<'heap, A: Allocator>( /// Returns the body and statement cost vector for assertion. 
#[track_caller] pub(crate) fn run_placement<'heap>( - context: &mut MirContext<'_, 'heap>, + context: &MirContext<'_, 'heap>, placement: &mut impl StatementPlacement<'heap, &'heap Heap>, body: Body<'heap>, ) -> (Body<'heap>, StatementCostVec<&'heap Heap>) { diff --git a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs index a76cf25eb48..36d349ffac9 100644 --- a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs @@ -50,7 +50,7 @@ fn target_set(targets: &[TargetId]) -> TargetBitSet { #[expect(clippy::cast_possible_truncation)] fn all_targets() -> TargetBitSet { let mut set = FiniteBitSet::new_empty(TargetId::VARIANT_COUNT as u32); - set.insert_range(TargetId::MIN..=TargetId::MAX); + set.insert_range(TargetId::MIN..=TargetId::MAX, TargetId::VARIANT_COUNT); set } diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs similarity index 99% rename from libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs rename to libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs index 29b3968cfd6..8f740f4b010 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs @@ -1,3 +1,6 @@ +#[cfg(test)] +mod tests; + use core::{ alloc::Allocator, ops::{Index, IndexMut}, diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/tests.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/tests.rs new file mode 100644 index 00000000000..5fd93108761 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/tests.rs @@ -0,0 +1,463 @@ +#![expect(clippy::min_ident_chars)] +use hashql_core::{heap::Heap, id::Id as _, symbol::sym, 
r#type::environment::Environment}; +use hashql_diagnostics::DiagnosticIssues; + +use crate::{ + body::{Body, basic_block::BasicBlockId, location::Location}, + builder::body, + context::MirContext, + intern::Interner, + pass::execution::traversal::{ + EntityPath, + analysis::{TraversalAnalysis, Traversals}, + }, +}; + +fn analyze<'heap>(context: &MirContext<'_, 'heap>, body: &Body<'heap>) -> Traversals<&'heap Heap> { + TraversalAnalysis::traversal_analysis_in(context, body, context.heap) +} + +fn location(block: usize, statement_index: usize) -> Location { + Location { + block: BasicBlockId::from_usize(block), + statement_index, + } +} + +/// Accessing `_1.properties` records `{Properties}` at the statement. +#[test] +fn single_leaf_path() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], props: ?; + @proj properties = vertex.properties: ?; + + bb0() { + props = load properties; + return props; + } + }); + + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let traversals = analyze(&context, &body); + + // statement 0: props = load _1.properties + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt.contains(EntityPath::Properties)); + assert_eq!(stmt.len(), 1); + + // terminator: return props (not a vertex access) + let term = traversals[location(0, 2)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(term.is_empty()); +} + +/// Chained projections `_1.metadata.archived` resolve to `{Archived}`. 
+#[test] +fn multi_segment_path() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: Bool; + @proj metadata = vertex.metadata: ?, archived = metadata.archived: Bool; + + bb0() { + val = load archived; + return val; + } + }); + + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let traversals = analyze(&context, &body); + + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt.contains(EntityPath::Archived)); + assert_eq!(stmt.len(), 1); +} + +/// Bare vertex access (`load _1`) sets all bits via `insert_all`. +#[test] +fn bare_vertex_sets_all_bits() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + + bb0() { + val = load vertex; + return val; + } + }); + + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let traversals = analyze(&context, &body); + + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + // Composites present, their children subsumed + assert!(stmt.contains(EntityPath::Properties)); + assert!(stmt.contains(EntityPath::Vectors)); + assert!(stmt.contains(EntityPath::RecordId)); + assert!(stmt.contains(EntityPath::TemporalVersioning)); + assert!(!stmt.contains(EntityPath::EntityId)); + assert!(!stmt.contains(EntityPath::WebId)); + assert!(!stmt.contains(EntityPath::DecisionTime)); + // 24 total variants - 6 children = 18 top-level paths + assert_eq!(stmt.len(), 18); +} + +/// A tuple referencing two vertex projections records both paths at one location. +#[test] +fn multiple_paths_same_statement() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> (?, Bool) { + decl env: (), vertex: [Opaque sym::path::Entity; ?], result: (?, Bool); + @proj properties = vertex.properties: ?, + metadata = vertex.metadata: ?, + archived = metadata.archived: Bool; + + bb0() { + result = tuple properties, archived; + return result; + } + }); + + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let traversals = analyze(&context, &body); + + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt.contains(EntityPath::Properties)); + assert!(stmt.contains(EntityPath::Archived)); + assert_eq!(stmt.len(), 2); +} + +/// Returning a vertex projection place records the path at the terminator position. 
+#[test] +fn terminator_vertex_access() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { + decl env: (), vertex: [Opaque sym::path::Entity; ?]; + @proj metadata = vertex.metadata: ?, archived = metadata.archived: Bool; + + bb0() { + return archived; + } + }); + + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let traversals = analyze(&context, &body); + + // 0 statements, terminator at index 1 + let term = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(term.contains(EntityPath::Archived)); + assert_eq!(term.len(), 1); +} + +/// Accessing env fields (non-vertex local) produces no traversal entries. +#[test] +fn non_vertex_access_ignored() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (Int), vertex: [Opaque sym::path::Entity; ?], val: Int; + @proj env_0 = env.0: Int; + + bb0() { + val = load env_0; + return val; + } + }); + + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let traversals = analyze(&context, &body); + + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt.is_empty()); + + let term = traversals[location(0, 2)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(term.is_empty()); +} + +/// Composite path `_1.metadata.record_id` records `{RecordId}`, not individual children. +#[test] +fn composite_path_recorded() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj metadata = vertex.metadata: ?, + record_id = metadata.record_id: ?; + + bb0() { + val = load record_id; + return val; + } + }); + + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let traversals = analyze(&context, &body); + + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt.contains(EntityPath::RecordId)); + assert_eq!(stmt.len(), 1); +} + +/// Embedding path `_1.encodings.vectors` records `{Vectors}`. +#[test] +fn embedding_path_recorded() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj encodings = vertex.encodings: ?, + vectors = encodings.vectors: ?; + + bb0() { + val = load vectors; + return val; + } + }); + + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let traversals = analyze(&context, &body); + + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt.contains(EntityPath::Vectors)); + assert_eq!(stmt.len(), 1); +} + +/// Vertex accesses in different blocks are recorded at the correct locations. +#[test] +fn paths_across_blocks() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], + props: ?, val: Bool, cond: Bool; + @proj properties = vertex.properties: ?, + metadata = vertex.metadata: ?, + archived = metadata.archived: Bool; + + bb0() { + props = load properties; + cond = load true; + if cond then bb1() else bb2(); + }, + bb1() { + val = load archived; + return val; + }, + bb2() { + return cond; + } + }); + + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let traversals = analyze(&context, &body); + + // bb0[0]: props = load _1.properties + let bb0_s0 = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(bb0_s0.contains(EntityPath::Properties)); + assert_eq!(bb0_s0.len(), 1); + + // bb0[1]: cond = load true (no vertex access) + let bb0_s1 = traversals[location(0, 2)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(bb0_s1.is_empty()); + + // bb1[0]: val = load _1.metadata.archived + let bb1_s0 = traversals[location(1, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(bb1_s0.contains(EntityPath::Archived)); + assert_eq!(bb1_s0.len(), 1); + + // bb2 terminator: return cond (no vertex access) + let bb2_term = traversals[location(2, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(bb2_term.is_empty()); +} + +/// Composite swallowing works end-to-end through the analysis pass. +/// +/// A statement loading `_1.metadata.record_id` followed by one loading +/// `_1.metadata.record_id.entity_id.web_id`: the first records `{RecordId}`, +/// the second records `{WebId}`. No cross-statement interaction. +#[test] +fn swallowing_across_statements() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], + rid: ?, wid: ?; + @proj metadata = vertex.metadata: ?, + record_id = metadata.record_id: ?, + entity_id = record_id.entity_id: ?, + web_id = entity_id.web_id: ?; + + bb0() { + rid = load record_id; + wid = load web_id; + return rid; + } + }); + + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let traversals = analyze(&context, &body); + + // Each statement records independently + let stmt0 = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt0.contains(EntityPath::RecordId)); + assert_eq!(stmt0.len(), 1); + + let stmt1 = traversals[location(0, 2)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt1.contains(EntityPath::WebId)); + assert_eq!(stmt1.len(), 1); +} + +/// Within a single statement, inserting a composite after its child swallows the child. +#[test] +fn swallowing_within_statement() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> (?, ?) { + decl env: (), vertex: [Opaque sym::path::Entity; ?], result: (?, ?); + @proj metadata = vertex.metadata: ?, + record_id = metadata.record_id: ?, + entity_id = record_id.entity_id: ?, + web_id = entity_id.web_id: ?; + + bb0() { + result = tuple web_id, record_id; + return result; + } + }); + + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let traversals = analyze(&context, &body); + + // Both operands reference _1. WebId is inserted first, then RecordId swallows it. 
+ let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt.contains(EntityPath::RecordId)); + assert!(!stmt.contains(EntityPath::WebId)); + assert_eq!(stmt.len(), 1); +} diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs index e46902ce8b4..d3bf3005467 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs @@ -290,7 +290,8 @@ impl EntityPathBitSet { out }; - self.0.insert_range(..); + self.0 + .insert_range(.., core::mem::variant_count::()); for path in HAS_ANCESTORS { self.0.remove(path); diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs index a1d2f0daa54..c7b1ce22cf0 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs @@ -1,4 +1,4 @@ -//! Unit tests for entity projection path lookup and composite swallowing. +//! Unit tests for entity projection path lookup, composite swallowing, and traversal analysis. use hashql_core::{symbol::sym, r#type::TypeId}; @@ -364,3 +364,99 @@ fn swallow_selective() { assert!(bitset.contains(EntityPath::Properties)); assert!(bitset.contains(EntityPath::DecisionTime)); } + +// --- insert_all tests --- + +/// `insert_all` sets exactly the top-level paths (composites replace their children). 
+#[test] +fn insert_all_sets_top_level_paths() { + let mut bitset = empty_bitset(); + bitset.insert_all(); + + // Top-level and childless paths are present + assert!(bitset.contains(EntityPath::Properties)); + assert!(bitset.contains(EntityPath::Vectors)); + assert!(bitset.contains(EntityPath::RecordId)); + assert!(bitset.contains(EntityPath::TemporalVersioning)); + assert!(bitset.contains(EntityPath::EntityTypeIds)); + assert!(bitset.contains(EntityPath::Archived)); + assert!(bitset.contains(EntityPath::Confidence)); + assert!(bitset.contains(EntityPath::ProvenanceInferred)); + assert!(bitset.contains(EntityPath::ProvenanceEdition)); + assert!(bitset.contains(EntityPath::PropertyMetadata)); + assert!(bitset.contains(EntityPath::LeftEntityWebId)); + assert!(bitset.contains(EntityPath::RightEntityWebId)); + + // Children subsumed by composites are absent + assert!(!bitset.contains(EntityPath::EntityId)); + assert!(!bitset.contains(EntityPath::WebId)); + assert!(!bitset.contains(EntityPath::EntityUuid)); + assert!(!bitset.contains(EntityPath::DraftId)); + assert!(!bitset.contains(EntityPath::EditionId)); + assert!(!bitset.contains(EntityPath::DecisionTime)); + assert!(!bitset.contains(EntityPath::TransactionTime)); +} + +/// `insert_all` produces the correct count: total variants minus children with ancestors. +#[test] +fn insert_all_len() { + let mut bitset = empty_bitset(); + bitset.insert_all(); + + // 25 variants - 7 children (EntityId, WebId, EntityUuid, DraftId, EditionId, + // DecisionTime, TransactionTime) = 18 + assert_eq!(bitset.len(), 18); +} + +/// `insert_all` after individual inserts produces the same result as a fresh `insert_all`. +#[test] +fn insert_all_is_idempotent_over_existing() { + let mut bitset = empty_bitset(); + bitset.insert(EntityPath::WebId); + bitset.insert(EntityPath::Properties); + bitset.insert_all(); + + let mut fresh = empty_bitset(); + fresh.insert_all(); + + assert_eq!(bitset, fresh); +} + +/// An empty bitset has len 0. 
+#[test] +fn empty_bitset_len() { + let bitset = empty_bitset(); + assert_eq!(bitset.len(), 0); + assert!(bitset.is_empty()); +} + +/// `len` tracks individual inserts correctly. +#[test] +fn len_after_inserts() { + let mut bitset = empty_bitset(); + assert_eq!(bitset.len(), 0); + + bitset.insert(EntityPath::Properties); + assert_eq!(bitset.len(), 1); + + bitset.insert(EntityPath::Archived); + assert_eq!(bitset.len(), 2); + + // Duplicate insert doesn't change count + bitset.insert(EntityPath::Properties); + assert_eq!(bitset.len(), 2); +} + +/// Composite swallowing decreases `len` when children are removed. +#[test] +fn len_decreases_on_swallow() { + let mut bitset = empty_bitset(); + bitset.insert(EntityPath::WebId); + bitset.insert(EntityPath::EntityUuid); + bitset.insert(EntityPath::DraftId); + assert_eq!(bitset.len(), 3); + + // EntityId swallows all three children + bitset.insert(EntityPath::EntityId); + assert_eq!(bitset.len(), 1); +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap index 1568758b168..80b872f6109 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap @@ -3,23 +3,15 @@ source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { - let %2: Boolean + let %2: ? 
let %3: Integer - let %4: Integer - let %5: Integer - let %6: Boolean + let %4: Boolean bb0(): { - %2 = %1.metadata.archived // cost: 12 - %3 = 10 // cost: 8 - %4 = 20 // cost: 8 - %5 = %3 + %4 // cost: 8 - %6 = %5 > 15 // cost: 8 + %2 = %1.properties // cost: 12 + %3 = 42 // cost: 8 + %4 = %3 > 10 // cost: 8 - return %6 + return %4 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap new file mode 100644 index 00000000000..94b42590076 --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap @@ -0,0 +1,15 @@ +--- +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs +expression: output +--- +fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> (?, Boolean) { + let %2: ? 
+ let %3: (?, Boolean) + + bb0(): { + %2 = %1.properties // cost: 12 + %3 = (%1.properties, %1.metadata.archived) // cost: 16 + + return %3 + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_backend_cost.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_path_cost.snap similarity index 58% rename from libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_backend_cost.snap rename to libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_path_cost.snap index 40c059720ef..e3ba22c2fd3 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_backend_cost.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_path_cost.snap @@ -7,13 +7,9 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { let %3: Boolean bb0(): { - %3 = %1.metadata.archived // cost: 12 - %2 = !%3 // cost: 8 + %2 = %1.metadata.archived // cost: 12 + %3 = !%2 // cost: 8 - return %2 + return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap new file mode 100644 index 00000000000..78a7ed23795 --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap @@ -0,0 +1,13 @@ +--- +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs +expression: output +--- +fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> (?, ?) { + let %2: (?, ?) 
+ + bb0(): { + %2 = (%1.metadata.record_id.entity_id.web_id, %1.metadata.record_id) // cost: 12 + + return %2 + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_worst_case_multiple_backends.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_worst_case_multiple_backends.snap deleted file mode 100644 index a98062f1ab1..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_worst_case_multiple_backends.snap +++ /dev/null @@ -1,19 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs -expression: output ---- -fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> ? { - let %2: Boolean - let %3: ? - - bb0(): { - %2 = %1.metadata.archived // cost: 12 - %3 = %1.encodings.vectors // cost: 14 - - return %3 - } -} - -=================== Traversals =================== - -Traversals: From 4b6a619e2cce70333fd0255029ceb48303b77e1b Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sun, 1 Mar 2026 13:48:21 +0100 Subject: [PATCH 12/32] feat: rework tests --- .../statement_placement/embedding/tests.rs | 2 +- .../statement_placement/interpret/mod.rs | 2 +- .../execution/traversal/analysis/tests.rs | 120 +++++++++++++++++- 3 files changed, 118 insertions(+), 6 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs index 93278807e17..f4df5de0f07 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs @@ -86,7 +86,7 @@ fn all_args_excluded() { }; let mut placement = EmbeddingStatementPlacement::new_in(Global); - let (body, statement_costs) = run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = 
run_placement(&context, &mut placement, body); assert_placement( "all_args_excluded", diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs index 28270754328..5d8e9b2beb9 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs @@ -34,7 +34,7 @@ impl<'heap, A: Allocator> Visitor<'heap> for CostVisitor<'_, A> { location: Location, statement: &Statement<'heap>, ) -> Self::Result { - // All statements are supported; TraversalExtraction provides backend data access + // All statements are supported; TraversalAnalysis provides backend data access match &statement.kind { StatementKind::Assign(Assign { lhs, rhs: _ }) => { // If it's a traversal load (aka we add the interpreter cost, as well as the cost to diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/tests.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/tests.rs index 5fd93108761..d5a5a5a7d1c 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/tests.rs @@ -133,7 +133,7 @@ fn bare_vertex_sets_all_bits() { assert!(!stmt.contains(EntityPath::EntityId)); assert!(!stmt.contains(EntityPath::WebId)); assert!(!stmt.contains(EntityPath::DecisionTime)); - // 24 total variants - 6 children = 18 top-level paths + // 25 variants - 7 children = 18 top-level paths assert_eq!(stmt.len(), 18); } @@ -375,13 +375,13 @@ fn paths_across_blocks() { assert!(bb2_term.is_empty()); } -/// Composite swallowing works end-to-end through the analysis pass. +/// Each statement records paths independently; no cross-statement interaction. 
/// /// A statement loading `_1.metadata.record_id` followed by one loading /// `_1.metadata.record_id.entity_id.web_id`: the first records `{RecordId}`, -/// the second records `{WebId}`. No cross-statement interaction. +/// the second records `{WebId}`. Swallowing only applies within a single statement. #[test] -fn swallowing_across_statements() { +fn paths_recorded_independently_per_statement() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); @@ -424,6 +424,118 @@ fn swallowing_across_statements() { assert_eq!(stmt1.len(), 1); } +/// An unresolvable vertex projection (e.g., `_1.unknown`) triggers `insert_all`. +/// +/// When `EntityPath::resolve` returns `None`, the analysis conservatively assumes the +/// entire entity is needed and sets all bits. +#[test] +fn unresolvable_projection_sets_all_bits() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj unknown = vertex.unknown: ?; + + bb0() { + val = load unknown; + return val; + } + }); + + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let traversals = analyze(&context, &body); + + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + // Unresolvable path → insert_all → 25 variants - 7 children = 18 + assert_eq!(stmt.len(), 18); + assert!(stmt.contains(EntityPath::Properties)); + assert!(stmt.contains(EntityPath::RecordId)); +} + +/// `link_data.left_entity_id.web_id` resolves to `{LeftEntityWebId}`. +#[test] +fn link_data_path_recorded() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj link_data = vertex.link_data: ?, + left_entity_id = link_data.left_entity_id: ?, + web_id = left_entity_id.web_id: ?; + + bb0() { + val = load web_id; + return val; + } + }); + + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let traversals = analyze(&context, &body); + + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt.contains(EntityPath::LeftEntityWebId)); + assert_eq!(stmt.len(), 1); +} + +/// `TemporalVersioning` composite swallowing works end-to-end through analysis. +/// +/// A tuple referencing both `_1.metadata.temporal_versioning.decision_time` and +/// `_1.metadata.temporal_versioning`: `TemporalVersioning` swallows `DecisionTime`. +#[test] +fn temporal_versioning_swallowing_through_analysis() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> (?, ?) { + decl env: (), vertex: [Opaque sym::path::Entity; ?], result: (?, ?); + @proj metadata = vertex.metadata: ?, + temporal_versioning = metadata.temporal_versioning: ?, + decision_time = temporal_versioning.decision_time: ?; + + bb0() { + result = tuple decision_time, temporal_versioning; + return result; + } + }); + + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let traversals = analyze(&context, &body); + + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt.contains(EntityPath::TemporalVersioning)); + assert!(!stmt.contains(EntityPath::DecisionTime)); + assert_eq!(stmt.len(), 1); +} + /// Within a single statement, inserting a composite after its child swallows the child. 
#[test] fn swallowing_within_statement() { From 410f9555080680e91ba0e47ea063305926e80484 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sun, 1 Mar 2026 16:38:42 +0100 Subject: [PATCH 13/32] feat: define lattice over the entity --- .../hashql/core/src/id/bit_vec/finite.rs | 56 ++++--- libs/@local/hashql/core/src/id/bit_vec/mod.rs | 53 +++--- libs/@local/hashql/core/src/id/mod.rs | 31 ++-- libs/@local/hashql/core/src/lib.rs | 5 + libs/@local/hashql/macros/src/id/enum.rs | 10 +- .../pass/analysis/dataflow/lattice/impls.rs | 77 ++++++++- .../src/pass/execution/traversal/entity.rs | 153 ++++++++++++++---- .../mir/src/pass/execution/traversal/mod.rs | 72 +++++++++ .../mir/src/pass/execution/traversal/tests.rs | 94 ++++++++++- 9 files changed, 459 insertions(+), 92 deletions(-) diff --git a/libs/@local/hashql/core/src/id/bit_vec/finite.rs b/libs/@local/hashql/core/src/id/bit_vec/finite.rs index cd4694a9811..a49eec92f1a 100644 --- a/libs/@local/hashql/core/src/id/bit_vec/finite.rs +++ b/libs/@local/hashql/core/src/id/bit_vec/finite.rs @@ -11,7 +11,6 @@ //! [`DenseBitSet`]: super::DenseBitSet #![expect( clippy::cast_possible_truncation, - clippy::cast_lossless, reason = "Integral conversions in macro expansions may truncate or widen depending on target \ type" )] @@ -19,7 +18,7 @@ use core::{ fmt::{self, Debug}, hash::{Hash, Hasher}, - marker::PhantomData, + marker::{Destruct, PhantomData}, ops::{BitAnd, BitAndAssign, BitOrAssign, Not, RangeBounds, Shl, Shr, Sub}, }; @@ -34,17 +33,17 @@ use crate::id::{Id, bit_vec::inclusive_start_end}; /// /// The "integral" in the name refers to the mathematical concept of integers, distinguishing /// these types from other potential backing stores like arrays of integers. 
-pub trait FiniteBitSetIntegral: +pub const trait FiniteBitSetIntegral: Copy + Clone + Hash - + BitAnd - + BitOrAssign - + BitAndAssign - + Shl - + Shr - + Sub - + Not + + const BitAnd + + const BitOrAssign + + const BitAndAssign + + const Shl + + const Shr + + const Sub + + const Not + const PartialEq + fmt::Binary { @@ -66,7 +65,7 @@ pub trait FiniteBitSetIntegral: const ZERO: Self; /// Converts an [`Id`] to this integral type. - fn from_id(id: I) -> Self; + fn from_id(id: I) -> Self; /// Converts a `usize` to this integral type. fn from_usize(value: usize) -> Self; @@ -86,14 +85,14 @@ macro_rules! impl_trait { $(impl_trait!(@impl $integral);)* }; (@impl $integral:ty) => { - impl FiniteBitSetIntegral for $integral { + impl const FiniteBitSetIntegral for $integral { const EMPTY: Self = Self::MIN; const FILLED: Self = Self::MAX; const MAX_DOMAIN_SIZE: u32 = <$integral>::BITS; const ONE: Self = 1; const ZERO: Self = 0; - fn from_id(id: I) -> Self { + fn from_id(id: I) -> Self { id.as_u32() as Self } @@ -207,7 +206,11 @@ impl FiniteBitSet { /// /// Panics if `index` is out of bounds for the underlying integral type. #[inline] - pub fn insert(&mut self, index: I) { + pub const fn insert(&mut self, index: I) + where + I: [const] Id, + T: [const] FiniteBitSetIntegral, + { assert!(index.as_u32() < T::MAX_DOMAIN_SIZE); self.store |= T::ONE << T::from_id(index); @@ -221,7 +224,12 @@ impl FiniteBitSet { /// /// Panics if the range end exceeds the capacity of the underlying integral type. #[inline] - pub fn insert_range(&mut self, bounds: impl RangeBounds, domain_size: usize) { + pub const fn insert_range(&mut self, bounds: R, domain_size: usize) + where + R: [const] RangeBounds + [const] Destruct, + I: [const] Id, + T: [const] FiniteBitSetIntegral, + { let Some((start, end)) = inclusive_start_end(bounds, domain_size) else { return; }; @@ -240,7 +248,11 @@ impl FiniteBitSet { /// /// Panics if `index` is out of bounds for the underlying integral type. 
#[inline] - pub fn remove(&mut self, index: I) { + pub const fn remove(&mut self, index: I) + where + I: [const] Id, + T: [const] FiniteBitSetIntegral, + { assert!(index.as_u32() < T::MAX_DOMAIN_SIZE); self.store &= !(T::ONE << T::from_id(index)); @@ -252,7 +264,11 @@ impl FiniteBitSet { /// /// Panics if `index` is out of bounds for the underlying integral type. #[inline] - pub fn set(&mut self, index: I, value: bool) { + pub const fn set(&mut self, index: I, value: bool) + where + I: [const] Id, + T: [const] FiniteBitSetIntegral, + { if value { self.insert(index); } else { @@ -265,7 +281,11 @@ impl FiniteBitSet { /// Returns `false` if `index` is out of bounds (rather than panicking). #[inline] #[must_use] - pub fn contains(&self, index: I) -> bool { + pub const fn contains(&self, index: I) -> bool + where + I: [const] Id, + T: [const] FiniteBitSetIntegral, + { if index.as_u32() >= T::MAX_DOMAIN_SIZE { false } else { diff --git a/libs/@local/hashql/core/src/id/bit_vec/mod.rs b/libs/@local/hashql/core/src/id/bit_vec/mod.rs index 095f67dcc25..bbc332ca59d 100644 --- a/libs/@local/hashql/core/src/id/bit_vec/mod.rs +++ b/libs/@local/hashql/core/src/id/bit_vec/mod.rs @@ -38,7 +38,7 @@ use alloc::rc::Rc; use core::{ fmt, iter, - marker::PhantomData, + marker::{Destruct, PhantomData}, ops::{Bound, Range, RangeBounds}, slice, }; @@ -78,32 +78,12 @@ const CHUNK_BITS: usize = CHUNK_WORDS * WORD_BITS; // 2048 bits type ChunkSize = u16; const _: () = assert!(CHUNK_BITS <= ChunkSize::MAX as usize); -pub trait BitRelations { +pub const trait BitRelations { fn union(&mut self, other: &Rhs) -> bool; fn subtract(&mut self, other: &Rhs) -> bool; fn intersect(&mut self, other: &Rhs) -> bool; } -#[inline] -fn inclusive_start_end(range: impl RangeBounds, domain: usize) -> Option<(usize, usize)> { - // Both start and end are inclusive. 
- let start = match range.start_bound().cloned() { - Bound::Included(start) => start.as_usize(), - Bound::Excluded(start) => start.as_usize() + 1, - Bound::Unbounded => 0, - }; - let end = match range.end_bound().cloned() { - Bound::Included(end) => end.as_usize(), - Bound::Excluded(end) => end.as_usize().checked_sub(1)?, - Bound::Unbounded => domain - 1, - }; - assert!(end < domain); - if start > end { - return None; - } - Some((start, end)) -} - /// A fixed-size bitset type with a dense representation. /// /// Note 1: Since this bitset is dense, if your domain is big, and/or relatively @@ -1589,3 +1569,32 @@ const fn max_bit(word: Word) -> usize { fn count_ones(words: &[Word]) -> usize { words.iter().map(|word| word.count_ones() as usize).sum() } + +#[inline] +const fn inclusive_start_end(range: R, domain: usize) -> Option<(usize, usize)> +where + T: [const] Id, + R: [const] RangeBounds + [const] Destruct, +{ + // Both start and end are inclusive. + let start = match range.start_bound().copied() { + Bound::Included(start) => start.as_usize(), + Bound::Excluded(start) => start.as_usize() + 1, + Bound::Unbounded => 0, + }; + let end = match range.end_bound().copied() { + Bound::Included(end) => end.as_usize(), + Bound::Excluded(end) => match end.as_usize().checked_sub(1) { + Some(end) => end, + None => return None, + }, + Bound::Unbounded => domain - 1, + }; + + assert!(end < domain); + if start > end { + return None; + } + + Some((start, end)) +} diff --git a/libs/@local/hashql/core/src/id/mod.rs b/libs/@local/hashql/core/src/id/mod.rs index 3a500e8429a..333dc42c41b 100644 --- a/libs/@local/hashql/core/src/id/mod.rs +++ b/libs/@local/hashql/core/src/id/mod.rs @@ -46,7 +46,7 @@ impl Display for IdError { /// /// Provides type safety for IDs of different domains (nodes, users, etc.) /// while maintaining a consistent conversion API. 
-pub trait Id: +pub const trait Id: Copy + PartialEq + Eq @@ -55,9 +55,9 @@ pub trait Id: + Hash + Debug + Display - + TryFrom - + TryFrom - + TryFrom + + [const] TryFrom + + [const] TryFrom + + [const] TryFrom + 'static { /// The maximum value this ID type can represent. @@ -75,7 +75,10 @@ pub trait Id: #[inline] #[must_use] fn from_u32(index: u32) -> Self { - Self::try_from(index).expect("Cannot create ID: value outside valid range") + match Self::try_from(index) { + Ok(id) => id, + Err(_) => panic!("Cannot create ID: value outside valid range"), + } } /// Creates an ID from a [`u64`] value. @@ -87,7 +90,10 @@ pub trait Id: #[inline] #[must_use] fn from_u64(index: u64) -> Self { - Self::try_from(index).expect("Cannot create ID: value outside valid range") + match Self::try_from(index) { + Ok(id) => id, + Err(_) => panic!("Cannot create ID: value outside valid range"), + } } /// Creates an ID from a [`usize`] value. @@ -99,7 +105,10 @@ pub trait Id: #[inline] #[must_use] fn from_usize(index: usize) -> Self { - Self::try_from(index).expect("Cannot create ID: value outside valid range") + match Self::try_from(index) { + Ok(id) => id, + Err(_) => panic!("Cannot create ID: value outside valid range"), + } } /// Converts this ID to a [`u32`] value. @@ -181,16 +190,16 @@ pub trait Id: /// } /// } /// ``` -pub trait HasId { +pub const trait HasId { type Id: Id; /// Returns the ID of this entity. 
fn id(&self) -> Self::Id; } -impl HasId for &T +impl const HasId for &T where - T: HasId, + T: [const] HasId, { type Id = T::Id; @@ -199,7 +208,7 @@ where } } -impl HasId for (I, T) +impl const HasId for (I, T) where I: Id, { diff --git a/libs/@local/hashql/core/src/lib.rs b/libs/@local/hashql/core/src/lib.rs index 3599b4b8442..e8d766d1a4a 100644 --- a/libs/@local/hashql/core/src/lib.rs +++ b/libs/@local/hashql/core/src/lib.rs @@ -23,8 +23,13 @@ const_cmp, const_ops, const_trait_impl, + const_range, + const_clone, + const_convert, + const_destruct, debug_closure_helpers, extend_one, + bound_copied, formatting_options, get_disjoint_mut_helpers, iter_intersperse, diff --git a/libs/@local/hashql/macros/src/id/enum.rs b/libs/@local/hashql/macros/src/id/enum.rs index e3d868d6f0f..e7798207b7d 100644 --- a/libs/@local/hashql/macros/src/id/enum.rs +++ b/libs/@local/hashql/macros/src/id/enum.rs @@ -186,8 +186,10 @@ pub(super) fn expand_enum( fn prev(self) -> ::core::option::Option { let discriminant = self.into_discriminant(); - let prev = discriminant.checked_sub(1)?; - Self::try_from_discriminant(prev) + match discriminant.checked_sub(1) { + Some(prev) => Self::try_from_discriminant(prev), + None => None, + } } } }); @@ -196,7 +198,7 @@ pub(super) fn expand_enum( for int in [quote!(u32), quote!(u64), quote!(usize)] { output.extend(quote! { #[automatically_derived] - impl ::core::convert::TryFrom<#int> for #name { + impl #konst ::core::convert::TryFrom<#int> for #name { type Error = #krate::id::IdError; #[inline] @@ -218,7 +220,7 @@ pub(super) fn expand_enum( // 6. HasId impl output.extend(quote! 
{ #[automatically_derived] - impl #krate::id::HasId for #name { + impl #konst #krate::id::HasId for #name { type Id = Self; #[inline] diff --git a/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs b/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs index d04649781e8..ace56215f96 100644 --- a/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs +++ b/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs @@ -210,6 +210,21 @@ where } } +impl HasTop<(T, U)> for PowersetLattice +where + Self: HasTop + HasTop, +{ + #[inline] + fn top(&self) -> (T, U) { + (self.top(), self.top()) + } + + #[inline] + fn is_top(&self, value: &(T, U)) -> bool { + self.is_top(&value.0) && self.is_top(&value.1) + } +} + impl JoinSemiLattice<(T, U)> for PowersetLattice where Self: JoinSemiLattice + JoinSemiLattice, @@ -226,7 +241,27 @@ where #[inline] fn join(&self, lhs: &mut (T, U), rhs: &(T, U)) -> bool { - self.join(&mut lhs.0, &rhs.0) || self.join(&mut lhs.1, &rhs.1) + self.join(&mut lhs.0, &rhs.0) | self.join(&mut lhs.1, &rhs.1) + } +} + +impl MeetSemiLattice<(T, U)> for PowersetLattice +where + Self: MeetSemiLattice + MeetSemiLattice, +{ + #[inline] + fn meet_owned(&self, mut lhs: (T, U), rhs: &(T, U)) -> (T, U) + where + (T, U): Sized, + { + self.meet(&mut lhs.0, &rhs.0); + self.meet(&mut lhs.1, &rhs.1); + lhs + } + + #[inline] + fn meet(&self, lhs: &mut (T, U), rhs: &(T, U)) -> bool { + self.meet(&mut lhs.0, &rhs.0) | self.meet(&mut lhs.1, &rhs.1) } } @@ -338,4 +373,44 @@ mod tests { assert_bounded_lattice(&lattice, a, b, c); } + + #[test] + fn powerset_lattice_tuple() { + id::newtype!(struct Left(u32 is 0..=31)); + id::newtype!(struct Right(u32 is 0..=31)); + + let lattice = PowersetLattice::new(32); + + let mut left_a: DenseBitSet = DenseBitSet::new_empty(32); + let mut left_b: DenseBitSet = DenseBitSet::new_empty(32); + let mut left_c: DenseBitSet = DenseBitSet::new_empty(32); + + left_a.insert(Left::from_usize(0)); + 
left_a.insert(Left::from_usize(1)); + + left_b.insert(Left::from_usize(1)); + left_b.insert(Left::from_usize(2)); + + left_c.insert(Left::from_usize(2)); + left_c.insert(Left::from_usize(3)); + + let mut right_a: DenseBitSet = DenseBitSet::new_empty(32); + let mut right_b: DenseBitSet = DenseBitSet::new_empty(32); + let mut right_c: DenseBitSet = DenseBitSet::new_empty(32); + + right_a.insert(Right::from_usize(10)); + right_a.insert(Right::from_usize(11)); + + right_b.insert(Right::from_usize(11)); + right_b.insert(Right::from_usize(12)); + + right_c.insert(Right::from_usize(12)); + right_c.insert(Right::from_usize(13)); + + let tuple_a = (left_a, right_a); + let tuple_b = (left_b, right_b); + let tuple_c = (left_c, right_c); + + assert_bounded_lattice(&lattice, tuple_a, tuple_b, tuple_c); + } } diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs index d3bf3005467..baf134a5fa2 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs @@ -1,10 +1,21 @@ +use core::debug_assert_matches; + use hashql_core::{ - id::{Id, bit_vec::FiniteBitSet}, + id::{ + Id, + bit_vec::{BitRelations as _, FiniteBitSet}, + }, symbol::{ConstantSymbol, sym}, }; -use super::access::{Access, AccessMode}; -use crate::body::place::{Projection, ProjectionKind}; +use super::{ + TraversalLattice, VertexType, + access::{Access, AccessMode}, +}; +use crate::{ + body::place::{Projection, ProjectionKind}, + pass::analysis::dataflow::lattice::{HasBottom, HasTop, JoinSemiLattice}, +}; macro_rules! sym { ($($sym:tt)::*) => { @@ -18,6 +29,7 @@ macro_rules! sym { /// exhaustively match on this to generate backend-specific access (SQL expressions, placement /// decisions, etc.) without duplicating path resolution logic. 
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Id)] +#[id(const)] pub enum EntityPath { /// `properties.*` — JSONB column in `entity_editions`. Properties, @@ -223,10 +235,60 @@ impl EntityPath { } } +const HAS_ANCESTORS: [EntityPath; HAS_ANCESTOR_COUNT] = { + let mut out = [EntityPath::Archived; HAS_ANCESTOR_COUNT]; + + let mut index = 0; + let mut ptr = 0; + let paths = EntityPath::all(); + + while ptr < paths.len() { + if !paths[ptr].ancestors().is_empty() { + out[index] = paths[ptr]; + index += 1; + } + + ptr += 1; + } + + out +}; +const HAS_ANCESTOR_COUNT: usize = { + let mut count = 0; + let mut index = 0; + let paths = EntityPath::all(); + + while index < paths.len() { + if !paths[index].ancestors().is_empty() { + count += 1; + } + + index += 1; + } + + count +}; + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub struct EntityPathBitSet(FiniteBitSet); impl EntityPathBitSet { + const BOTTOM: Self = Self::new_empty(); + #[expect(clippy::cast_possible_truncation)] + const TOP: Self = { + let mut set = FiniteBitSet::new_empty(core::mem::variant_count::() as u32); + + set.insert_range(.., core::mem::variant_count::()); + + let mut index = 0; + while index < HAS_ANCESTOR_COUNT { + set.remove(HAS_ANCESTORS[index]); + index += 1; + } + + Self(set) + }; + #[expect(clippy::cast_possible_truncation)] #[must_use] pub const fn new_empty() -> Self { @@ -254,48 +316,69 @@ impl EntityPathBitSet { } } - pub(crate) fn insert_all(&mut self) { - const HAS_ANCESTOR_COUNT: usize = { - let mut count = 0; - let mut index = 0; - let paths = EntityPath::all(); - - while index < paths.len() { - if !paths[index].ancestors().is_empty() { - count += 1; + fn normalize(&mut self) { + for path in &self.0 { + for &ancestor in path.ancestors() { + if self.0.contains(ancestor) { + self.0.remove(path); } - - index += 1; } + } + } - count - }; + pub(crate) const fn insert_all(&mut self) { + *self = Self::TOP; + } +} + +impl HasTop for TraversalLattice { + fn top(&self) 
-> EntityPathBitSet { + debug_assert_matches!(self.vertex(), VertexType::Entity); + EntityPathBitSet::TOP + } + + fn is_top(&self, value: &EntityPathBitSet) -> bool { + debug_assert_matches!(self.vertex(), VertexType::Entity); + *value == EntityPathBitSet::TOP + } +} - const HAS_ANCESTORS: [EntityPath; HAS_ANCESTOR_COUNT] = { - let mut out = [EntityPath::Archived; HAS_ANCESTOR_COUNT]; +impl HasBottom for TraversalLattice { + fn bottom(&self) -> EntityPathBitSet { + debug_assert_matches!(self.vertex(), VertexType::Entity); + EntityPathBitSet::BOTTOM + } - let mut index = 0; - let mut ptr = 0; - let paths = EntityPath::all(); + fn is_bottom(&self, value: &EntityPathBitSet) -> bool { + debug_assert_matches!(self.vertex(), VertexType::Entity); + *value == EntityPathBitSet::BOTTOM + } +} - while ptr < paths.len() { - if !paths[ptr].ancestors().is_empty() { - out[index] = paths[ptr]; - index += 1; - } +impl JoinSemiLattice for TraversalLattice { + fn join(&self, lhs: &mut EntityPathBitSet, rhs: &EntityPathBitSet) -> bool { + debug_assert_matches!(self.vertex(), VertexType::Entity); - ptr += 1; - } + let mut new = *lhs; - out - }; + new.0.union(&rhs.0); + new.normalize(); + + let has_changed = new != *lhs; + *lhs = new; + has_changed + } - self.0 - .insert_range(.., core::mem::variant_count::()); + fn join_owned(&self, mut lhs: EntityPathBitSet, rhs: &EntityPathBitSet) -> EntityPathBitSet + where + EntityPathBitSet: Sized, + { + debug_assert_matches!(self.vertex(), VertexType::Entity); - for path in HAS_ANCESTORS { - self.0.remove(path); - } + lhs.0.union(&rhs.0); + lhs.normalize(); + + lhs } } diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs index fe88abe84fa..d592d21a78d 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs @@ -19,6 +19,28 @@ pub(crate) use analysis::{TraversalAnalysis, Traversals}; 
pub(crate) use self::access::Access; pub use self::entity::{EntityPath, EntityPathBitSet}; use super::VertexType; +use crate::pass::analysis::dataflow::lattice::{HasBottom, HasTop, JoinSemiLattice}; + +/// Lattice structure for traversal path bitsets. +/// +/// Carries the [`VertexType`] so that [`bottom`](HasBottom::bottom) and [`top`](HasTop::top) +/// construct the correct variant of [`TraversalPathBitSet`]. +#[derive(Debug, Copy, Clone)] +pub struct TraversalLattice { + vertex: VertexType, +} + +impl TraversalLattice { + #[must_use] + pub const fn new(vertex: VertexType) -> Self { + Self { vertex } + } + + #[must_use] + pub const fn vertex(&self) -> VertexType { + self.vertex + } +} /// Set of resolved traversal paths for a single vertex type. /// @@ -96,6 +118,56 @@ impl TraversalPathBitSet { } } +impl HasBottom for TraversalLattice { + fn bottom(&self) -> TraversalPathBitSet { + match self.vertex { + VertexType::Entity => TraversalPathBitSet::Entity(self.bottom()), + } + } + + fn is_bottom(&self, value: &TraversalPathBitSet) -> bool { + match value { + TraversalPathBitSet::Entity(bitset) => self.is_bottom(bitset), + } + } +} + +impl HasTop for TraversalLattice { + fn top(&self) -> TraversalPathBitSet { + match self.vertex { + VertexType::Entity => TraversalPathBitSet::Entity(self.top()), + } + } + + fn is_top(&self, value: &TraversalPathBitSet) -> bool { + match value { + TraversalPathBitSet::Entity(bitset) => self.is_top(bitset), + } + } +} + +impl JoinSemiLattice for TraversalLattice { + fn join(&self, lhs: &mut TraversalPathBitSet, rhs: &TraversalPathBitSet) -> bool { + match (lhs, rhs) { + (TraversalPathBitSet::Entity(lhs), TraversalPathBitSet::Entity(rhs)) => { + self.join(lhs, rhs) + } + } + } + + fn join_owned( + &self, + lhs: TraversalPathBitSet, + rhs: &TraversalPathBitSet, + ) -> TraversalPathBitSet { + match (lhs, rhs) { + (TraversalPathBitSet::Entity(lhs), TraversalPathBitSet::Entity(rhs)) => { + TraversalPathBitSet::Entity(self.join_owned(lhs, 
rhs)) + } + } + } +} + /// A single resolved traversal path for a specific vertex type. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum TraversalPath { diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs index c7b1ce22cf0..0444aa62217 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs @@ -8,7 +8,16 @@ use crate::{ local::Local, place::{Projection, ProjectionKind}, }, - pass::execution::traversal::{EntityPath, EntityPathBitSet}, + pass::{ + analysis::dataflow::lattice::{ + HasTop as _, JoinSemiLattice as _, + laws::{assert_bounded_join_semilattice, assert_is_top_consistent}, + }, + execution::{ + VertexType, + traversal::{EntityPath, EntityPathBitSet, TraversalLattice, TraversalPathBitSet}, + }, + }, }; /// Helper to create a `FieldByName` projection. @@ -460,3 +469,86 @@ fn len_decreases_on_swallow() { bitset.insert(EntityPath::EntityId); assert_eq!(bitset.len(), 1); } + +// --- Lattice law tests --- + +/// Builds an `EntityPathBitSet` from a list of paths using `insert` (swallowing). +fn bitset_of(paths: &[EntityPath]) -> EntityPathBitSet { + let mut bitset = empty_bitset(); + for &path in paths { + bitset.insert(path); + } + bitset +} + +/// `EntityPathBitSet` satisfies `BoundedJoinSemiLattice` laws. +/// +/// Uses values that cross the composite hierarchy: leaves from different subtrees, +/// a mid-level composite, and a top-level composite with a sibling leaf. 
+#[test] +fn entity_path_bitset_bounded_join_semilattice() { + let lattice = TraversalLattice::new(VertexType::Entity); + + let set_a = bitset_of(&[EntityPath::WebId, EntityPath::DecisionTime]); + let set_b = bitset_of(&[EntityPath::EntityId, EntityPath::Properties]); + let set_c = bitset_of(&[EntityPath::RecordId, EntityPath::TransactionTime]); + + assert_bounded_join_semilattice(&lattice, set_a, set_b, set_c); +} + +/// `is_top(top())` is consistent for `EntityPathBitSet`. +#[test] +fn entity_path_bitset_top_consistent() { + let lattice = TraversalLattice::new(VertexType::Entity); + assert_is_top_consistent::<_, EntityPathBitSet>(&lattice); +} + +/// `join(top, a) = top` for `EntityPathBitSet`. +#[test] +fn entity_path_bitset_top_absorbs_join() { + let lattice = TraversalLattice::new(VertexType::Entity); + let top: EntityPathBitSet = lattice.top(); + + for path in EntityPath::all() { + let singleton = bitset_of(&[path]); + let result = lattice.join_owned(top, &singleton); + assert_eq!(result, top); + } +} + +/// `TraversalPathBitSet` satisfies `BoundedJoinSemiLattice` laws. +#[test] +fn traversal_path_bitset_bounded_join_semilattice() { + let lattice = TraversalLattice::new(VertexType::Entity); + + let set_a = + TraversalPathBitSet::Entity(bitset_of(&[EntityPath::WebId, EntityPath::DecisionTime])); + let set_b = + TraversalPathBitSet::Entity(bitset_of(&[EntityPath::EntityId, EntityPath::Properties])); + let set_c = TraversalPathBitSet::Entity(bitset_of(&[ + EntityPath::RecordId, + EntityPath::TransactionTime, + ])); + + assert_bounded_join_semilattice(&lattice, set_a, set_b, set_c); +} + +/// `is_top(top())` is consistent for `TraversalPathBitSet`. +#[test] +fn traversal_path_bitset_top_consistent() { + let lattice = TraversalLattice::new(VertexType::Entity); + assert_is_top_consistent::<_, TraversalPathBitSet>(&lattice); +} + +/// `join(top, a) = top` for `TraversalPathBitSet`. 
+#[test] +fn traversal_path_bitset_top_absorbs_join() { + let lattice = TraversalLattice::new(VertexType::Entity); + let top: TraversalPathBitSet = lattice.top(); + + for path in EntityPath::all() { + let singleton = TraversalPathBitSet::Entity(bitset_of(&[path])); + let result = lattice.join_owned(top, &singleton); + assert_eq!(result, top); + } +} From 960d7259b5d2c557201e2b0d7ddced3e902d175b Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sun, 1 Mar 2026 17:31:42 +0100 Subject: [PATCH 14/32] feat: checkpoint --- .../pass/analysis/dataflow/lattice/impls.rs | 116 +++++++++++++----- .../pass/analysis/dataflow/liveness/mod.rs | 20 +-- .../statement_placement/embedding/mod.rs | 54 ++++---- .../statement_placement/interpret/mod.rs | 24 ++-- .../pass/execution/statement_placement/mod.rs | 26 ++-- .../statement_placement/postgres/mod.rs | 23 ++-- 6 files changed, 165 insertions(+), 98 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs b/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs index ace56215f96..bb4725f3fa0 100644 --- a/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs +++ b/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs @@ -195,73 +195,77 @@ macro_rules! 
impl_bitset { impl_bitset!(DenseBitSet, ChunkedBitSet, MixedBitSet); -impl HasBottom<(T, U)> for PowersetLattice +impl HasBottom<(T, U)> for (A, B) where - Self: HasBottom + HasBottom, + A: HasBottom, + B: HasBottom, { #[inline] fn bottom(&self) -> (T, U) { - (self.bottom(), self.bottom()) + (self.0.bottom(), self.1.bottom()) } #[inline] fn is_bottom(&self, value: &(T, U)) -> bool { - self.is_bottom(&value.0) && self.is_bottom(&value.1) + self.0.is_bottom(&value.0) && self.1.is_bottom(&value.1) } } -impl HasTop<(T, U)> for PowersetLattice +impl HasTop<(T, U)> for (A, B) where - Self: HasTop + HasTop, + A: HasTop, + B: HasTop, { #[inline] fn top(&self) -> (T, U) { - (self.top(), self.top()) + (self.0.top(), self.1.top()) } #[inline] fn is_top(&self, value: &(T, U)) -> bool { - self.is_top(&value.0) && self.is_top(&value.1) + self.0.is_top(&value.0) && self.1.is_top(&value.1) } } -impl JoinSemiLattice<(T, U)> for PowersetLattice +impl JoinSemiLattice<(T, U)> for (A, B) where - Self: JoinSemiLattice + JoinSemiLattice, + A: JoinSemiLattice, + B: JoinSemiLattice, { #[inline] fn join_owned(&self, mut lhs: (T, U), rhs: &(T, U)) -> (T, U) where (T, U): Sized, { - self.join(&mut lhs.0, &rhs.0); - self.join(&mut lhs.1, &rhs.1); + self.0.join(&mut lhs.0, &rhs.0); + self.1.join(&mut lhs.1, &rhs.1); lhs } #[inline] fn join(&self, lhs: &mut (T, U), rhs: &(T, U)) -> bool { - self.join(&mut lhs.0, &rhs.0) | self.join(&mut lhs.1, &rhs.1) + self.0.join(&mut lhs.0, &rhs.0) | self.1.join(&mut lhs.1, &rhs.1) } } -impl MeetSemiLattice<(T, U)> for PowersetLattice +impl MeetSemiLattice<(T, U)> for (A, B) where - Self: MeetSemiLattice + MeetSemiLattice, + A: MeetSemiLattice, + B: MeetSemiLattice, { #[inline] fn meet_owned(&self, mut lhs: (T, U), rhs: &(T, U)) -> (T, U) where (T, U): Sized, { - self.meet(&mut lhs.0, &rhs.0); - self.meet(&mut lhs.1, &rhs.1); + self.0.meet(&mut lhs.0, &rhs.0); + self.1.meet(&mut lhs.1, &rhs.1); lhs } #[inline] fn meet(&self, lhs: &mut (T, U), rhs: &(T, U)) -> 
bool { - self.meet(&mut lhs.0, &rhs.0) | self.meet(&mut lhs.1, &rhs.1) + self.0.meet(&mut lhs.0, &rhs.0) | self.1.meet(&mut lhs.1, &rhs.1) } } @@ -316,10 +320,14 @@ where #[cfg(test)] mod tests { #![expect(clippy::min_ident_chars)] + use core::cmp::Reverse; + use hashql_core::id::{self, Id as _, bit_vec::DenseBitSet}; use super::{PowersetLattice, SaturatingSemiring, WrappingSemiring}; - use crate::pass::analysis::dataflow::lattice::laws::{assert_bounded_lattice, assert_semiring}; + use crate::pass::analysis::dataflow::lattice::laws::{ + assert_bounded_join_semilattice, assert_bounded_lattice, assert_semiring, + }; #[test] fn saturating_semiring_u32() { @@ -374,16 +382,17 @@ mod tests { assert_bounded_lattice(&lattice, a, b, c); } + /// Tuple lattice with two `PowersetLattice`s of different domain sizes. #[test] - fn powerset_lattice_tuple() { - id::newtype!(struct Left(u32 is 0..=31)); + fn tuple_lattice_different_domains() { + id::newtype!(struct Left(u32 is 0..=15)); id::newtype!(struct Right(u32 is 0..=31)); - let lattice = PowersetLattice::new(32); + let lattice = (PowersetLattice::new(16), PowersetLattice::new(32)); - let mut left_a: DenseBitSet = DenseBitSet::new_empty(32); - let mut left_b: DenseBitSet = DenseBitSet::new_empty(32); - let mut left_c: DenseBitSet = DenseBitSet::new_empty(32); + let mut left_a: DenseBitSet = DenseBitSet::new_empty(16); + let mut left_b: DenseBitSet = DenseBitSet::new_empty(16); + let mut left_c: DenseBitSet = DenseBitSet::new_empty(16); left_a.insert(Left::from_usize(0)); left_a.insert(Left::from_usize(1)); @@ -407,10 +416,59 @@ mod tests { right_c.insert(Right::from_usize(12)); right_c.insert(Right::from_usize(13)); - let tuple_a = (left_a, right_a); - let tuple_b = (left_b, right_b); - let tuple_c = (left_c, right_c); + assert_bounded_lattice( + &lattice, + (left_a, right_a), + (left_b, right_b), + (left_c, right_c), + ); + } + + /// Tuple lattice with `Reverse` and `PowersetLattice`. 
+ /// + /// Verifies the `(A, B)` combinator works with heterogeneous lattice structures + /// where one component uses intersection-as-join (the dual lattice). + #[test] + fn tuple_lattice_heterogeneous_structures() { + id::newtype!(struct Left(u32 is 0..=15)); + id::newtype!(struct Right(u32 is 0..=31)); + + let lattice = (Reverse(PowersetLattice::new(16)), PowersetLattice::new(32)); + + let mut left_a: DenseBitSet = DenseBitSet::new_empty(16); + let mut left_b: DenseBitSet = DenseBitSet::new_empty(16); + let mut left_c: DenseBitSet = DenseBitSet::new_empty(16); + + left_a.insert(Left::from_usize(0)); + left_a.insert(Left::from_usize(1)); + left_a.insert(Left::from_usize(2)); + + left_b.insert(Left::from_usize(1)); + left_b.insert(Left::from_usize(2)); + left_b.insert(Left::from_usize(3)); + + left_c.insert(Left::from_usize(2)); + left_c.insert(Left::from_usize(3)); + left_c.insert(Left::from_usize(4)); + + let mut right_a: DenseBitSet = DenseBitSet::new_empty(32); + let mut right_b: DenseBitSet = DenseBitSet::new_empty(32); + let mut right_c: DenseBitSet = DenseBitSet::new_empty(32); + + right_a.insert(Right::from_usize(10)); + right_a.insert(Right::from_usize(11)); + + right_b.insert(Right::from_usize(11)); + right_b.insert(Right::from_usize(12)); + + right_c.insert(Right::from_usize(12)); + right_c.insert(Right::from_usize(13)); - assert_bounded_lattice(&lattice, tuple_a, tuple_b, tuple_c); + assert_bounded_join_semilattice( + &lattice, + (left_a, right_a), + (left_b, right_b), + (left_c, right_c), + ); } } diff --git a/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs b/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs index 0b1c7cdf725..8ffc58fe53f 100644 --- a/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs +++ b/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs @@ -57,7 +57,7 @@ mod tests; use core::alloc::Allocator; -use hashql_core::{id::bit_vec::DenseBitSet, intern::Interned}; +use 
hashql_core::{id::bit_vec::DenseBitSet, intern::Interned, r#type::environment::Environment}; use super::{ framework::{DataflowAnalysis, Direction}, @@ -73,7 +73,10 @@ use crate::{ terminator::Terminator, }, pass::{ - execution::{VertexType, traversal::EntityPath}, + execution::{ + VertexType, + traversal::{EntityPath, TraversalLattice, TraversalPathBitSet}, + }, transform::Traversals, }, visit::{self, Visitor}, @@ -87,19 +90,22 @@ use crate::{ /// /// This allows subsequent dead code elimination to remove the source local when its only uses /// are through extracted traversal projections. -pub struct TraversalLivenessAnalysis<'ctx, 'heap> { - pub traversals: &'ctx Traversals<'heap>, +pub struct TraversalLivenessAnalysis { + vertex: VertexType } impl<'heap> DataflowAnalysis<'heap> for TraversalLivenessAnalysis<'_, '_> { - type Domain = DenseBitSet; - type Lattice = PowersetLattice; + type Domain = (DenseBitSet, TraversalPathBitSet); + type Lattice = (PowersetLattice, TraversalLattice); type SwitchIntData = !; const DIRECTION: Direction = Direction::Backward; fn lattice_in(&self, body: &Body<'heap>, _: A) -> Self::Lattice { - PowersetLattice::new(body.local_decls.len()) + let locals = PowersetLattice::new(body.local_decls.len()); + + let vertex = VertexType::from_local(self.env, &body.local_decls[]) + let paths = TraversalLattice::new(vertex) } fn initialize_boundary(&self, _: &Body<'heap>, _: &mut Self::Domain, _: A) { diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs index 170d18c6969..6288b2f14e3 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs @@ -21,35 +21,35 @@ use crate::{ #[cfg(test)] mod tests; -fn is_supported_place<'heap>( - context: &MirContext<'_, 'heap>, - body: &Body<'heap>, - domain: &DenseBitSet, - 
place: &Place<'heap>, -) -> bool { - // For GraphReadFilter bodies, local 1 is the filter argument (vertex). Check if the - // projection path maps to an Embedding-accessible field. - if matches!(body.source, Source::GraphReadFilter(_)) && place.local.as_usize() == 1 { - let decl = &body.local_decls[place.local]; - let Some(vertex_type) = VertexType::from_local(context.env, decl) else { - unimplemented!("lookup for declared type") - }; +struct EmbeddingSupported { + vertex: VertexType, +} - match vertex_type { - VertexType::Entity => { - return matches!( - entity_projection_access(&place.projections), - Some(Access::Embedding(_)) - ); +impl EmbeddingSupported { + fn is_supported_place<'heap>( + &self, + context: &MirContext<'_, 'heap>, + body: &Body<'heap>, + domain: &DenseBitSet, + place: &Place<'heap>, + ) -> bool { + // For GraphReadFilter bodies, local 1 is the filter argument (vertex). Check if the + // projection path maps to an Embedding-accessible field. + if matches!(body.source, Source::GraphReadFilter(_)) && place.local == Local::VERTEX { + match self.vertex { + VertexType::Entity => { + return matches!( + entity_projection_access(&place.projections), + Some(Access::Embedding(_)) + ); + } } } - } - domain.contains(place.local) + domain.contains(place.local) + } } -struct EmbeddingSupported; - impl<'heap> Supported<'heap> for EmbeddingSupported { fn is_supported_rvalue( &self, @@ -76,7 +76,7 @@ impl<'heap> Supported<'heap> for EmbeddingSupported { operand: &Operand<'heap>, ) -> bool { match operand { - Operand::Place(place) => is_supported_place(context, body, domain, place), + Operand::Place(place) => self.is_supported_place(context, body, domain, place), Operand::Constant(_) => false, } } @@ -107,7 +107,7 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> &mut self, context: &MirContext<'_, 'heap>, body: &Body<'heap>, - _traversals: &Traversals, + vertex: VertexType, alloc: A, ) -> StatementCostVec { let statement_costs = 
StatementCostVec::new_in(&body.basic_blocks, alloc); @@ -122,7 +122,7 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> let dispatchable = SupportedAnalysis { body, context, - supported: &EmbeddingSupported, + supported: &EmbeddingSupported { vertex }, initialize_boundary: OnceValue::new( |body: &Body<'heap>, domain: &mut DenseBitSet| { match body.source { @@ -152,7 +152,7 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> statement_costs, - supported: &EmbeddingSupported, + supported: &EmbeddingSupported { vertex }, }; visitor.visit_body(body); diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs index 5d8e9b2beb9..65690a09505 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs @@ -9,6 +9,7 @@ use crate::{ }, context::MirContext, pass::execution::{ + VertexType, cost::{Cost, StatementCostVec}, traversal::Traversals, }, @@ -18,15 +19,15 @@ use crate::{ #[cfg(test)] mod tests; -struct CostVisitor<'ctx, A: Allocator> { +struct CostVisitor<'ctx, A: Allocator, S: Allocator> { cost: Cost, traversal_overhead: Cost, statement_costs: StatementCostVec, - traversals: &'ctx Traversals, + traversals: &'ctx Traversals, } -impl<'heap, A: Allocator> Visitor<'heap> for CostVisitor<'_, A> { +impl<'heap, A: Allocator, S: Allocator> Visitor<'heap> for CostVisitor<'_, A, S> { type Result = Result<(), !>; fn visit_statement( @@ -67,26 +68,31 @@ impl<'heap, A: Allocator> Visitor<'heap> for CostVisitor<'_, A> { /// target. /// /// Supports all statements unconditionally, serving as the universal fallback. 
-pub(crate) struct InterpreterStatementPlacement { +pub(crate) struct InterpreterStatementPlacement<'ctx, S: Allocator> { traversal_overhead: Cost, statement_cost: Cost, + + traversals: &'ctx Traversals, } -impl InterpreterStatementPlacement { - pub(crate) const fn new() -> Self { +impl<'ctx, S: Allocator> InterpreterStatementPlacement<'ctx, S> { + pub(crate) const fn new(traversals: &'ctx Traversals) -> Self { Self { traversal_overhead: cost!(4), statement_cost: cost!(8), + traversals, } } } -impl<'heap, A: Allocator + Clone> StatementPlacement<'heap, A> for InterpreterStatementPlacement { +impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> + for InterpreterStatementPlacement<'_, S> +{ fn statement_placement_in( &mut self, _: &MirContext<'_, 'heap>, body: &Body<'heap>, - traversals: &Traversals, + vertex: VertexType, alloc: A, ) -> StatementCostVec { let statement_costs = StatementCostVec::new_in(&body.basic_blocks, alloc); @@ -102,7 +108,7 @@ impl<'heap, A: Allocator + Clone> StatementPlacement<'heap, A> for InterpreterSt cost: self.statement_cost, statement_costs, traversal_overhead: self.traversal_overhead, - traversals, + traversals: self.traversals, }; visitor.visit_body(body); diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs index e7f7d8e7af7..b73d6896bac 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs @@ -23,7 +23,7 @@ pub(crate) use self::{ embedding::EmbeddingStatementPlacement, interpret::InterpreterStatementPlacement, postgres::PostgresStatementPlacement, }; -use super::{target::TargetId, traversal::Traversals}; +use super::{VertexType, target::TargetId, traversal::Traversals}; use crate::{body::Body, context::MirContext, pass::execution::cost::StatementCostVec}; /// Computes statement placement costs for a specific 
execution target. @@ -48,22 +48,24 @@ pub(crate) trait StatementPlacement<'heap, A: Allocator> { &mut self, context: &MirContext<'_, 'heap>, body: &Body<'heap>, - traversals: &Traversals, + vertex: VertexType, alloc: A, ) -> StatementCostVec; } -pub(crate) enum TargetPlacementStatement<'heap, S: Allocator> { - Interpreter(InterpreterStatementPlacement), +pub(crate) enum TargetPlacementStatement<'ctx, 'heap, S: Allocator> { + Interpreter(InterpreterStatementPlacement<'ctx, S>), Postgres(PostgresStatementPlacement<'heap, S>), Embedding(EmbeddingStatementPlacement), } -impl TargetPlacementStatement<'_, S> { +impl<'ctx, S: Allocator + Clone> TargetPlacementStatement<'ctx, '_, S> { #[must_use] - pub(crate) fn new_in(target: TargetId, scratch: S) -> Self { + pub(crate) fn new_in(target: TargetId, traversals: &'ctx Traversals, scratch: S) -> Self { match target { - TargetId::Interpreter => Self::Interpreter(InterpreterStatementPlacement::new()), + TargetId::Interpreter => { + Self::Interpreter(InterpreterStatementPlacement::new(traversals)) + } TargetId::Postgres => Self::Postgres(PostgresStatementPlacement::new_in(scratch)), TargetId::Embedding => Self::Embedding(EmbeddingStatementPlacement::new_in(scratch)), } @@ -71,25 +73,25 @@ impl TargetPlacementStatement<'_, S> { } impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> - for TargetPlacementStatement<'heap, S> + for TargetPlacementStatement<'_, 'heap, S> { #[inline] fn statement_placement_in( &mut self, context: &MirContext<'_, 'heap>, body: &Body<'heap>, - traversals: &Traversals, + vertex: VertexType, alloc: A, ) -> StatementCostVec { match self { TargetPlacementStatement::Interpreter(placement) => { - placement.statement_placement_in(context, body, traversals, alloc) + placement.statement_placement_in(context, body, vertex, alloc) } TargetPlacementStatement::Postgres(placement) => { - placement.statement_placement_in(context, body, traversals, alloc) + 
placement.statement_placement_in(context, body, vertex, alloc) } TargetPlacementStatement::Embedding(placement) => { - placement.statement_placement_in(context, body, traversals, alloc) + placement.statement_placement_in(context, body, vertex, alloc) } } } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs index e67ba18cf38..ef2c6051824 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs @@ -331,6 +331,7 @@ struct PostgresSupported<'ctx, 'heap, A: Allocator> { /// /// Fields containing closures or dicts with non-string keys are excluded. env_domain: &'ctx DenseBitSet, + vertex: VertexType, guard: LocalLock<&'ctx mut RecursiveVisitorGuard<'heap, A>>, } @@ -369,19 +370,12 @@ impl<'heap, A: Allocator> PostgresSupported<'_, 'heap, A> { Some(self.env_domain.contains(field)) } - Local::VERTEX => { - let decl = &body.local_decls[place.local]; - let Some(vertex_type) = VertexType::from_local(context.env, decl) else { - unimplemented!("lookup for declared type") - }; - - match vertex_type { - VertexType::Entity => Some(matches!( - entity_projection_access(&place.projections), - Some(Access::Postgres(_)) - )), - } - } + Local::VERTEX => match self.vertex { + VertexType::Entity => Some(matches!( + entity_projection_access(&place.projections), + Some(Access::Postgres(_)) + )), + }, _ => None, } } @@ -708,7 +702,7 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> &mut self, context: &MirContext<'_, 'heap>, body: &Body<'heap>, - _: &Traversals, + vertex: VertexType, alloc: A, ) -> StatementCostVec { let statement_costs = StatementCostVec::new_in(&body.basic_blocks, alloc); @@ -724,6 +718,7 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> let supported = PostgresSupported { 
env_domain: &env_domain, + vertex, guard: LocalLock::new(&mut self.type_visitor_guard), }; From 7aa802aed9f5860ad592bf69df2537d1a024629f Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sun, 1 Mar 2026 17:54:18 +0100 Subject: [PATCH 15/32] feat: define lattice over the entity --- .../pass/analysis/dataflow/liveness/mod.rs | 132 ++++++++---------- .../hashql/mir/src/pass/execution/mod.rs | 12 +- .../statement_placement/interpret/tests.rs | 39 ++++-- .../statement_placement/postgres/tests.rs | 6 +- .../execution/statement_placement/tests.rs | 20 +-- .../execution/terminator_placement/mod.rs | 30 ++-- 6 files changed, 122 insertions(+), 117 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs b/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs index 8ffc58fe53f..99fb16a2a3a 100644 --- a/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs +++ b/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs @@ -15,30 +15,28 @@ //! This module provides two liveness analyses: //! //! - [`LivenessAnalysis`]: Standard liveness following the gen/kill semantics above. -//! - [`TraversalLivenessAnalysis`]: Traversal-aware liveness that suppresses uses of a traversal -//! source when assigning to a known traversal destination. +//! - [`TraversalLivenessAnalysis`]: Tracks local liveness alongside per-vertex path liveness. //! //! ## Traversal-Aware Liveness //! -//! When performing traversal extraction, a source local (e.g., `entity`) may have multiple -//! partial projections extracted into separate destination locals (e.g., `entity.uuid`, -//! `entity.name`). Standard liveness would mark `entity` as live at every assignment to these -//! destinations, even though only the projections are needed. +//! In a [`GraphReadFilter`] body, the vertex local (`_1`) is an input representing a graph +//! vertex. Rather than tracking the vertex as a monolithic live value, this analysis resolves +//! 
each vertex projection to an [`EntityPath`] and records it in a [`TraversalPathBitSet`]. +//! The vertex local itself is never marked live in the local bitset. //! -//! [`TraversalLivenessAnalysis`] takes a [`Traversals`] reference and modifies the transfer -//! function: when an assignment's left-hand side is a full definition of a registered traversal -//! destination, uses of the traversal source on the right-hand side are *not* generated. +//! This allows edge cost computation to sum only the [`InformationRange`] of live paths, +//! rather than charging the full entity size at every edge where the vertex is used. //! //! ```text -//! // Given: traversals.source() = _1, traversals.contains(_2) = true //! bb0: -//! _2 = _1.uuid // Standard: gens _1. Traversal-aware: skips _1 (full def of _2) -//! _3 = _1.name // If _3 not in traversals: gens _1 normally +//! _2 = _1.metadata.archived // gens EntityPath::Archived in path bitset, _1 stays dead +//! _3 = _1.properties // gens EntityPath::Properties in path bitset, _1 stays dead +//! _4 = _1 // unresolvable: insert_all in path bitset, _1 stays dead //! return _2 //! ``` //! -//! This allows dead code elimination to remove the source local when all its uses are through -//! extracted traversals. +//! [`GraphReadFilter`]: crate::body::Source::GraphReadFilter +//! [`InformationRange`]: crate::pass::analysis::size_estimation::InformationRange //! //! # Example //! 
@@ -57,7 +55,7 @@ mod tests; use core::alloc::Allocator; -use hashql_core::{id::bit_vec::DenseBitSet, intern::Interned, r#type::environment::Environment}; +use hashql_core::{id::bit_vec::DenseBitSet, intern::Interned}; use super::{ framework::{DataflowAnalysis, Direction}, @@ -69,32 +67,31 @@ use crate::{ local::Local, location::Location, place::{DefUse, Place, PlaceContext}, - statement::{Assign, Statement, StatementKind}, + statement::Statement, terminator::Terminator, }, - pass::{ - execution::{ - VertexType, - traversal::{EntityPath, TraversalLattice, TraversalPathBitSet}, - }, - transform::Traversals, + pass::execution::{ + VertexType, + traversal::{EntityPath, TraversalLattice, TraversalPathBitSet}, }, visit::{self, Visitor}, }; -/// Traversal-aware liveness analysis. +/// Liveness analysis that tracks local liveness and per-vertex path liveness in parallel. /// -/// Extends standard liveness with special handling for traversal extraction. When the left-hand -/// side of an assignment is a full definition of a traversal destination, uses of the traversal -/// source on the right-hand side are suppressed (not added to the live set). +/// The domain is `(DenseBitSet, TraversalPathBitSet)`: +/// - The local bitset tracks which locals are live, with the vertex local excluded entirely. +/// - The path bitset tracks which vertex field paths are live (resolved via [`EntityPath`]). /// -/// This allows subsequent dead code elimination to remove the source local when its only uses -/// are through extracted traversal projections. +/// When the vertex is accessed through a resolvable projection (e.g., `_1.metadata.archived`), +/// the corresponding [`EntityPath`] is gen'd in the path bitset. When the projection cannot be +/// resolved (bare `_1` or unknown path), all paths are marked live via +/// [`insert_all`](EntityPathBitSet::insert_all). 
pub struct TraversalLivenessAnalysis { - vertex: VertexType + pub vertex: VertexType, } -impl<'heap> DataflowAnalysis<'heap> for TraversalLivenessAnalysis<'_, '_> { +impl<'heap> DataflowAnalysis<'heap> for TraversalLivenessAnalysis { type Domain = (DenseBitSet, TraversalPathBitSet); type Lattice = (PowersetLattice, TraversalLattice); type SwitchIntData = !; @@ -103,9 +100,9 @@ impl<'heap> DataflowAnalysis<'heap> for TraversalLivenessAnalysis<'_, '_> { fn lattice_in(&self, body: &Body<'heap>, _: A) -> Self::Lattice { let locals = PowersetLattice::new(body.local_decls.len()); + let paths = TraversalLattice::new(self.vertex); - let vertex = VertexType::from_local(self.env, &body.local_decls[]) - let paths = TraversalLattice::new(vertex) + (locals, paths) } fn initialize_boundary(&self, _: &Body<'heap>, _: &mut Self::Domain, _: A) { @@ -118,7 +115,9 @@ impl<'heap> DataflowAnalysis<'heap> for TraversalLivenessAnalysis<'_, '_> { params: Interned<'heap, [Local]>, state: &mut Self::Domain, ) { - Ok(()) = TraversalTransferFunction(state, None).visit_basic_block_params(location, params); + let (locals, paths) = state; + Ok(()) = + TraversalTransferFunction { locals, paths }.visit_basic_block_params(location, params); } fn transfer_statement( @@ -127,20 +126,8 @@ impl<'heap> DataflowAnalysis<'heap> for TraversalLivenessAnalysis<'_, '_> { statement: &Statement<'heap>, state: &mut Self::Domain, ) { - // This is the pattern that's exhibited by explicit traversal extraction, in particular. - // Meaning we skip any assignments to our local, as long as it is a `Def`, to the particular - // chosen source. 
- let skip_uses_of = if let StatementKind::Assign(Assign { lhs, rhs: _ }) = &statement.kind - && lhs.projections.is_empty() - && self.traversals.contains(lhs.local) - { - Some(self.traversals.source()) - } else { - None - }; - - Ok(()) = - TraversalTransferFunction(state, skip_uses_of).visit_statement(location, statement); + let (locals, paths) = state; + Ok(()) = TraversalTransferFunction { locals, paths }.visit_statement(location, statement); } fn transfer_terminator( @@ -149,11 +136,15 @@ impl<'heap> DataflowAnalysis<'heap> for TraversalLivenessAnalysis<'_, '_> { terminator: &Terminator<'heap>, state: &mut Self::Domain, ) { - Ok(()) = TraversalTransferFunction(state, None).visit_terminator(location, terminator); + let (locals, paths) = state; + Ok(()) = TraversalTransferFunction { locals, paths }.visit_terminator(location, terminator); } } -struct TraversalTransferFunction<'mir>(&'mir mut DenseBitSet, Option); +struct TraversalTransferFunction<'mir> { + locals: &'mir mut DenseBitSet, + paths: &'mir mut TraversalPathBitSet, +} impl Visitor<'_> for TraversalTransferFunction<'_> { type Result = Result<(), !>; @@ -163,12 +154,18 @@ impl Visitor<'_> for TraversalTransferFunction<'_> { return Ok(()); }; + if local == Local::VERTEX { + debug_assert_eq!( + def_use, + DefUse::Use, + "vertex local is immutable in GraphReadFilter bodies" + ); + return Ok(()); + } + match def_use { - // Full definition kills liveness - the variable gets a new value - DefUse::Def => self.0.remove(local), - // Partial definitions and uses generate liveness - the current value is needed - DefUse::Use if Some(local) == self.1 => false, - DefUse::PartialDef | DefUse::Use => self.0.insert(local), + DefUse::Def => self.locals.remove(local), + DefUse::PartialDef | DefUse::Use => self.locals.insert(local), }; Ok(()) @@ -180,23 +177,16 @@ impl Visitor<'_> for TraversalTransferFunction<'_> { context: PlaceContext, place: &Place<'_>, ) -> Self::Result { - let Some(def_use) = context.into_def_use() else 
{ - return Ok(()); - }; - - let _vertex = VertexType::Entity; // TODO: actually do this properly - - // Check if the place is a vertex, and the vertex type results in a partial result, in that - // case we do *not* continue, because it is considered a partial traversal and does not - // contribute to the liveness analysis of the partially hydrated entity. - if def_use == DefUse::Use - && place.local == Local::VERTEX - && EntityPath::resolve(&place.projections).is_some() - { - // This is a *valid* partial traversal, and does therefore not contribute to the full - // liveness of the entity. (This is required to ensure that we're not evaluating the - // full size of the entity on transition if we don't need it.) - return Ok(()); + if place.local == Local::VERTEX && Some(DefUse::Use) == context.into_def_use() { + match self.paths { + TraversalPathBitSet::Entity(bitset) => { + if let Some((path, _)) = EntityPath::resolve(&place.projections) { + bitset.insert(path); + } else { + bitset.insert_all(); + } + } + } } visit::r#ref::walk_place(self, location, context, place) diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index 5a162e99b9d..084cd728af5 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -38,7 +38,7 @@ use self::{ }; use super::{analysis::size_estimation::BodyFootprint, transform::Traversals}; use crate::{ - body::{Body, Source, basic_block::BasicBlockVec}, + body::{Body, Source, basic_block::BasicBlockVec, local::Local}, context::MirContext, def::DefIdSlice, }; @@ -60,6 +60,11 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { ) { assert_matches!(body.source, Source::GraphReadFilter(_)); + let Some(vertex) = VertexType::from_local(context.env, &body.local_decls[Local::VERTEX]) + else { + unreachable!("unsupported graph read target") + }; + let mut traversals = TraversalAnalysis::traversal_analysis_in(context, 
body, &self.scratch); let mut statement_costs: TargetArray<_> = TargetArray::from_fn(|_| None); @@ -68,9 +73,10 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { targets.reverse(); // We reverse the order, so that earlier targets (aka the interpreter) can have access to traversal costs for target in targets { - let mut statement = TargetPlacementStatement::new_in(target, &self.scratch); + let mut statement = + TargetPlacementStatement::new_in(target, &traversals, &self.scratch); let statement_cost = - statement.statement_placement_in(context, body, &traversals, &self.scratch); + statement.statement_placement_in(context, body, vertex, &self.scratch); statement_costs[target] = Some(statement_cost); } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs index c0b677d1a55..c745ba8c2b0 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs @@ -9,9 +9,12 @@ use crate::{ context::MirContext, def::DefId, intern::Interner, - pass::execution::statement_placement::{ - InterpreterStatementPlacement, StatementPlacement as _, - tests::{assert_placement, run_placement}, + pass::execution::{ + statement_placement::{ + InterpreterStatementPlacement, StatementPlacement as _, + tests::{assert_placement, run_placement}, + }, + traversal::TraversalAnalysis, }, }; @@ -51,14 +54,15 @@ fn all_statements_supported() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, diagnostics: DiagnosticIssues::new(), }; - let mut placement = InterpreterStatementPlacement::new(); + let traversals = TraversalAnalysis::traversal_analysis_in(&context, &body, context.heap); + let mut placement = InterpreterStatementPlacement::new(&traversals); let (body, statement_costs) = 
run_placement(&context, &mut placement, body); assert_placement( @@ -91,14 +95,15 @@ fn traversal_single_path_cost() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, diagnostics: DiagnosticIssues::new(), }; - let mut placement = InterpreterStatementPlacement::new(); + let traversals = TraversalAnalysis::traversal_analysis_in(&context, &body, context.heap); + let mut placement = InterpreterStatementPlacement::new(&traversals); let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( @@ -133,14 +138,15 @@ fn traversal_multiple_paths_cost() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, diagnostics: DiagnosticIssues::new(), }; - let mut placement = InterpreterStatementPlacement::new(); + let traversals = TraversalAnalysis::traversal_analysis_in(&context, &body, context.heap); + let mut placement = InterpreterStatementPlacement::new(&traversals); let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( @@ -176,14 +182,15 @@ fn traversal_swallowing_reduces_cost() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, diagnostics: DiagnosticIssues::new(), }; - let mut placement = InterpreterStatementPlacement::new(); + let traversals = TraversalAnalysis::traversal_analysis_in(&context, &body, context.heap); + let mut placement = InterpreterStatementPlacement::new(&traversals); let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( @@ -218,14 +225,15 @@ fn non_traversal_unaffected_by_costs() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, diagnostics: DiagnosticIssues::new(), }; - let mut placement = InterpreterStatementPlacement::new(); + let traversals = 
TraversalAnalysis::traversal_analysis_in(&context, &body, context.heap); + let mut placement = InterpreterStatementPlacement::new(&traversals); let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( @@ -262,14 +270,15 @@ fn storage_statements_zero_cost() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, diagnostics: DiagnosticIssues::new(), }; - let mut placement = InterpreterStatementPlacement::new(); + let traversals = TraversalAnalysis::traversal_analysis_in(&context, &body, context.heap); + let mut placement = InterpreterStatementPlacement::new(&traversals); let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs index 9442d885dcc..b7d867414d4 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs @@ -32,7 +32,6 @@ use crate::{ PostgresStatementPlacement, StatementPlacement as _, tests::{assert_placement, run_placement}, }, - traversal::Traversals, }, }; @@ -575,10 +574,9 @@ fn graph_read_edge_unsupported() { diagnostics: DiagnosticIssues::new(), }; - let traversals = Traversals::new_in(&body.basic_blocks, VertexType::Entity, &heap); - let mut placement = PostgresStatementPlacement::new_in(Global); - let statement_costs = placement.statement_placement_in(&context, &body, &traversals, &heap); + let statement_costs = + placement.statement_placement_in(&context, &body, VertexType::Entity, &heap); assert_placement( "graph_read_edge_unsupported", diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs index db3e0b68eef..3cbc04c0608 
100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs @@ -15,7 +15,7 @@ use insta::{Settings, assert_snapshot}; use super::StatementPlacement; use crate::{ - body::{Body, location::Location, statement::Statement}, + body::{Body, local::Local, location::Location, statement::Statement}, builder::body, context::MirContext, intern::Interner, @@ -25,7 +25,7 @@ use crate::{ statement_placement::{ EmbeddingStatementPlacement, InterpreterStatementPlacement, PostgresStatementPlacement, }, - traversal::{TraversalAnalysis, Traversals}, + traversal::Traversals, }, pretty::{TextFormatAnnotations, TextFormatOptions}, }; @@ -102,10 +102,10 @@ pub(crate) fn run_placement<'heap>( placement: &mut impl StatementPlacement<'heap, &'heap Heap>, body: Body<'heap>, ) -> (Body<'heap>, StatementCostVec<&'heap Heap>) { - let traversals = TraversalAnalysis::traversal_analysis_in(context, &body, context.heap); + let vertex = VertexType::from_local(context.env, &body.local_decls[Local::VERTEX]) + .unwrap_or_else(|| unimplemented!("lookup for declared type")); - let statement_costs = - placement.statement_placement_in(context, &body, &traversals, context.heap); + let statement_costs = placement.statement_placement_in(context, &body, vertex, context.heap); (body, statement_costs) } @@ -147,13 +147,13 @@ fn non_graph_read_filter_returns_empty() { let traversals = Traversals::new_in(&body.basic_blocks, VertexType::Entity, &heap); let mut postgres = PostgresStatementPlacement::new_in(Global); - let mut interpreter = InterpreterStatementPlacement::new(); + let mut interpreter = InterpreterStatementPlacement::new(&traversals); let mut embedding = EmbeddingStatementPlacement::new_in(Global); - let postgres_statement = postgres.statement_placement_in(&context, &body, &traversals, &heap); - let interpreter_statement = - interpreter.statement_placement_in(&context, &body, &traversals, &heap); - let 
embedding_statement = embedding.statement_placement_in(&context, &body, &traversals, &heap); + let vertex = VertexType::Entity; + let postgres_statement = postgres.statement_placement_in(&context, &body, vertex, &heap); + let interpreter_statement = interpreter.statement_placement_in(&context, &body, vertex, &heap); + let embedding_statement = embedding.statement_placement_in(&context, &body, vertex, &heap); assert!(postgres_statement.all_unassigned()); assert!(interpreter_statement.all_unassigned()); diff --git a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs index 801f9ddcbf3..d0574584d6a 100644 --- a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs @@ -50,9 +50,10 @@ use hashql_core::{ }; use super::{ - Cost, + Cost, VertexType, block_partitioned_vec::BlockPartitionedVec, target::{TargetBitSet, TargetId}, + traversal::{EntityPathBitSet, TraversalPathBitSet}, }; use crate::{ body::{ @@ -442,13 +443,13 @@ impl TerminatorPlacement { fn compute_liveness<'heap>( &self, body: &Body<'heap>, - traversals: &Traversals<'heap>, - ) -> BasicBlockVec, &S> { + vertex: VertexType, + ) -> BasicBlockVec<(DenseBitSet, TraversalPathBitSet), &S> { let DataflowResults { analysis: _, entry_states: live_in, exit_states: _, - } = TraversalLivenessAnalysis { traversals }.iterate_to_fixpoint_in(body, &self.scratch); + } = TraversalLivenessAnalysis { vertex }.iterate_to_fixpoint_in(body, &self.scratch); live_in } @@ -464,11 +465,11 @@ impl TerminatorPlacement { pub(crate) fn terminator_placement<'heap>( &self, body: &Body<'heap>, + vertex: VertexType, footprint: &BodyFootprint<&'heap Heap>, - traversals: &Traversals<'heap>, targets: &BasicBlockSlice, ) -> TerminatorCostVec { - self.terminator_placement_in(body, footprint, traversals, targets, Global) + self.terminator_placement_in(body, vertex, 
footprint, targets, Global) } /// Computes transition costs for all terminator edges in `body`. @@ -483,12 +484,12 @@ impl TerminatorPlacement { pub(crate) fn terminator_placement_in<'heap, A: Allocator + Clone>( &self, body: &Body<'heap>, + vertex: VertexType, footprint: &BodyFootprint<&'heap Heap>, - traversals: &Traversals<'heap>, targets: &BasicBlockSlice, alloc: A, ) -> TerminatorCostVec { - let live_in = self.compute_liveness(body, traversals); + let live_in = self.compute_liveness(body, vertex); let scc = self.compute_scc(body); let mut output = TerminatorCostVec::new(&body.basic_blocks, alloc); @@ -534,16 +535,17 @@ impl TerminatorPlacement { required_locals: &mut DenseBitSet, body: &Body, footprint: &BodyFootprint<&Heap>, - live_in: &BasicBlockSlice>, + live_in: &BasicBlockSlice<(DenseBitSet, TraversalPathBitSet)>, successor: BasicBlockId, ) -> Cost { - required_locals.clone_from(&live_in[successor]); + todo!() + // required_locals.clone_from(&live_in[successor]); - for ¶m in body.basic_blocks[successor].params { - required_locals.insert(param); - } + // for ¶m in body.basic_blocks[successor].params { + // required_locals.insert(param); + // } - self.sum_local_sizes(footprint, required_locals) + // self.sum_local_sizes(footprint, required_locals) } /// Sums the estimated sizes of all locals in the set. 
From 164293479004fec848ad824ae4974edec8e42a3d Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sun, 1 Mar 2026 18:12:53 +0100 Subject: [PATCH 16/32] feat: terminator placement --- .../pass/analysis/dataflow/liveness/tests.rs | 238 +++++++++--------- .../execution/terminator_placement/mod.rs | 37 ++- .../execution/terminator_placement/tests.rs | 145 +++-------- .../terminator_placement_snapshot.snap | 16 +- 4 files changed, 194 insertions(+), 242 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/tests.rs b/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/tests.rs index e6292ba3076..e376ea6af98 100644 --- a/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/tests.rs +++ b/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/tests.rs @@ -8,23 +8,22 @@ use hashql_core::{ heap::Heap, id::bit_vec::DenseBitSet, pretty::Formatter, - r#type::{TypeBuilder, TypeFormatter, TypeFormatterOptions, environment::Environment}, + symbol::sym, + r#type::{TypeFormatter, TypeFormatterOptions, environment::Environment}, }; use insta::{Settings, assert_snapshot}; use super::{LivenessAnalysis, TraversalLivenessAnalysis}; use crate::{ - body::{ - Body, - basic_block::BasicBlockId, - local::Local, - place::{FieldIndex, Place, ProjectionKind}, - }, + body::{Body, basic_block::BasicBlockId, local::Local}, builder::body, intern::Interner, pass::{ analysis::dataflow::framework::{DataflowAnalysis, DataflowResults, Direction}, - transform::Traversals, + execution::{ + VertexType, + traversal::{EntityPath, TraversalPathBitSet}, + }, }, pretty::TextFormatOptions, }; @@ -342,164 +341,177 @@ fn diamond_one_branch_uses() { // TraversalLivenessAnalysis Tests // ============================================================================= -#[track_caller] -fn assert_traversal_liveness<'heap>( - name: &'static str, - env: &Environment<'heap>, - body: &Body<'heap>, - traversals: &Traversals<'heap>, -) { - let analysis = TraversalLivenessAnalysis 
{ traversals }; - let results = analysis.iterate_to_fixpoint(body); +fn traversal_liveness<'a>(body: &'a Body<'a>) -> DataflowResults<'a, TraversalLivenessAnalysis> { + let analysis = TraversalLivenessAnalysis { + vertex: VertexType::Entity, + }; + analysis.iterate_to_fixpoint(body) +} - let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - let mut settings = Settings::clone_current(); - settings.set_snapshot_path(dir.join("tests/ui/pass/liveness")); - settings.set_prepend_module_to_snapshot(false); +fn entry_locals<'a>( + results: &'a DataflowResults<'a, TraversalLivenessAnalysis>, + block: BasicBlockId, +) -> &'a DenseBitSet { + &results.entry_states[block].0 +} - let _drop = settings.bind_to_scope(); +fn entry_paths<'a>( + results: &'a DataflowResults<'a, TraversalLivenessAnalysis>, + block: BasicBlockId, +) -> &'a TraversalPathBitSet { + &results.entry_states[block].1 +} - assert_snapshot!( - name, - format!( - "{}\n\n========\n\n{}", - format_body(env, body), - format_liveness(body, &results) - ) - ); +/// Vertex local (`_1`) is never marked live in the local bitset. +#[test] +fn vertex_excluded_from_local_bitset() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + // _0 = env, _1 = vertex, _2 = props + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], props: ?; + @proj properties = vertex.properties: ?; + + bb0() { + props = load properties; + goto bb1(); + }, + bb1() { + return props; + } + }); + + let results = traversal_liveness(&body); + + // At bb1 entry, _2 (props) is live (used in return), vertex is not + let bb1_locals = entry_locals(&results, BasicBlockId::new(1)); + assert!(bb1_locals.contains(Local::new(2))); + assert!(!bb1_locals.contains(Local::VERTEX)); + + // At bb0 entry, _2 is killed by its definition, vertex is never live + let bb0_locals = entry_locals(&results, BasicBlockId::new(0)); + assert!(!bb0_locals.contains(Local::VERTEX)); + assert!(!bb0_locals.contains(Local::new(2))); } -/// Assigning to a traversal destination does not mark the source as live. +/// Vertex field accesses are recorded as EntityPaths in the path bitset. #[test] -fn traversal_assignment_skips_source() { +fn vertex_access_records_entity_path() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); - // _0 = env, _1 = source, _2 = traversal destination, _3 = result - let body = body!(interner, env; fn@0/2 -> Int { - decl env: (), source: (Int, Int), dest: Int; - @proj source_0 = source.0: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], props: ?; + @proj properties = vertex.properties: ?; bb0() { - dest = load source_0; - return dest; + props = load properties; + return props; } }); - // source = _1, destinations = {_2} - let source = Local::new(1); - let dest = Local::new(2); - let mut traversals = Traversals::with_capacity_in(source, body.local_decls.len(), &heap); - // The projection type is Int (the element type of the tuple) - traversals.insert( - dest, - Place::local(source).project( - &interner, - TypeBuilder::synthetic(&env).integer(), - ProjectionKind::Field(FieldIndex::new(0)), - ), - ); + let results = traversal_liveness(&body); + let paths = entry_paths(&results, BasicBlockId::new(0)); - assert_traversal_liveness( - "traversal_assignment_skips_source", - &env, - &body, - &traversals, - ); + let entity_paths = paths.as_entity().expect("should be entity variant"); + assert!(entity_paths.contains(EntityPath::Properties)); } -/// Assigning to a non-traversal local marks the source as live. +/// Bare vertex access sets all bits in the path bitset. #[test] -fn non_traversal_assignment_gens_source() { +fn bare_vertex_access_sets_all_paths() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); - // _0 = env, _1 = source, _2 = NOT in traversals, _3 = result - let body = body!(interner, env; fn@0/2 -> Int { - decl env: (), source: (Int, Int), other: Int; - @proj source_0 = source.0: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; bb0() { - other = load source_0; - return other; + val = load vertex; + return val; } }); - // source = _1, destinations = {} (empty - _2 is NOT a traversal destination) - let source = Local::new(1); - let traversals = Traversals::with_capacity_in(source, body.local_decls.len(), &heap); + let results = traversal_liveness(&body); + let paths = entry_paths(&results, BasicBlockId::new(0)); - assert_traversal_liveness( - "non_traversal_assignment_gens_source", - &env, - &body, - &traversals, - ); + let entity_paths = paths.as_entity().expect("should be entity variant"); + // 25 variants - 7 children = 18 top-level paths + assert_eq!(entity_paths.len(), 18); } -/// Assignment with projections on LHS (partial def) does not trigger traversal skip. +/// Non-vertex locals are tracked normally in the local bitset. #[test] -fn lhs_projection_does_not_skip() { +fn non_vertex_locals_tracked_normally() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); - // _0 = env, _1 = source, _2 = traversal destination (tuple), _3 = result - let body = body!(interner, env; fn@0/2 -> (Int, Int) { - decl env: (), source: (Int, Int), dest: (Int, Int); - @proj source_0 = source.0: Int, dest_0 = dest.0: Int; + // _0 = env, _1 = vertex, _2 = val, _3 = result + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (Int), vertex: [Opaque sym::path::Entity; ?], val: Int, result: Int; + @proj env_0 = env.0: Int; bb0() { - dest_0 = load source_0; - return dest; + val = load env_0; + goto bb1(); + }, + bb1() { + result = load val; + return result; } }); - // source = _1, destinations = {_2} - // Even though _2 is in traversals, the assignment is to dest.0 (has projection), - // so it should NOT skip the source use. 
- let _env = Local::new(0); - let source = Local::new(1); - let dest = Local::new(2); - - let mut traversals = Traversals::with_capacity_in(source, body.local_decls.len(), &heap); - let source_0_place = Place::local(source).project( - &interner, - TypeBuilder::synthetic(&env).integer(), - ProjectionKind::Field(FieldIndex::new(0)), - ); - traversals.insert(dest, source_0_place); + let results = traversal_liveness(&body); - assert_traversal_liveness("lhs_projection_does_not_skip", &env, &body, &traversals); + // At bb1 entry, val (_2) is live (used by the load) + let bb1_locals = entry_locals(&results, BasicBlockId::new(1)); + assert!(bb1_locals.contains(Local::new(2))); + assert!(!bb1_locals.contains(Local::VERTEX)); + + // Path bitset is empty (no vertex access) + let bb1_paths = entry_paths(&results, BasicBlockId::new(1)); + assert!(bb1_paths.is_empty()); } -/// Empty traversals set produces identical results to standard liveness. +/// Paths from multiple blocks are joined at merge points. #[test] -fn empty_traversals_is_standard_liveness() { +fn paths_joined_across_branches() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/2 -> Int { - decl env: (), source: (Int, Int), dest: Int; - @proj source_0 = source.0: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], + props: ?, archived: Bool, cond: Bool; + @proj properties = vertex.properties: ?, + metadata = vertex.metadata: ?, + archived_proj = metadata.archived: Bool; bb0() { - dest = load source_0; - return dest; + cond = load true; + if cond then bb1() else bb2(); + }, + bb1() { + props = load properties; + return props; + }, + bb2() { + archived = load archived_proj; + return archived; } }); - // source = _1, destinations = {} (empty) - let source = Local::new(1); - let traversals = Traversals::with_capacity_in(source, body.local_decls.len(), &heap); + let results = traversal_liveness(&body); + let paths = entry_paths(&results, BasicBlockId::new(0)); - assert_traversal_liveness( - "empty_traversals_is_standard_liveness", - &env, - &body, - &traversals, - ); + let entity_paths = paths.as_entity().expect("should be entity variant"); + // Join of {Properties} and {Archived} + assert!(entity_paths.contains(EntityPath::Properties)); + assert!(entity_paths.contains(EntityPath::Archived)); + assert_eq!(entity_paths.len(), 2); } diff --git a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs index d0574584d6a..c02c6c2578a 100644 --- a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs @@ -63,15 +63,12 @@ use crate::{ local::Local, terminator::TerminatorKind, }, - pass::{ - analysis::{ - dataflow::{ - TraversalLivenessAnalysis, - framework::{DataflowAnalysis as _, DataflowResults}, - }, - size_estimation::{BodyFootprint, Cardinality, InformationRange}, + pass::analysis::{ + dataflow::{ + TraversalLivenessAnalysis, + framework::{DataflowAnalysis as _, DataflowResults}, }, - transform::Traversals, + size_estimation::{BodyFootprint, Cardinality, InformationRange}, }, }; @@ -440,9 +437,9 @@ impl TerminatorPlacement { } } - fn 
compute_liveness<'heap>(
+    fn compute_liveness(
         &self,
-        body: &Body<'heap>,
+        body: &Body<'_>,
         vertex: VertexType,
     ) -> BasicBlockVec<(DenseBitSet, TraversalPathBitSet), &S> {
         let DataflowResults {
@@ -546,6 +543,26 @@ impl TerminatorPlacement {
         // }

         // self.sum_local_sizes(footprint, required_locals)
+
+        // let (locals, paths) = &live_in[successor];
+        // required_locals.clone_from(locals);
+
+        // for &param in body.basic_blocks[successor].params {
+        //     required_locals.insert(param);
+        // }
+
+        // let local_cost = self.sum_local_sizes(footprint, required_locals);
+
+        // if paths.is_empty() {
+        //     return local_cost;
+        // }
+
+        // let Some(max) = self.entity_size.inclusive_max() else {
+        //     return Cost::MAX;
+        // };
+
+        // let avg = self.entity_size.min().midpoint(max);
+        // local_cost.saturating_add(Cost::new_saturating(avg.as_u32()))
     }

     /// Sums the estimated sizes of all locals in the set.
diff --git a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs
index 36d349ffac9..babdaf14a57 100644
--- a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs
+++ b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs
@@ -12,6 +12,7 @@ use hashql_core::{
     heap::Heap,
     id::{Id as _, bit_vec::FiniteBitSet},
     pretty::Formatter,
+    symbol::sym,
     r#type::{TypeFormatter, TypeFormatterOptions, builder::TypeBuilder, environment::Environment},
 };
 use hashql_diagnostics::DiagnosticIssues;
@@ -22,9 +23,8 @@ use crate::{
     body::{
         Body,
         basic_block::{BasicBlockId, BasicBlockSlice},
-        local::{Local, LocalVec},
+        local::LocalVec,
         operand::Operand,
-        place::{FieldIndex, Place, ProjectionKind},
         terminator::{GraphRead, GraphReadHead, GraphReadTail, TerminatorKind},
     },
     builder::{BodyBuilder, body},
@@ -32,8 +32,10 @@ use crate::{
     intern::Interner,
     pass::{
         analysis::size_estimation::{BodyFootprint, Footprint, InformationRange},
-        execution::target::{TargetBitSet, TargetId},
-        
transform::Traversals, + execution::{ + VertexType, + target::{TargetBitSet, TargetId}, + }, }, pretty::TextFormatOptions, }; @@ -84,10 +86,6 @@ fn make_full_footprint<'heap>(body: &Body<'heap>, heap: &'heap Heap) -> BodyFoot } } -fn empty_traversals<'heap>(body: &Body<'heap>, heap: &'heap Heap) -> Traversals<'heap> { - Traversals::with_capacity_in(Local::new(0), body.local_decls.len(), heap) -} - fn assert_snapshot<'heap>( name: &'static str, context: &MirContext<'_, 'heap>, @@ -191,8 +189,8 @@ fn goto_allows_cross_backend_non_postgres() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl param: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], param: Int; bb0() { goto bb1(1); @@ -209,11 +207,10 @@ fn goto_allows_cross_backend_non_postgres() { let footprint = make_scalar_footprint(&body, &heap); let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, &heap); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); @@ -230,8 +227,8 @@ fn switchint_blocks_cross_backend() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl selector: Int, param: Int, result: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], selector: Int, param: Int, result: Int; bb0() { selector = load 1; @@ -254,11 +251,10 @@ fn switchint_blocks_cross_backend() { let footprint = make_scalar_footprint(&body, &heap); let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, &heap); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, 
build_targets(&body, &targets), ); @@ -276,8 +272,8 @@ fn switchint_edge_targets_are_branch_specific() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl selector: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], selector: Int; bb0() { selector = load 1; @@ -299,11 +295,10 @@ fn switchint_edge_targets_are_branch_specific() { let footprint = make_scalar_footprint(&body, &heap); let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, &heap); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); @@ -383,11 +378,10 @@ fn graphread_interpreter_only() { let footprint = make_scalar_footprint(&body, &heap); let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, &heap); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); @@ -407,8 +401,8 @@ fn postgres_incoming_removed() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl value: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], value: Int; bb0() { goto bb1(); @@ -423,11 +417,10 @@ fn postgres_incoming_removed() { let footprint = make_scalar_footprint(&body, &heap); let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, &heap); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); @@ -446,8 +439,8 @@ fn postgres_removed_in_loops() { let interner = Interner::new(&heap); let env 
= Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl value: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], value: Int; bb0() { value = load 0; @@ -462,11 +455,10 @@ fn postgres_removed_in_loops() { let footprint = make_scalar_footprint(&body, &heap); let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, &heap); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); @@ -485,8 +477,8 @@ fn postgres_removed_in_self_loops() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl value: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], value: Int; bb0() { value = load 0; @@ -498,11 +490,10 @@ fn postgres_removed_in_self_loops() { let footprint = make_scalar_footprint(&body, &heap); let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, &heap); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); @@ -521,8 +512,8 @@ fn transfer_cost_counts_live_and_params() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl live: Int, param: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], live: Int, param: Int; bb0() { live = load 10; @@ -544,11 +535,10 @@ fn transfer_cost_counts_live_and_params() { let footprint = make_scalar_footprint(&body, &heap); let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, 
&heap); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); @@ -559,79 +549,14 @@ fn transfer_cost_counts_live_and_params() { ); } -#[test] -fn traversal_assignment_skips_source_transfer_cost() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - let body = body!(interner, env; fn@0/0 -> Int { - decl source: (Int, Int), dest: Int; - @proj source_0 = source.0: Int; - - bb0() { - goto bb1(); - }, - bb1() { - dest = load source_0; - return 0; - } - }); - - // _0 = source, _1 = dest - let source = Local::new(0); - let dest = Local::new(1); - - let mut traversals = Traversals::with_capacity_in(source, body.local_decls.len(), &heap); - traversals.insert( - dest, - Place::local(source).project( - &interner, - TypeBuilder::synthetic(&env).integer(), - ProjectionKind::Field(FieldIndex::new(0)), - ), - ); - - let targets = [ - target_set(&[TargetId::Interpreter, TargetId::Postgres]), - target_set(&[TargetId::Interpreter, TargetId::Postgres]), - ]; - - let footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversal_costs = placement.terminator_placement( - &body, - &footprint, - &traversals, - build_targets(&body, &targets), - ); - let standard_costs = placement.terminator_placement( - &body, - &footprint, - &empty_traversals(&body, &heap), - build_targets(&body, &targets), - ); - - let traversal_matrix = traversal_costs.of(BasicBlockId::new(0))[0]; - let standard_matrix = standard_costs.of(BasicBlockId::new(0))[0]; - assert_eq!( - traversal_matrix.get(TargetId::Postgres, TargetId::Interpreter), - Some(cost!(0)) - ); - assert_eq!( - standard_matrix.get(TargetId::Postgres, TargetId::Interpreter), - Some(cost!(1)) - ); -} - #[test] fn transfer_cost_is_max_for_unbounded() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = 
Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl arg: [List Int], param: [List Int]; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], arg: [List Int], param: [List Int]; bb0() { arg = list 1, 2; @@ -653,11 +578,10 @@ fn transfer_cost_is_max_for_unbounded() { let footprint = make_full_footprint(&body, &heap); let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, &heap); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); @@ -674,8 +598,8 @@ fn terminator_placement_snapshot() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl selector: Int, live: Int, param: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], selector: Int, live: Int, param: Int; bb0() { live = load 10; @@ -698,11 +622,10 @@ fn terminator_placement_snapshot() { let footprint = make_scalar_footprint(&body, &heap); let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, &heap); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/terminator_placement/terminator_placement_snapshot.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/terminator_placement/terminator_placement_snapshot.snap index 4996a0af2f9..023c596cbeb 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/terminator_placement/terminator_placement_snapshot.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/terminator_placement/terminator_placement_snapshot.snap @@ -2,20 +2,20 @@ source: 
libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs expression: output --- -fn {closure@4294967040}() -> Integer { - let %0: Integer - let %1: Integer +fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Integer { let %2: Integer + let %3: Integer + let %4: Integer bb0(): { - %1 = 10 - %0 = 1 + %3 = 10 + %2 = 1 - switchInt(%0) -> [0: bb1(%2), otherwise: bb2()] + switchInt(%2) -> [0: bb1(%4), otherwise: bb2()] } - bb1(%2): { - return %1 + bb1(%4): { + return %3 } bb2(): { From 1d5b5fdf47b493925366ec693cc3db0b42b82905 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sun, 1 Mar 2026 19:31:58 +0100 Subject: [PATCH 17/32] feat: terminator placement --- .../execution/terminator_placement/mod.rs | 62 ++++---- .../execution/terminator_placement/tests.rs | 127 ++++++++++++++-- .../src/pass/execution/traversal/entity.rs | 142 +++++++++++++++++- .../mir/src/pass/execution/traversal/mod.rs | 16 +- 4 files changed, 301 insertions(+), 46 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs index c02c6c2578a..e5a8fcee86a 100644 --- a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs @@ -53,7 +53,7 @@ use super::{ Cost, VertexType, block_partitioned_vec::BlockPartitionedVec, target::{TargetBitSet, TargetId}, - traversal::{EntityPathBitSet, TraversalPathBitSet}, + traversal::{EntityPathBitSet, TransferCostConfig, TraversalPathBitSet}, }; use crate::{ body::{ @@ -420,20 +420,21 @@ impl PopulateEdgeMatrix { /// ``` pub(crate) struct TerminatorPlacement { scratch: S, - entity_size: InformationRange, + transfer_config: TransferCostConfig, } impl TerminatorPlacement { /// Creates a new placement analyzer. 
/// - /// The `entity_size` estimate is used when computing transfer costs — it represents the - /// expected size of entity data that may need to cross backend boundaries. + /// The [`TransferCostConfig`] provides size estimates for the variable-cost entity fields + /// (properties, embeddings, provenance). Fixed-size fields (UUIDs, timestamps, scalars) + /// use constants derived from the entity schema. #[inline] #[must_use] - pub(crate) const fn new_in(entity_size: InformationRange, scratch: S) -> Self { + pub(crate) const fn new_in(transfer_config: TransferCostConfig, scratch: S) -> Self { Self { scratch, - entity_size, + transfer_config, } } @@ -524,9 +525,11 @@ impl TerminatorPlacement { /// Computes the cost of transferring live data across an edge to `successor`. /// - /// The cost is the sum of estimated sizes for all locals that are: - /// - Live at the successor's entry - /// - Passed as parameters to the successor block + /// The cost has two components: + /// - **Local cost**: estimated sizes of all non-vertex locals that are live at the successor's + /// entry or passed as block parameters. + /// - **Path cost**: estimated sizes of all live entity field paths, computed from per-path + /// transfer sizes rather than the monolithic entity size. 
fn compute_transfer_cost(
         &self,
         required_locals: &mut DenseBitSet,
@@ -535,34 +538,28 @@ impl TerminatorPlacement {
         live_in: &BasicBlockSlice<(DenseBitSet, TraversalPathBitSet)>,
         successor: BasicBlockId,
     ) -> Cost {
-        todo!()
-        // required_locals.clone_from(&live_in[successor]);
+        let (locals, paths) = &live_in[successor];
+        required_locals.clone_from(locals);

-        // for &param in body.basic_blocks[successor].params {
-        //     required_locals.insert(param);
-        // }
-
-        // self.sum_local_sizes(footprint, required_locals)
+        for &param in body.basic_blocks[successor].params {
+            required_locals.insert(param);
+        }

-        // let (locals, paths) = &live_in[successor];
-        // required_locals.clone_from(locals);
+        let local_cost = self.sum_local_sizes(footprint, required_locals);

-        // for &param in body.basic_blocks[successor].params {
-        //     required_locals.insert(param);
-        // }
+        if paths.is_empty() {
+            return local_cost;
+        }

-        // let local_cost = self.sum_local_sizes(footprint, required_locals);
+        let path_range = paths.transfer_size(&self.transfer_config);

-        // if paths.is_empty() {
-        //     return local_cost;
-        // }
+        let Some(max) = path_range.inclusive_max() else {
+            return Cost::MAX;
+        };

-        // let Some(max) = self.entity_size.inclusive_max() else {
-        //     return Cost::MAX;
-        // };
+        let path_cost = Cost::new_saturating(path_range.min().midpoint(max).as_u32());

-        // let avg = self.entity_size.min().midpoint(max);
-        // local_cost.saturating_add(Cost::new_saturating(avg.as_u32()))
+        local_cost.saturating_add(path_cost)
     }

     /// Sums the estimated sizes of all locals in the set.
@@ -578,7 +575,10 @@ impl TerminatorPlacement { for local in locals { let Some(size_estimate) = footprint.locals[local].average( - &[InformationRange::zero(), self.entity_size], + &[ + InformationRange::zero(), + self.transfer_config.properties_size, + ], &[Cardinality::one(), Cardinality::one()], ) else { return Cost::MAX; diff --git a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs index babdaf14a57..1c3fe519e84 100644 --- a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs @@ -35,6 +35,7 @@ use crate::{ execution::{ VertexType, target::{TargetBitSet, TargetId}, + traversal::TransferCostConfig, }, }, pretty::TextFormatOptions, @@ -206,7 +207,8 @@ fn goto_allows_cross_backend_non_postgres() { ]; let footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, VertexType::Entity, @@ -250,7 +252,8 @@ fn switchint_blocks_cross_backend() { ]; let footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, VertexType::Entity, @@ -294,7 +297,8 @@ fn switchint_edge_targets_are_branch_specific() { ]; let footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, VertexType::Entity, @@ -377,7 +381,8 @@ fn 
graphread_interpreter_only() { let targets = [all_targets(), all_targets()]; let footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, VertexType::Entity, @@ -416,7 +421,8 @@ fn postgres_incoming_removed() { let targets = [all_targets(), all_targets()]; let footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, VertexType::Entity, @@ -454,7 +460,8 @@ fn postgres_removed_in_loops() { let targets = [all_targets(), all_targets()]; let footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, VertexType::Entity, @@ -489,7 +496,8 @@ fn postgres_removed_in_self_loops() { let targets = [all_targets()]; let footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, VertexType::Entity, @@ -534,7 +542,8 @@ fn transfer_cost_counts_live_and_params() { ]; let footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, VertexType::Entity, @@ 
-577,7 +586,8 @@ fn transfer_cost_is_max_for_unbounded() { ]; let footprint = make_full_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, VertexType::Entity, @@ -592,6 +602,102 @@ fn transfer_cost_is_max_for_unbounded() { ); } +/// Edition provenance live across a goto edge produces path-based transfer cost. +/// +/// `edition_provenance_size` defaults to `3..=20`, midpoint 11. With no other live locals, +/// the Postgres→Interpreter transition cost is purely the path cost. +#[test] +fn path_cost_from_edition_provenance() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj metadata = vertex.metadata: ?, + prov = metadata.provenance: ?, + edition = prov.edition: ?; + + bb0() { + goto bb1(); + }, + bb1() { + val = load edition; + return val; + } + }); + + let targets = [ + target_set(&[TargetId::Interpreter, TargetId::Postgres]), + target_set(&[TargetId::Interpreter, TargetId::Postgres]), + ]; + + let footprint = make_scalar_footprint(&body, &heap); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); + let costs = placement.terminator_placement( + &body, + VertexType::Entity, + &footprint, + build_targets(&body, &targets), + ); + + // edition_provenance_size = 3..=20, midpoint(3, 20) = 11 + let matrix = costs.of(BasicBlockId::new(0))[0]; + assert_eq!( + matrix.get(TargetId::Postgres, TargetId::Interpreter), + Some(cost!(11)) + ); +} + +/// Inferred provenance produces a different (lower) cost than edition provenance. 
+/// +/// `ProvenanceInferred` has a static size `3..=5` (fixed structure, no config), midpoint 4. +/// This verifies the split: without per-variant sizing, both would produce the same cost. +#[test] +fn path_cost_from_inferred_provenance() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj metadata = vertex.metadata: ?, + prov = metadata.provenance: ?, + inferred = prov.inferred: ?; + + bb0() { + goto bb1(); + }, + bb1() { + val = load inferred; + return val; + } + }); + + let targets = [ + target_set(&[TargetId::Interpreter, TargetId::Postgres]), + target_set(&[TargetId::Interpreter, TargetId::Postgres]), + ]; + + let footprint = make_scalar_footprint(&body, &heap); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); + let costs = placement.terminator_placement( + &body, + VertexType::Entity, + &footprint, + build_targets(&body, &targets), + ); + + // ProvenanceInferred is static 3..=5, midpoint(3, 5) = 4 + let matrix = costs.of(BasicBlockId::new(0))[0]; + assert_eq!( + matrix.get(TargetId::Postgres, TargetId::Interpreter), + Some(cost!(4)) + ); +} + #[test] fn terminator_placement_snapshot() { let heap = Heap::new(); @@ -621,7 +727,8 @@ fn terminator_placement_snapshot() { ]; let footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, VertexType::Entity, diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs index baf134a5fa2..5b350ef5908 100644 --- 
a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs @@ -1,4 +1,4 @@ -use core::debug_assert_matches; +use core::{debug_assert_matches, num::NonZero, ops::Bound}; use hashql_core::{ id::{ @@ -14,7 +14,10 @@ use super::{ }; use crate::{ body::place::{Projection, ProjectionKind}, - pass::analysis::dataflow::lattice::{HasBottom, HasTop, JoinSemiLattice}, + pass::analysis::{ + dataflow::lattice::{HasBottom, HasTop, JoinSemiLattice}, + size_estimation::{InformationRange, InformationUnit}, + }, }; macro_rules! sym { @@ -103,6 +106,63 @@ pub enum EntityPath { RightEntityProvenance, } +/// Configuration for entity field transfer cost estimation. +/// +/// Separates the variable-size components (properties, embeddings, provenance) from the +/// fixed-size schema fields. The fixed costs (UUIDs, timestamps, scalars) are constants on +/// [`EntityPath::transfer_size`]; this config provides the values that vary per entity type +/// or deployment. +#[derive(Debug, Copy, Clone)] +pub(crate) struct TransferCostConfig { + /// Size of the entity's properties (the `T` parameter in `Entity`). + pub properties_size: InformationRange, + /// Size of a single embedding vector. + pub embedding_size: InformationRange, + /// Size of `EntityEditionProvenance` JSONB (`entity_editions.provenance`). + /// + /// Variable structure: `created_by_id` + optional `archived_by_id` + `actor_type` + + /// `OriginProvenance` (tag + optional strings) + `Vec` (typically 0-2 + /// items, each with optional entity ID, authors, location, and timestamps). + pub edition_provenance_size: InformationRange, + /// Size of `PropertyProvenance` JSONB on entity edges (`entity_edge.provenance`). + /// + /// Just `Vec`. Incoming edges are always empty; outgoing edges + /// carry the caller-provided provenance, typically 0-1 sources. 
+ pub edge_provenance_size: InformationRange, + /// Divisor for estimating property metadata size from properties size. + /// + /// Property metadata stores per-key metadata (confidence, provenance) rather than values, + /// so it is lighter than properties. The estimate is `properties_size / divisor`. + /// + /// This is a placeholder until the confirmed entity type set is available, at which point + /// the metadata size can be computed directly from the property key count. + pub property_metadata_divisor: NonZero, +} + +impl TransferCostConfig { + /// Creates a config with the current HASH schema defaults. + /// + /// Uses the known embedding dimension (`vector(3072)`) and a metadata-to-properties ratio + /// of 1:4. Provenance sizes are derived from the actual JSONB structures stored by the + /// graph service. Only `properties_size` varies per entity type. + #[must_use] + pub(crate) const fn new(properties_size: InformationRange) -> Self { + Self { + properties_size, + embedding_size: InformationRange::value(InformationUnit::new(3072)), + edition_provenance_size: InformationRange::new( + InformationUnit::new(3), + Bound::Included(InformationUnit::new(20)), + ), + edge_provenance_size: InformationRange::new( + InformationUnit::new(0), + Bound::Included(InformationUnit::new(10)), + ), + property_metadata_divisor: NonZero::new(4).expect("infallible"), + } + } +} + type FiniteBitSetWidth = u32; const _: () = { assert!( @@ -222,6 +282,73 @@ impl EntityPath { } } + /// Returns the estimated transfer size for this path in information units. + /// + /// Fixed-size fields (UUIDs, timestamps, scalars) return known constants derived from the + /// entity schema. [`Properties`](Self::Properties) depends on the entity's type parameter. + /// [`PropertyMetadata`](Self::PropertyMetadata) is estimated at 1/4 of properties size, + /// since it stores lightweight per-property-key metadata rather than values. 
+ pub(crate) fn transfer_size(self, config: &TransferCostConfig) -> InformationRange { + #[expect(clippy::match_same_arms, reason = "readability")] + #[expect(clippy::integer_division)] + match self { + Self::Properties => config.properties_size, + Self::PropertyMetadata => { + let divisor = config.property_metadata_divisor; + let min = InformationUnit::new(config.properties_size.min().as_u32() / divisor); + config.properties_size.inclusive_max().map_or_else( + || InformationRange::new(min, Bound::Unbounded), + |max| { + InformationRange::new( + min, + Bound::Included(InformationUnit::new(max.as_u32() / divisor)), + ) + }, + ) + } + + Self::Vectors => config.embedding_size, + + // Composites: sum of leaf children + Self::RecordId => InformationRange::value(InformationUnit::new(4)), + Self::EntityId => InformationRange::value(InformationUnit::new(3)), + Self::TemporalVersioning => InformationRange::value(InformationUnit::new(4)), + + // UUID fields + Self::WebId + | Self::EntityUuid + | Self::DraftId + | Self::EditionId + | Self::LeftEntityWebId + | Self::LeftEntityUuid + | Self::RightEntityWebId + | Self::RightEntityUuid => InformationRange::one(), + + // Temporal intervals (start + end timestamps) + Self::DecisionTime | Self::TransactionTime => { + InformationRange::value(InformationUnit::new(2)) + } + + // Type ID list (variable length, at least one type) + Self::EntityTypeIds => InformationRange::new(InformationUnit::new(1), Bound::Unbounded), + + // Scalar metadata + Self::Archived + | Self::Confidence + | Self::LeftEntityConfidence + | Self::RightEntityConfidence => InformationRange::one(), + + // Provenance: inferred is a fixed structure (3 required + 2 optional scalars) + Self::ProvenanceInferred => InformationRange::new( + InformationUnit::new(3), + Bound::Included(InformationUnit::new(5)), + ), + // Provenance: edition and edge have Vec, sized from config + Self::ProvenanceEdition => config.edition_provenance_size, + Self::LeftEntityProvenance | 
Self::RightEntityProvenance => config.edge_provenance_size, + } + } + const fn is_jsonb(self) -> bool { matches!( self, @@ -329,6 +456,17 @@ impl EntityPathBitSet { pub(crate) const fn insert_all(&mut self) { *self = Self::TOP; } + + /// Sums the [`transfer_size`](EntityPath::transfer_size) of every path in this set. + pub(crate) fn transfer_size(self, config: &TransferCostConfig) -> InformationRange { + let mut total = InformationRange::zero(); + + for path in &self.0 { + total += path.transfer_size(config); + } + + total + } } impl HasTop for TraversalLattice { diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs index d592d21a78d..3953354bfaa 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs @@ -16,10 +16,13 @@ mod tests; pub(crate) use analysis::{TraversalAnalysis, Traversals}; -pub(crate) use self::access::Access; pub use self::entity::{EntityPath, EntityPathBitSet}; +pub(crate) use self::{access::Access, entity::TransferCostConfig}; use super::VertexType; -use crate::pass::analysis::dataflow::lattice::{HasBottom, HasTop, JoinSemiLattice}; +use crate::pass::analysis::{ + dataflow::lattice::{HasBottom, HasTop, JoinSemiLattice}, + size_estimation::InformationRange, +}; /// Lattice structure for traversal path bitsets. /// @@ -37,7 +40,7 @@ impl TraversalLattice { } #[must_use] - pub const fn vertex(&self) -> VertexType { + pub const fn vertex(self) -> VertexType { self.vertex } } @@ -116,6 +119,13 @@ impl TraversalPathBitSet { (Self::Entity(bitset), TraversalPath::Entity(path)) => bitset.insert(path), } } + + /// Sums the [`transfer_size`](EntityPath::transfer_size) of every path in this set. 
+ pub(crate) fn transfer_size(self, config: &TransferCostConfig) -> InformationRange { + match self { + Self::Entity(entity_paths) => entity_paths.transfer_size(config), + } + } } impl HasBottom for TraversalLattice { From 575cf3bf98c02f8ccc86925fdc16647fd27b415f Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sun, 1 Mar 2026 19:43:24 +0100 Subject: [PATCH 18/32] chore: tests --- .../src/pass/execution/traversal/entity.rs | 2 +- .../mir/src/pass/execution/traversal/tests.rs | 198 ++++++++---------- 2 files changed, 90 insertions(+), 110 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs index 5b350ef5908..af9b75ae0b5 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs @@ -256,7 +256,7 @@ impl EntityPath { /// For example, [`WebId`](Self::WebId) has ancestors /// [`EntityId`](Self::EntityId) and [`RecordId`](Self::RecordId). /// Top-level paths return an empty slice. - const fn ancestors(self) -> &'static [Self] { + pub(crate) const fn ancestors(self) -> &'static [Self] { match self { Self::WebId | Self::EntityUuid | Self::DraftId => &[Self::EntityId, Self::RecordId], Self::EntityId | Self::EditionId => &[Self::RecordId], diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs index 0444aa62217..694c280fc1d 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs @@ -1,21 +1,29 @@ -//! Unit tests for entity projection path lookup, composite swallowing, and traversal analysis. +//! Unit tests for entity projection path lookup, composite swallowing, transfer sizing, +//! and traversal analysis. 
+ +use core::ops::Bound; use hashql_core::{symbol::sym, r#type::TypeId}; -use super::access::{Access, AccessMode}; use crate::{ body::{ local::Local, place::{Projection, ProjectionKind}, }, pass::{ - analysis::dataflow::lattice::{ - HasTop as _, JoinSemiLattice as _, - laws::{assert_bounded_join_semilattice, assert_is_top_consistent}, + analysis::{ + dataflow::lattice::{ + HasTop as _, JoinSemiLattice as _, + laws::{assert_bounded_join_semilattice, assert_is_top_consistent}, + }, + size_estimation::{InformationRange, InformationUnit}, }, execution::{ VertexType, - traversal::{EntityPath, EntityPathBitSet, TraversalLattice, TraversalPathBitSet}, + traversal::{ + EntityPath, EntityPathBitSet, TransferCostConfig, TraversalLattice, + TraversalPathBitSet, + }, }, }, }; @@ -28,76 +36,6 @@ fn proj(name: impl Into>) -> Projection<'st } } -/// `[.properties]` → `Access::Postgres(Direct)` (JSONB column). -#[test] -fn properties_is_postgres() { - let projections = &[proj(sym::properties)]; - let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); - - assert_eq!(access, Some(Access::Postgres(AccessMode::Direct))); -} - -/// `[.properties.foo.bar]` → Postgres (JSONB otherwise). -/// -/// JSONB nodes have `otherwise` set, so any sub-path is also Postgres-accessible. -#[test] -fn properties_subpath_is_postgres() { - let projections = &[proj(sym::properties), proj(sym::foo), proj(sym::bar)]; - let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); - - assert_eq!(access, Some(Access::Postgres(AccessMode::Direct))); -} - -/// `[.encodings.vectors]` → `Access::Embedding(Direct)`. -#[test] -fn vectors_is_embedding() { - let projections = &[proj(sym::encodings), proj(sym::vectors)]; - let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); - - assert_eq!(access, Some(Access::Embedding(AccessMode::Direct))); -} - -/// Various metadata paths map to Postgres columns. 
-#[test] -fn metadata_columns_are_postgres() { - // metadata.archived -> Direct - let projections = &[proj(sym::metadata), proj(sym::archived)]; - assert_eq!( - EntityPath::resolve(projections).map(|(path, _)| path.access()), - Some(Access::Postgres(AccessMode::Direct)) - ); - - // metadata.record_id -> Composite - let projections = &[proj(sym::metadata), proj(sym::record_id)]; - assert_eq!( - EntityPath::resolve(projections).map(|(path, _)| path.access()), - Some(Access::Postgres(AccessMode::Composite)) - ); - - // metadata.record_id.entity_id.web_id -> Direct - let projections = &[ - proj(sym::metadata), - proj(sym::record_id), - proj(sym::entity_id), - proj(sym::web_id), - ]; - assert_eq!( - EntityPath::resolve(projections).map(|(path, _)| path.access()), - Some(Access::Postgres(AccessMode::Direct)) - ); - - // metadata.temporal_versioning.decision_time -> Direct - let projections = &[ - proj(sym::metadata), - proj(sym::temporal_versioning), - proj(sym::decision_time), - ]; - assert_eq!( - EntityPath::resolve(projections).map(|(path, _)| path.access()), - Some(Access::Postgres(AccessMode::Direct)) - ); -} - /// `link_data.left_entity_id.draft_id` → `None` (synthesized, not stored). #[test] fn link_data_synthesized_is_none() { @@ -106,18 +44,14 @@ fn link_data_synthesized_is_none() { proj(sym::left_entity_id), proj(sym::draft_id), ]; - let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); - - assert_eq!(access, None); + assert_eq!(EntityPath::resolve(projections), None); } /// Invalid path like `[.unknown]` → `None`. #[test] fn unknown_path_returns_none() { let projections = &[proj(sym::unknown)]; - let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); - - assert_eq!(access, None); + assert_eq!(EntityPath::resolve(projections), None); } /// The returned index reflects how many projections were consumed during resolution. 
@@ -299,19 +233,6 @@ fn child_suppressed_by_ancestor() { assert!(!bitset.contains(EntityPath::EditionId)); } -/// A grandparent composite suppresses grandchildren. -#[test] -fn grandparent_suppresses_grandchild() { - let mut bitset = empty_bitset(); - bitset.insert(EntityPath::RecordId); - - // WebId is a grandchild of RecordId (through EntityId) - bitset.insert(EntityPath::WebId); - - assert!(bitset.contains(EntityPath::RecordId)); - assert!(!bitset.contains(EntityPath::WebId)); -} - /// Inserting a top-level composite swallows the entire subtree. #[test] fn record_id_swallows_entire_subtree() { @@ -417,20 +338,6 @@ fn insert_all_len() { assert_eq!(bitset.len(), 18); } -/// `insert_all` after individual inserts produces the same result as a fresh `insert_all`. -#[test] -fn insert_all_is_idempotent_over_existing() { - let mut bitset = empty_bitset(); - bitset.insert(EntityPath::WebId); - bitset.insert(EntityPath::Properties); - bitset.insert_all(); - - let mut fresh = empty_bitset(); - fresh.insert_all(); - - assert_eq!(bitset, fresh); -} - /// An empty bitset has len 0. #[test] fn empty_bitset_len() { @@ -552,3 +459,76 @@ fn traversal_path_bitset_top_absorbs_join() { assert_eq!(result, top); } } + +/// `join` normalizes ancestor+descendant pairs produced by raw union. +/// +/// When one side has a leaf and the other has its ancestor composite, the union +/// contains both. `normalize` must remove the descendant since the ancestor covers it. 
+#[test] +fn join_normalizes_ancestor_descendant_pairs() { + let lattice = TraversalLattice::new(VertexType::Entity); + + let mut lhs = bitset_of(&[EntityPath::WebId, EntityPath::Properties]); + let rhs = bitset_of(&[EntityPath::RecordId]); + + lattice.join(&mut lhs, &rhs); + + assert!(lhs.contains(EntityPath::RecordId)); + assert!(lhs.contains(EntityPath::Properties)); + assert!(!lhs.contains(EntityPath::WebId)); + assert_eq!(lhs.len(), 2); +} + +// --- Transfer size tests --- + +/// Each composite's `transfer_size` equals the sum of its immediate children's `transfer_sizes`. +/// +/// Immediate children are identified automatically via `ancestors()`: a path is an immediate +/// child of composite C if C is its nearest ancestor (`ancestors()[0] == C`). This catches +/// drift if a new child is added to the hierarchy without updating the composite constant. +#[test] +fn composite_transfer_size_matches_children() { + let config = TransferCostConfig::new(InformationRange::zero()); + + for composite in EntityPath::all() { + let mut expected = InformationRange::zero(); + let mut has_children = false; + + for path in EntityPath::all() { + if path.ancestors().first() == Some(&composite) { + expected += path.transfer_size(&config); + has_children = true; + } + } + + if has_children { + assert_eq!( + composite.transfer_size(&config), + expected, + "{composite:?} transfer_size doesn't match sum of immediate children" + ); + } + } +} + +/// `ProvenanceInferred` has a static `transfer_size` independent of config. +/// +/// The type is a fixed structure (3 required scalars + 2 optional timestamps), so its +/// size is a constant `3..=5` regardless of `TransferCostConfig` values. 
+#[test] +fn inferred_provenance_transfer_size_is_static() { + let small_config = TransferCostConfig::new(InformationRange::zero()); + let large_config = TransferCostConfig::new(InformationRange::value(InformationUnit::new(1000))); + + let small = EntityPath::ProvenanceInferred.transfer_size(&small_config); + let large = EntityPath::ProvenanceInferred.transfer_size(&large_config); + + assert_eq!(small, large); + assert_eq!( + small, + InformationRange::new( + InformationUnit::new(3), + Bound::Included(InformationUnit::new(5)) + ) + ); +} From 21e1b5c1a834528921955dbe2923cbf04abd5c21 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sun, 1 Mar 2026 19:51:35 +0100 Subject: [PATCH 19/32] feat: re-enable unused lints --- libs/@local/hashql/mir/src/lib.rs | 1 - .../hashql/mir/src/pass/execution/mod.rs | 64 ++++++++++--------- .../statement_placement/embedding/mod.rs | 11 ++-- .../statement_placement/embedding/tests.rs | 10 +-- .../statement_placement/interpret/mod.rs | 2 +- .../statement_placement/interpret/tests.rs | 2 +- .../statement_placement/postgres/mod.rs | 14 ++-- .../statement_placement/postgres/tests.rs | 48 +++++++------- .../execution/terminator_placement/mod.rs | 2 +- .../execution/terminator_placement/tests.rs | 57 ++++++++++++++++- 10 files changed, 131 insertions(+), 80 deletions(-) diff --git a/libs/@local/hashql/mir/src/lib.rs b/libs/@local/hashql/mir/src/lib.rs index 926e4807bab..f3b4006cd96 100644 --- a/libs/@local/hashql/mir/src/lib.rs +++ b/libs/@local/hashql/mir/src/lib.rs @@ -2,7 +2,6 @@ //! //! 
## Workspace dependencies #![cfg_attr(doc, doc = simple_mermaid::mermaid!("../docs/dependency-diagram.mmd"))] -#![allow(unused)] #![feature( // Language Features associated_type_defaults, diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index 084cd728af5..0230aea33b0 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -31,16 +31,21 @@ pub use self::{ vertex::VertexType, }; use self::{ + fusion::BasicBlockFusion, + island::IslandPlacement, + placement::{ArcConsistency, PlacementSolverContext}, splitting::BasicBlockSplitting, statement_placement::{StatementPlacement as _, TargetPlacementStatement}, target::TargetArray, - traversal::TraversalAnalysis, + terminator_placement::TerminatorPlacement, + traversal::{TransferCostConfig, TraversalAnalysis}, }; use super::{analysis::size_estimation::BodyFootprint, transform::Traversals}; use crate::{ body::{Body, Source, basic_block::BasicBlockVec, local::Local}, context::MirContext, def::DefIdSlice, + pass::analysis::size_estimation::InformationRange, }; pub struct ExecutionAnalysis<'ctx, 'heap, S: Allocator> { @@ -84,7 +89,7 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { let mut statement_costs = statement_costs.map(|cost| cost.unwrap_or_else(|| unreachable!())); - let _possibilities = BasicBlockSplitting::new_in(&self.scratch).split_in( + let mut possibilities = BasicBlockSplitting::new_in(&self.scratch).split_in( context, body, &mut statement_costs, @@ -94,38 +99,39 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { // The body has been split (sequentially) and like the statement costs needs to be remapped traversals.remap(&body.basic_blocks); - todo!() - - // let terminators = TerminatorPlacement::new_in(InformationRange::full(), &self.scratch); - // let mut terminator_costs = terminators.terminator_placement_in( - // body, - // &self.footprints[body.id], - 
// traversals, - // &possibilities, - // &self.scratch, - // ); + let terminators = TerminatorPlacement::new_in( + TransferCostConfig::new(InformationRange::full()), + &self.scratch, + ); + let mut terminator_costs = terminators.terminator_placement_in( + body, + vertex, + &self.footprints[body.id], + &possibilities, + &self.scratch, + ); - // ArcConsistency { - // blocks: &mut possibilities, - // terminators: &mut terminator_costs, - // } - // .run_in(body, &self.scratch); + ArcConsistency { + blocks: &mut possibilities, + terminators: &mut terminator_costs, + } + .run_in(body, &self.scratch); - // let mut solver = PlacementSolverContext { - // assignment: &possibilities, - // statements: &statement_costs, - // terminators: &terminator_costs, - // } - // .build_in(body, &self.scratch); + let mut solver = PlacementSolverContext { + assignment: &possibilities, + statements: &statement_costs, + terminators: &terminator_costs, + } + .build_in(body, &self.scratch); - // let mut assignment = solver.run(context, body); + // TODO: move to per island requirements here + let mut assignment = solver.run(context, body); - // let fusion = BasicBlockFusion::new_in(&self.scratch); - // fusion.fuse(body, &mut assignment); + let fusion = BasicBlockFusion::new_in(&self.scratch); + fusion.fuse(body, &mut assignment); - // let islands = IslandPlacement::new_in(&self.scratch).run(body, &assignment, - // context.heap); + let islands = IslandPlacement::new_in(&self.scratch).run(body, &assignment, context.heap); - // (assignment, islands) + (assignment, islands) } } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs index 6288b2f14e3..90a4da7c7bf 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs @@ -10,10 +10,8 @@ use crate::{ body::{Body, 
Source, local::Local, operand::Operand, place::Place, rvalue::RValue}, context::MirContext, pass::execution::{ - Cost, VertexType, - cost::StatementCostVec, - statement_placement::common::entity_projection_access, - traversal::{Access, Traversals}, + Cost, VertexType, cost::StatementCostVec, + statement_placement::common::entity_projection_access, traversal::Access, }, visit::Visitor as _, }; @@ -28,7 +26,6 @@ struct EmbeddingSupported { impl EmbeddingSupported { fn is_supported_place<'heap>( &self, - context: &MirContext<'_, 'heap>, body: &Body<'heap>, domain: &DenseBitSet, place: &Place<'heap>, @@ -70,13 +67,13 @@ impl<'heap> Supported<'heap> for EmbeddingSupported { fn is_supported_operand( &self, - context: &MirContext<'_, 'heap>, + _: &MirContext<'_, 'heap>, body: &Body<'heap>, domain: &DenseBitSet, operand: &Operand<'heap>, ) -> bool { match operand { - Operand::Place(place) => self.is_supported_place(context, body, domain, place), + Operand::Place(place) => self.is_supported_place(body, domain, place), Operand::Constant(_) => false, } } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs index f4df5de0f07..3e72f682221 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs @@ -37,7 +37,7 @@ fn only_vectors_projection_supported() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -78,7 +78,7 @@ fn all_args_excluded() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -117,7 +117,7 @@ fn non_vectors_entity_projection_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -158,7 +158,7 @@ fn 
storage_statements_zero_cost() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -211,7 +211,7 @@ fn other_operations_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs index 65690a09505..9a8627a1f4b 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs @@ -92,7 +92,7 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> &mut self, _: &MirContext<'_, 'heap>, body: &Body<'heap>, - vertex: VertexType, + _: VertexType, alloc: A, ) -> StatementCostVec { let statement_costs = StatementCostVec::new_in(&body.basic_blocks, alloc); diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs index c745ba8c2b0..c99a69c18aa 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs @@ -11,7 +11,7 @@ use crate::{ intern::Interner, pass::execution::{ statement_placement::{ - InterpreterStatementPlacement, StatementPlacement as _, + InterpreterStatementPlacement, tests::{assert_placement, run_placement}, }, traversal::TraversalAnalysis, diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs index ef2c6051824..10be791ff9c 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs +++ 
b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs @@ -31,7 +31,7 @@ use crate::{ VertexType, cost::{Cost, StatementCostVec}, statement_placement::common::entity_projection_access, - traversal::{Access, Traversals}, + traversal::Access, }, visit::Visitor as _, }; @@ -343,13 +343,7 @@ impl<'heap, A: Allocator> PostgresSupported<'_, 'heap, A> { /// any other local (falls through to the regular domain check). /// /// [`GraphReadFilter`]: Source::GraphReadFilter - fn is_supported_place_graph_read_filter( - &self, - context: &MirContext<'_, 'heap>, - body: &Body<'heap>, - - place: &Place<'heap>, - ) -> Option { + fn is_supported_place_graph_read_filter(&self, place: &Place<'heap>) -> Option { match place.local { Local::ENV => { // The environment projections depend on the first projection, because that @@ -382,7 +376,7 @@ impl<'heap, A: Allocator> PostgresSupported<'_, 'heap, A> { fn is_supported_place( &self, - context: &MirContext<'_, 'heap>, + _: &MirContext<'_, 'heap>, body: &Body<'heap>, domain: &DenseBitSet, place: &Place<'heap>, @@ -391,7 +385,7 @@ impl<'heap, A: Allocator> PostgresSupported<'_, 'heap, A> { // env fields are checked against env_domain, vertex projections against entity // field access. Other locals fall through to the regular domain check. 
if matches!(body.source, Source::GraphReadFilter(_)) - && let Some(result) = self.is_supported_place_graph_read_filter(context, body, place) + && let Some(result) = self.is_supported_place_graph_read_filter(place) { return result; } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs index b7d867414d4..7770c6d45b5 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs @@ -58,7 +58,7 @@ fn binary_unary_ops_supported() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -98,7 +98,7 @@ fn aggregate_tuple_supported() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -141,7 +141,7 @@ fn aggregate_closure_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -187,7 +187,7 @@ fn apply_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -226,7 +226,7 @@ fn input_supported() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -267,7 +267,7 @@ fn env_with_closure_type_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -307,7 +307,7 @@ fn env_without_closure_accepted() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -346,7 +346,7 @@ fn entity_projection_column() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -386,7 +386,7 @@ fn entity_projection_jsonb() { } }); - let mut 
context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -430,7 +430,7 @@ fn storage_statements_zero_cost() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -492,7 +492,7 @@ fn diamond_must_analysis() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -609,7 +609,7 @@ fn env_closure_field_rejected_other_accepted() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -649,7 +649,7 @@ fn env_dict_non_string_key_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -688,7 +688,7 @@ fn env_dict_string_key_accepted() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -731,7 +731,7 @@ fn env_dict_opaque_string_key_accepted() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -800,7 +800,7 @@ fn fnptr_constant_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -845,7 +845,7 @@ fn eq_dict_vs_struct_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -890,7 +890,7 @@ fn eq_list_vs_tuple_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -935,7 +935,7 @@ fn eq_unknown_type_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -980,7 +980,7 @@ fn eq_same_type_accepted() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -1025,7 +1025,7 @@ fn 
ne_dict_vs_struct_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -1599,7 +1599,7 @@ fn eq_place_vs_constant_accepted() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -2134,7 +2134,7 @@ fn serialization_unsafe_statement_no_cost() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -2188,7 +2188,7 @@ fn serialization_unsafe_edge_propagates() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, diff --git a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs index e5a8fcee86a..dd8d8e6d1ff 100644 --- a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs @@ -53,7 +53,7 @@ use super::{ Cost, VertexType, block_partitioned_vec::BlockPartitionedVec, target::{TargetBitSet, TargetId}, - traversal::{EntityPathBitSet, TransferCostConfig, TraversalPathBitSet}, + traversal::{TransferCostConfig, TraversalPathBitSet}, }; use crate::{ body::{ diff --git a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs index 1c3fe519e84..fc46f9163f4 100644 --- a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs @@ -31,7 +31,7 @@ use crate::{ context::MirContext, intern::Interner, pass::{ - analysis::size_estimation::{BodyFootprint, Footprint, InformationRange}, + analysis::size_estimation::{BodyFootprint, Footprint, InformationRange, InformationUnit}, execution::{ VertexType, target::{TargetBitSet, TargetId}, @@ -698,6 +698,61 @@ fn 
path_cost_from_inferred_provenance() { ); } +/// Transfer cost sums both live locals and live entity paths. +/// +/// A scalar local (`live`) costs 1. Two entity paths (`ProvenanceEdition` at 3..=20 +/// and `Properties` at 10..=10) sum to 13..=30, midpoint 21. Total = 1 + 21 = 22. +#[test] +fn transfer_cost_combines_locals_and_paths() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], live: Int, val1: ?, val2: ?; + @proj metadata = vertex.metadata: ?, + prov = metadata.provenance: ?, + edition = prov.edition: ?, + props = vertex.properties: ?; + + bb0() { + live = load 42; + goto bb1(); + }, + bb1() { + val1 = load edition; + val2 = load props; + return live; + } + }); + + let targets = [ + target_set(&[TargetId::Interpreter, TargetId::Postgres]), + target_set(&[TargetId::Interpreter, TargetId::Postgres]), + ]; + + let footprint = make_scalar_footprint(&body, &heap); + let placement = TerminatorPlacement::new_in( + TransferCostConfig::new(InformationRange::value(InformationUnit::new(10))), + Global, + ); + let costs = placement.terminator_placement( + &body, + VertexType::Entity, + &footprint, + build_targets(&body, &targets), + ); + + // local_cost: `live` scalar = 1 + // path_cost: Properties(10..=10) + ProvenanceEdition(3..=20) = 13..=30, midpoint(13, 30) = 21 + // total = 1 + 21 = 22 + let matrix = costs.of(BasicBlockId::new(0))[0]; + assert_eq!( + matrix.get(TargetId::Postgres, TargetId::Interpreter), + Some(cost!(22)) + ); +} + #[test] fn terminator_placement_snapshot() { let heap = Heap::new(); From b15d42089a1e35b9daf118ac54e5a26b738b3b32 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sun, 1 Mar 2026 22:06:53 +0100 Subject: [PATCH 20/32] feat: checkpoint --- .../pass/analysis/dataflow/liveness/tests.rs | 2 +- .../pass/execution/block_partitioned_vec.rs | 34 +-- 
.../hashql/mir/src/pass/execution/cost.rs | 4 +- .../mir/src/pass/execution/fusion/mod.rs | 80 ++++-- .../mir/src/pass/execution/fusion/tests.rs | 249 +++++++++++------- .../mir/src/pass/execution/island/mod.rs | 28 +- .../mir/src/pass/execution/island/tests.rs | 208 ++++++++++----- .../hashql/mir/src/pass/execution/mod.rs | 18 +- .../mir/src/pass/execution/splitting/mod.rs | 8 +- .../statement_placement/interpret/tests.rs | 19 +- .../execution/terminator_placement/mod.rs | 2 +- .../pass/execution/traversal/analysis/mod.rs | 134 +++++++--- .../fusion/fuse_joins_traversal_paths.snap | 19 ++ 13 files changed, 544 insertions(+), 261 deletions(-) create mode 100644 libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_joins_traversal_paths.snap diff --git a/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/tests.rs b/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/tests.rs index e376ea6af98..678a0d37af4 100644 --- a/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/tests.rs +++ b/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/tests.rs @@ -396,7 +396,7 @@ fn vertex_excluded_from_local_bitset() { assert!(!bb0_locals.contains(Local::new(2))); } -/// Vertex field accesses are recorded as EntityPaths in the path bitset. +/// Vertex field accesses are recorded as `EntityPaths` in the path bitset. 
#[test] fn vertex_access_records_entity_path() { let heap = Heap::new(); diff --git a/libs/@local/hashql/mir/src/pass/execution/block_partitioned_vec.rs b/libs/@local/hashql/mir/src/pass/execution/block_partitioned_vec.rs index 98222cb0d81..65d47cb89f4 100644 --- a/libs/@local/hashql/mir/src/pass/execution/block_partitioned_vec.rs +++ b/libs/@local/hashql/mir/src/pass/execution/block_partitioned_vec.rs @@ -24,6 +24,17 @@ pub(crate) struct BlockPartitionedVec { } impl BlockPartitionedVec { + pub(crate) fn new_in(counts: impl ExactSizeIterator, value: T, alloc: A) -> Self + where + T: Clone, + A: Clone, + { + let (offsets, length) = Self::build_offsets(counts, alloc.clone()); + let data = alloc::vec::from_elem_in(value, length, alloc); + + Self { offsets, data } + } + #[expect(unsafe_code)] fn build_offsets( mut iter: impl ExactSizeIterator, @@ -104,17 +115,6 @@ impl BlockPartitionedVec { } } -impl BlockPartitionedVec { - /// Creates a new `BlockPartitionedVec` from per-block counts, with all values initialized - /// to `value`. - pub(crate) fn new(counts: impl ExactSizeIterator, value: T, alloc: A) -> Self { - let (offsets, length) = Self::build_offsets(counts, alloc.clone()); - let data = alloc::vec::from_elem_in(value, length, alloc); - - Self { offsets, data } - } -} - #[cfg(test)] mod tests { #![expect(clippy::cast_possible_truncation)] @@ -126,7 +126,7 @@ mod tests { /// Single block with 5 elements: all accessible via `of()`/`of_mut()`. #[test] fn single_block() { - let mut vec = BlockPartitionedVec::new([5].into_iter(), 0_u32, Global); + let mut vec = BlockPartitionedVec::new_in([5].into_iter(), 0_u32, Global); assert_eq!(vec.len(), 5); assert_eq!(vec.block_count(), 1); @@ -143,7 +143,7 @@ mod tests { /// Multiple blocks with varying sizes: elements are correctly partitioned. 
#[test] fn multiple_blocks() { - let mut vec = BlockPartitionedVec::new([2, 3, 1].into_iter(), 0_u32, Global); + let mut vec = BlockPartitionedVec::new_in([2, 3, 1].into_iter(), 0_u32, Global); assert_eq!(vec.len(), 6); assert_eq!(vec.block_count(), 3); @@ -163,7 +163,7 @@ mod tests { /// Blocks with zero elements produce empty slices. #[test] fn empty_blocks() { - let vec = BlockPartitionedVec::new([0, 3, 0].into_iter(), 0_u32, Global); + let vec = BlockPartitionedVec::new_in([0, 3, 0].into_iter(), 0_u32, Global); assert_eq!(vec.len(), 3); assert_eq!(vec.block_count(), 3); @@ -175,7 +175,7 @@ mod tests { /// Zero blocks is valid. #[test] fn no_blocks() { - let vec = BlockPartitionedVec::new(core::iter::empty::(), 0_u32, Global); + let vec = BlockPartitionedVec::new_in(core::iter::empty::(), 0_u32, Global); assert_eq!(vec.len(), 0); assert_eq!(vec.block_count(), 0); @@ -184,7 +184,7 @@ mod tests { /// `iter()` yields all elements in flat order. #[test] fn iter_all_elements() { - let mut vec = BlockPartitionedVec::new([2, 1].into_iter(), 0_u32, Global); + let mut vec = BlockPartitionedVec::new_in([2, 1].into_iter(), 0_u32, Global); vec.of_mut(BasicBlockId::new(0))[0] = 1; vec.of_mut(BasicBlockId::new(0))[1] = 2; @@ -197,7 +197,7 @@ mod tests { /// `remap()` rebuilds the offset table without changing data. 
#[test] fn remap_preserves_data() { - let mut vec = BlockPartitionedVec::new([3, 3].into_iter(), 0_u32, Global); + let mut vec = BlockPartitionedVec::new_in([3, 3].into_iter(), 0_u32, Global); // Write sequential values for (index, value) in vec.of_mut(BasicBlockId::new(0)).iter_mut().enumerate() { diff --git a/libs/@local/hashql/mir/src/pass/execution/cost.rs b/libs/@local/hashql/mir/src/pass/execution/cost.rs index ec3e5ab350d..d6b14c77bfd 100644 --- a/libs/@local/hashql/mir/src/pass/execution/cost.rs +++ b/libs/@local/hashql/mir/src/pass/execution/cost.rs @@ -319,7 +319,7 @@ pub(crate) struct StatementCostVec(BlockPartitionedVec StatementCostVec { #[cfg(test)] pub(crate) fn from_iter(iter: impl ExactSizeIterator, alloc: A) -> Self { - Self(BlockPartitionedVec::new(iter, None, alloc)) + Self(BlockPartitionedVec::new_in(iter, None, alloc)) } /// Creates a cost map with space for all statements in the given blocks. @@ -327,7 +327,7 @@ impl StatementCostVec { /// All costs are initialized to `None` (unsupported). Use indexing to assign costs. 
#[expect(clippy::cast_possible_truncation)] pub(crate) fn new_in(blocks: &BasicBlocks, alloc: A) -> Self { - Self(BlockPartitionedVec::new( + Self(BlockPartitionedVec::new_in( blocks.iter().map(|block| block.statements.len() as u32), None, alloc, diff --git a/libs/@local/hashql/mir/src/pass/execution/fusion/mod.rs b/libs/@local/hashql/mir/src/pass/execution/fusion/mod.rs index d10c00d0969..32e14bdd6c4 100644 --- a/libs/@local/hashql/mir/src/pass/execution/fusion/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/fusion/mod.rs @@ -21,9 +21,9 @@ use alloc::alloc::Global; use core::{alloc::Allocator, convert::Infallible, mem}; -use hashql_core::{graph::Predecessors as _, heap::Heap, id::Id as _}; +use hashql_core::{graph::Predecessors as _, id::Id as _}; -use super::target::TargetId; +use super::{target::TargetId, traversal::Traversals}; use crate::{ body::{ Body, @@ -31,6 +31,10 @@ use crate::{ location::Location, terminator::TerminatorKind, }, + pass::{ + analysis::dataflow::lattice::{HasBottom as _, JoinSemiLattice as _}, + execution::traversal::{TraversalLattice, TraversalPathBitSet}, + }, visit::{VisitorMut, r#mut::filter}, }; @@ -131,6 +135,8 @@ fn fuse_blocks( scratch: S, body: &mut Body<'_>, targets: &mut BasicBlockVec, + per_block_paths: &mut BasicBlockVec, + lattice: TraversalLattice, ) { let reverse_postorder = body .basic_blocks @@ -175,6 +181,11 @@ fn fuse_blocks( // The tail block is now dead tail_block.terminator.kind = TerminatorKind::Unreachable; + + // We effectively do the same we've done for the block and simply join the head with the + // joined tail paths. We dot need to do that with the targets, as the targets are the same. + let tail_paths = per_block_paths[block_id]; + lattice.join(&mut per_block_paths[block_head], &tail_paths); } // Phase 3: compaction. 
@@ -212,10 +223,12 @@ fn fuse_blocks( body.basic_blocks.as_mut().swap(old_id, new_id); targets.swap(old_id, new_id); + per_block_paths.swap(old_id, new_id); } body.basic_blocks.as_mut().truncate(new_len); targets.truncate(new_len); + per_block_paths.truncate(new_len); } /// Fuses adjacent MIR [`BasicBlock`]s that share the same execution target. @@ -226,47 +239,72 @@ fn fuse_blocks( /// /// [`BasicBlock`]: crate::body::basic_block::BasicBlock /// [`BasicBlockSplitting`]: super::splitting::BasicBlockSplitting -pub(crate) struct BasicBlockFusion { - alloc: A, +pub(crate) struct BasicBlockFusion { + traversals: Traversals, + scratch: S, } impl BasicBlockFusion { /// Creates a new pass using the global allocator. #[must_use] - pub(crate) const fn new() -> Self { - Self::new_in(Global) + #[cfg(test)] + pub(crate) const fn new(traversals: Traversals) -> Self { + Self::new_in(traversals, Global) } } -impl BasicBlockFusion { +impl BasicBlockFusion { /// Creates a new pass using the provided allocator. - pub(crate) const fn new_in(alloc: A) -> Self { - Self { alloc } + pub(crate) const fn new_in(traversals: Traversals, scratch: S) -> Self { + Self { + traversals, + scratch, + } + } + + #[cfg(test)] + pub(crate) fn fuse( + &self, + body: &mut Body<'_>, + targets: &mut BasicBlockVec, + ) -> BasicBlockVec { + self.fuse_in(body, targets, Global) } /// Fuses blocks in `body` that share the same target assignment. /// /// Modifies both `body` and `targets` in place. The `targets` vec is compacted to match /// the new block layout. 
- pub(crate) fn fuse<'heap>( + pub(crate) fn fuse_in( &self, - body: &mut Body<'heap>, - targets: &mut BasicBlockVec, - ) where - A: Clone, - { + body: &mut Body<'_>, + targets: &mut BasicBlockVec, + alloc: A, + ) -> BasicBlockVec { debug_assert_eq!( body.basic_blocks.len(), targets.len(), "target vec length must match basic block count" ); - fuse_blocks(self.alloc.clone(), body, targets); - } -} + let vertex = self.traversals.vertex(); + let lattice = TraversalLattice::new(vertex); + + let mut per_block_paths = BasicBlockVec::from_domain_derive_in( + |id, _| { + self.traversals + .of(id) + .iter() + .fold(lattice.bottom(), |lhs: TraversalPathBitSet, rhs| { + lattice.join_owned(lhs, rhs) + }) + }, + &body.basic_blocks, + alloc, + ); + + fuse_blocks(&self.scratch, body, targets, &mut per_block_paths, lattice); -impl Default for BasicBlockFusion { - fn default() -> Self { - Self::new() + per_block_paths } } diff --git a/libs/@local/hashql/mir/src/pass/execution/fusion/tests.rs b/libs/@local/hashql/mir/src/pass/execution/fusion/tests.rs index b3b5919de9f..e6be262de90 100644 --- a/libs/@local/hashql/mir/src/pass/execution/fusion/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/fusion/tests.rs @@ -1,12 +1,14 @@ //! Tests for basic block fusion. 
#![expect(clippy::min_ident_chars)] +use alloc::alloc::Global; use core::assert_matches; use std::{io::Write as _, path::PathBuf}; use hashql_core::{ heap::Heap, pretty::Formatter, + symbol::sym, r#type::{TypeFormatter, TypeFormatterOptions, environment::Environment}, }; use hashql_diagnostics::DiagnosticIssues; @@ -22,27 +24,32 @@ use crate::{ builder::body, context::MirContext, intern::Interner, - pass::execution::target::TargetId, + pass::execution::{ + VertexType, + target::TargetId, + traversal::{EntityPath, TraversalAnalysis, Traversals}, + }, pretty::TextFormatOptions, }; -fn make_targets<'heap>( - heap: &'heap Heap, - assignments: &[TargetId], -) -> BasicBlockVec { - let mut targets = BasicBlockVec::with_capacity_in(assignments.len(), heap); +fn make_targets(assignments: &[TargetId]) -> BasicBlockVec { + let mut targets = BasicBlockVec::with_capacity_in(assignments.len(), Global); for &target in assignments { targets.push(target); } targets } +fn empty_traversals(body: &Body<'_>) -> Traversals { + Traversals::new_in(&body.basic_blocks, VertexType::Entity, Global) +} + #[track_caller] fn assert_fusion<'heap>( name: &'static str, context: &MirContext<'_, 'heap>, body: &Body<'heap>, - targets: &BasicBlockVec, + targets: &BasicBlockVec, ) { let formatter = Formatter::new(context.heap); let type_formatter = TypeFormatter::new(&formatter, context.env, TypeFormatterOptions::terse()); @@ -94,7 +101,7 @@ fn fusable_into_same_target_goto() { } }); - let targets = make_targets(&heap, &[TargetId::Interpreter, TargetId::Interpreter]); + let targets = make_targets(&[TargetId::Interpreter, TargetId::Interpreter]); let result = fusable_into(&body, &targets, BasicBlockId::new(1)); assert_eq!(result, Some(BasicBlockId::new(0))); @@ -119,7 +126,7 @@ fn fusable_into_different_targets() { } }); - let targets = make_targets(&heap, &[TargetId::Interpreter, TargetId::Postgres]); + let targets = make_targets(&[TargetId::Interpreter, TargetId::Postgres]); let result = 
fusable_into(&body, &targets, BasicBlockId::new(1)); assert_eq!(result, None); @@ -151,15 +158,12 @@ fn fusable_into_multiple_predecessors() { } }); - let targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - ], - ); + let targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + ]); // bb3 has two predecessors — not fusable let result = fusable_into(&body, &targets, BasicBlockId::new(3)); @@ -184,7 +188,7 @@ fn fusable_into_goto_with_args() { } }); - let targets = make_targets(&heap, &[TargetId::Interpreter, TargetId::Interpreter]); + let targets = make_targets(&[TargetId::Interpreter, TargetId::Interpreter]); // The Goto carries an argument — not fusable even though targets match. let result = fusable_into(&body, &targets, BasicBlockId::new(1)); @@ -212,7 +216,7 @@ fn fusable_into_target_has_params() { } }); - let targets = make_targets(&heap, &[TargetId::Interpreter, TargetId::Interpreter]); + let targets = make_targets(&[TargetId::Interpreter, TargetId::Interpreter]); let result = fusable_into(&body, &targets, BasicBlockId::new(1)); assert_eq!(result, None); @@ -241,12 +245,14 @@ fn fuse_no_changes_needed() { diagnostics: DiagnosticIssues::new(), }; - let mut targets = make_targets(&heap, &[TargetId::Interpreter]); + let mut targets = make_targets(&[TargetId::Interpreter]); - BasicBlockFusion::new().fuse(&mut body, &mut targets); + let traversals = empty_traversals(&body); + let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); assert_eq!(body.basic_blocks.len(), 1); assert_eq!(targets.len(), 1); + assert_eq!(per_block_paths.len(), body.basic_blocks.len()); assert_fusion("fuse_no_changes_needed", &context, &body, &targets); } @@ -276,12 +282,14 @@ fn fuse_two_same_target_blocks() { diagnostics: DiagnosticIssues::new(), }; - let mut targets = make_targets(&heap, 
&[TargetId::Interpreter, TargetId::Interpreter]); + let mut targets = make_targets(&[TargetId::Interpreter, TargetId::Interpreter]); - BasicBlockFusion::new().fuse(&mut body, &mut targets); + let traversals = empty_traversals(&body); + let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); assert_eq!(body.basic_blocks.len(), 1); assert_eq!(targets.len(), 1); + assert_eq!(per_block_paths.len(), body.basic_blocks.len()); assert_eq!(targets[BasicBlockId::START], TargetId::Interpreter); assert_matches!( body.basic_blocks[BasicBlockId::START].terminator.kind, @@ -320,15 +328,14 @@ fn fuse_chain_of_three() { diagnostics: DiagnosticIssues::new(), }; - let mut targets = make_targets( - &heap, - &[TargetId::Postgres, TargetId::Postgres, TargetId::Postgres], - ); + let mut targets = make_targets(&[TargetId::Postgres, TargetId::Postgres, TargetId::Postgres]); - BasicBlockFusion::new().fuse(&mut body, &mut targets); + let traversals = empty_traversals(&body); + let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); assert_eq!(body.basic_blocks.len(), 1); assert_eq!(targets.len(), 1); + assert_eq!(per_block_paths.len(), body.basic_blocks.len()); assert_eq!(targets[BasicBlockId::START], TargetId::Postgres); assert_eq!(body.basic_blocks[BasicBlockId::START].statements.len(), 3); assert_fusion("fuse_chain_of_three", &context, &body, &targets); @@ -365,19 +372,18 @@ fn fuse_preserves_different_targets() { }; // bb0 and bb1 are Interpreter, bb2 is Postgres — bb2 cannot fuse into bb1 - let mut targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Postgres, - ], - ); + let mut targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Postgres, + ]); - BasicBlockFusion::new().fuse(&mut body, &mut targets); + let traversals = empty_traversals(&body); + let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); 
assert_eq!(body.basic_blocks.len(), 2); assert_eq!(targets.len(), 2); + assert_eq!(per_block_paths.len(), body.basic_blocks.len()); assert_eq!(targets[BasicBlockId::new(0)], TargetId::Interpreter); assert_eq!(targets[BasicBlockId::new(1)], TargetId::Postgres); assert_fusion( @@ -423,20 +429,19 @@ fn fuse_partial_chain() { }; // bb0-bb1 are Interpreter, bb2-bb3 are Postgres - let mut targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Postgres, - TargetId::Postgres, - ], - ); + let mut targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Postgres, + TargetId::Postgres, + ]); - BasicBlockFusion::new().fuse(&mut body, &mut targets); + let traversals = empty_traversals(&body); + let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); assert_eq!(body.basic_blocks.len(), 2); assert_eq!(targets.len(), 2); + assert_eq!(per_block_paths.len(), body.basic_blocks.len()); assert_eq!(targets[BasicBlockId::new(0)], TargetId::Interpreter); assert_eq!(targets[BasicBlockId::new(1)], TargetId::Postgres); assert_eq!(body.basic_blocks[BasicBlockId::new(0)].statements.len(), 2); @@ -479,20 +484,19 @@ fn fuse_updates_branch_references() { }; // bb0 and bb1 same target — fusable. bb2 and bb3 are leaves. 
- let mut targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - ], - ); + let mut targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + ]); - BasicBlockFusion::new().fuse(&mut body, &mut targets); + let traversals = empty_traversals(&body); + let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); assert_eq!(body.basic_blocks.len(), 3); assert_eq!(targets.len(), 3); + assert_eq!(per_block_paths.len(), body.basic_blocks.len()); // The fused bb0 should have a SwitchInt terminator pointing to remapped bb2→bb1 and bb3→bb2 let fused = &body.basic_blocks[BasicBlockId::START]; @@ -535,20 +539,19 @@ fn fuse_does_not_fuse_join_points() { }; // All same target, but bb3 has 2 predecessors — not fusable - let mut targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - ], - ); + let mut targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + ]); - BasicBlockFusion::new().fuse(&mut body, &mut targets); + let traversals = empty_traversals(&body); + let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); assert_eq!(body.basic_blocks.len(), 4); assert_eq!(targets.len(), 4); + assert_eq!(per_block_paths.len(), body.basic_blocks.len()); assert_fusion("fuse_does_not_fuse_join_points", &context, &body, &targets); } @@ -577,13 +580,15 @@ fn fuse_goto_with_args_not_fused() { diagnostics: DiagnosticIssues::new(), }; - let mut targets = make_targets(&heap, &[TargetId::Interpreter, TargetId::Interpreter]); + let mut targets = make_targets(&[TargetId::Interpreter, TargetId::Interpreter]); - BasicBlockFusion::new().fuse(&mut body, &mut targets); + let traversals = empty_traversals(&body); + let per_block_paths = 
BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); // Both blocks survive because the Goto carries arguments. assert_eq!(body.basic_blocks.len(), 2); assert_eq!(targets.len(), 2); + assert_eq!(per_block_paths.len(), body.basic_blocks.len()); assert_fusion("fuse_goto_with_args_not_fused", &context, &body, &targets); } @@ -637,28 +642,38 @@ fn fuse_diamond_non_monotonic_rpo() { // bb2 and bb3 same target (bb3 fuses into bb2), bb1 and bb4 same target (bb4 fuses // into bb1). bb5 has two predecessors — not fusable. - let mut targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Postgres, - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Postgres, - TargetId::Interpreter, - ], - ); - - BasicBlockFusion::new().fuse(&mut body, &mut targets); + let mut targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Postgres, + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Postgres, + TargetId::Interpreter, + ]); + + let traversals = empty_traversals(&body); + let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); // Surviving: bb0(→0), bb1(→1), bb2(→2), bb5(→3) assert_eq!(body.basic_blocks.len(), 4); assert_eq!(targets.len(), 4); + assert_eq!(per_block_paths.len(), body.basic_blocks.len()); assert_eq!(targets[BasicBlockId::new(0)], TargetId::Interpreter); assert_eq!(targets[BasicBlockId::new(1)], TargetId::Postgres); assert_eq!(targets[BasicBlockId::new(2)], TargetId::Interpreter); assert_eq!(targets[BasicBlockId::new(3)], TargetId::Interpreter); + // Verify per_block_paths survived swap-based compaction at the correct indices. + // With empty traversals all entries are empty, but this exercises the swap codepath. + for index in 0..4 { + assert!( + per_block_paths[BasicBlockId::new(index)] + .as_entity() + .expect("entity vertex") + .is_empty(), + ); + } + // bb1 absorbed bb4's statements. 
assert_eq!(body.basic_blocks[BasicBlockId::new(1)].statements.len(), 2); // bb2 absorbed bb3's statements. @@ -704,21 +719,20 @@ fn fuse_backward_chain() { diagnostics: DiagnosticIssues::new(), }; - let mut targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - ], - ); + let mut targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + ]); - BasicBlockFusion::new().fuse(&mut body, &mut targets); + let traversals = empty_traversals(&body); + let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); // Surviving: bb0(→0), bb2(→1), bb3(→2). bb1 fused into bb2. assert_eq!(body.basic_blocks.len(), 3); assert_eq!(targets.len(), 3); + assert_eq!(per_block_paths.len(), body.basic_blocks.len()); // bb2 absorbed bb1's return terminator. assert_matches!( @@ -734,3 +748,52 @@ fn fuse_backward_chain() { assert_fusion("fuse_backward_chain", &context, &body, &targets); } + +#[test] +fn fuse_joins_traversal_paths() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let mut body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val1: ?, val2: ?; + @proj props = vertex.properties: ?, + metadata = vertex.metadata: ?, + prov = metadata.provenance: ?, + edition = prov.edition: ?; + + bb0() { + val1 = load props; + goto bb1(); + }, + bb1() { + val2 = load edition; + return val2; + } + }); + + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let mut targets = make_targets(&[TargetId::Interpreter, TargetId::Interpreter]); + + let traversals = + TraversalAnalysis::new(VertexType::Entity).traversal_analysis_in(&body, Global); + let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); + + assert_eq!(body.basic_blocks.len(), 1); + assert_eq!(targets.len(), 1); + assert_eq!(per_block_paths.len(), body.basic_blocks.len()); + + let fused = per_block_paths[BasicBlockId::START] + .as_entity() + .expect("entity vertex"); + assert!(fused.contains(EntityPath::Properties)); + assert!(fused.contains(EntityPath::ProvenanceEdition)); + + assert_fusion("fuse_joins_traversal_paths", &context, &body, &targets); +} diff --git a/libs/@local/hashql/mir/src/pass/execution/island/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/mod.rs index 52d95518f2f..49f192e096e 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/mod.rs @@ -15,10 +15,17 @@ use hashql_core::{ id::{self, bit_vec::DenseBitSet}, }; -use super::target::TargetId; -use crate::body::{ - Body, - basic_block::{BasicBlockId, BasicBlockSlice, BasicBlockUnionFind, BasicBlockVec}, +use super::{ + VertexType, + target::TargetId, + traversal::{TraversalLattice, TraversalPathBitSet}, +}; +use crate::{ + body::{ + Body, + basic_block::{BasicBlockId, BasicBlockSlice, BasicBlockUnionFind, BasicBlockVec}, + }, + pass::analysis::dataflow::lattice::{HasBottom as _, JoinSemiLattice as _}, }; #[cfg(test)] @@ -43,6 +50,7 @@ 
id::newtype_collections!(pub type Island* from IslandId); pub struct Island { target: TargetId, members: DenseBitSet, + traversals: TraversalPathBitSet, } impl Island { @@ -74,6 +82,11 @@ impl Island { self.members.is_empty() } + #[must_use] + pub const fn traversals(&self) -> TraversalPathBitSet { + self.traversals + } + /// Iterates over the [`BasicBlockId`]s in this island in ascending order. #[inline] pub fn iter(&self) -> impl Iterator + '_ { @@ -117,12 +130,17 @@ impl IslandPlacement { pub(crate) fn run( &self, body: &Body<'_>, + vertex: VertexType, + targets: &BasicBlockSlice, + traversals: &BasicBlockSlice, + alloc: A, ) -> IslandVec where A: Allocator, { + let lattice = TraversalLattice::new(vertex); let mut union = BasicBlockUnionFind::new_in(body.basic_blocks.len(), self.scratch.clone()); for bb in body.basic_blocks.ids() { @@ -144,10 +162,12 @@ impl IslandPlacement { islands.push(Island { target: targets[root], members: DenseBitSet::new_empty(body.basic_blocks.len()), + traversals: lattice.bottom(), }) }); islands[index].members.insert(bb); + lattice.join(&mut islands[index].traversals, &traversals[bb]); } islands diff --git a/libs/@local/hashql/mir/src/pass/execution/island/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/tests.rs index d5846243f0c..ffae265c17b 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/tests.rs @@ -3,29 +3,38 @@ use alloc::alloc::Global; -use hashql_core::{heap::Heap, r#type::environment::Environment}; +use hashql_core::{heap::Heap, symbol::sym, r#type::environment::Environment}; +use hashql_diagnostics::DiagnosticIssues; use crate::{ body::basic_block::{BasicBlockId, BasicBlockVec}, builder::body, + context::MirContext, intern::Interner, pass::execution::{ + VertexType, island::{IslandId, IslandPlacement}, target::TargetId, + traversal::{EntityPath, TraversalAnalysis, TraversalPathBitSet}, }, }; -fn make_targets<'heap>( - heap: &'heap Heap, 
- assignments: &[TargetId], -) -> BasicBlockVec { - let mut targets = BasicBlockVec::with_capacity_in(assignments.len(), heap); +fn make_targets(assignments: &[TargetId]) -> BasicBlockVec { + let mut targets = BasicBlockVec::with_capacity_in(assignments.len(), Global); for &target in assignments { targets.push(target); } targets } +fn empty_per_block_paths(block_count: usize) -> BasicBlockVec { + BasicBlockVec::from_elem_in( + TraversalPathBitSet::empty(VertexType::Entity), + block_count, + Global, + ) +} + /// Single block — produces exactly one island containing that block. #[test] fn single_block() { @@ -42,8 +51,9 @@ fn single_block() { } }); - let targets = make_targets(&heap, &[TargetId::Interpreter]); - let islands = IslandPlacement::new().run(&body, &targets, Global); + let targets = make_targets(&[TargetId::Interpreter]); + let paths = empty_per_block_paths(body.basic_blocks.len()); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, &paths, Global); assert_eq!(islands.len(), 1); assert_eq!(islands[IslandId::new(0)].target(), TargetId::Interpreter); @@ -71,8 +81,9 @@ fn same_target_chain() { } }); - let targets = make_targets(&heap, &[TargetId::Postgres, TargetId::Postgres]); - let islands = IslandPlacement::new().run(&body, &targets, Global); + let targets = make_targets(&[TargetId::Postgres, TargetId::Postgres]); + let paths = empty_per_block_paths(body.basic_blocks.len()); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, &paths, Global); assert_eq!(islands.len(), 1); assert_eq!(islands[IslandId::new(0)].target(), TargetId::Postgres); @@ -101,8 +112,9 @@ fn different_targets() { } }); - let targets = make_targets(&heap, &[TargetId::Interpreter, TargetId::Postgres]); - let islands = IslandPlacement::new().run(&body, &targets, Global); + let targets = make_targets(&[TargetId::Interpreter, TargetId::Postgres]); + let paths = empty_per_block_paths(body.basic_blocks.len()); + let islands = 
IslandPlacement::new().run(&body, VertexType::Entity, &targets, &paths, Global); assert_eq!(islands.len(), 2); @@ -148,16 +160,14 @@ fn diamond_same_target() { } }); - let targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - ], - ); - let islands = IslandPlacement::new().run(&body, &targets, Global); + let targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + ]); + let paths = empty_per_block_paths(body.basic_blocks.len()); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, &paths, Global); assert_eq!(islands.len(), 1); assert_eq!(islands[IslandId::new(0)].target(), TargetId::Interpreter); @@ -196,16 +206,14 @@ fn diamond_mixed_targets() { // bb1 nor bb2 has the same target as bb3, so bb0 and bb3 are only connected // transitively through different-target blocks. No direct same-target edge between // bb0 and bb3, so they must be separate islands. - let targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Postgres, - TargetId::Embedding, - TargetId::Interpreter, - ], - ); - let islands = IslandPlacement::new().run(&body, &targets, Global); + let targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Postgres, + TargetId::Embedding, + TargetId::Interpreter, + ]); + let paths = empty_per_block_paths(body.basic_blocks.len()); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, &paths, Global); // bb0 alone, bb1 alone, bb2 alone, bb3 alone — 4 islands, since no same-target // edges exist between any pair of connected blocks. @@ -221,6 +229,9 @@ fn diamond_mixed_targets() { } /// Linear chain with alternating targets — each block is its own island. 
+/// +/// Also verifies that same-target blocks separated by a different-target block (bb0 and bb2 +/// are both Interpreter but bb1 is Postgres between them) end up in separate islands. #[test] fn alternating_targets() { let heap = Heap::new(); @@ -248,79 +259,138 @@ fn alternating_targets() { } }); - let targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Postgres, - TargetId::Interpreter, - TargetId::Postgres, - ], - ); - let islands = IslandPlacement::new().run(&body, &targets, Global); + let targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Postgres, + TargetId::Interpreter, + TargetId::Postgres, + ]); + let paths = empty_per_block_paths(body.basic_blocks.len()); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, &paths, Global); assert_eq!(islands.len(), 4); for island_id in islands.ids() { let island = &islands[island_id]; + assert_eq!(island.count(), 1); + let block = island.iter().next().expect("island is non-empty"); assert_eq!(island.target(), targets[block]); + assert!(island.contains(block)); } + + // bb0 and bb2 share a target (Interpreter) but must be in different islands + // because no direct same-target edge connects them. + let bb0_island = islands + .ids() + .find(|&id| islands[id].contains(BasicBlockId::new(0))) + .unwrap(); + let bb2_island = islands + .ids() + .find(|&id| islands[id].contains(BasicBlockId::new(2))) + .unwrap(); + assert_ne!(bb0_island, bb2_island); } -/// `Island::is_empty` is false for any island produced by the pass. +/// Three same-target blocks in a chain — union-find transitively merges into one island. 
#[test] -fn island_is_never_empty() { +fn transitive_same_target_chain() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int; + decl a: Int, b: Int, c: Int; bb0() { - x = load 1; - return x; + a = load 1; + goto bb1(); + }, + bb1() { + b = load 2; + goto bb2(); + }, + bb2() { + c = load 3; + return c; } }); - let targets = make_targets(&heap, &[TargetId::Interpreter]); - let islands = IslandPlacement::new().run(&body, &targets, Global); + let targets = make_targets(&[TargetId::Postgres, TargetId::Postgres, TargetId::Postgres]); + let paths = empty_per_block_paths(body.basic_blocks.len()); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, &paths, Global); - for island_id in islands.ids() { - assert!(!islands[island_id].is_empty()); - } + assert_eq!(islands.len(), 1); + assert_eq!(islands[IslandId::new(0)].count(), 3); + assert!(islands[IslandId::new(0)].contains(BasicBlockId::new(0))); + assert!(islands[IslandId::new(0)].contains(BasicBlockId::new(1))); + assert!(islands[IslandId::new(0)].contains(BasicBlockId::new(2))); } -/// `Island::iter` yields exactly the blocks reported by `contains`. +/// Island traversals are the join of per-block paths for all blocks in the island. +/// +/// Two same-target blocks access different vertex paths (.properties and +/// .metadata.provenance.edition). The island's traversals must contain both. #[test] -fn iter_matches_contains() { +fn island_joins_traversal_paths() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, y: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val1: ?, val2: ?; + @proj props = vertex.properties: ?, + metadata = vertex.metadata: ?, + prov = metadata.provenance: ?, + edition = prov.edition: ?; bb0() { - x = load 1; + val1 = load props; goto bb1(); }, bb1() { - y = load 2; - return y; + val2 = load edition; + return val2; } }); - let targets = make_targets(&heap, &[TargetId::Interpreter, TargetId::Postgres]); - let islands = IslandPlacement::new().run(&body, &targets, Global); + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let traversals = + TraversalAnalysis::new(VertexType::Entity).traversal_analysis_in(&body, Global); + let vertex = traversals.vertex(); + + // Fold per-location traversals into per-block bitsets (same logic as fusion's fuse_in). + use crate::pass::{ + analysis::dataflow::lattice::{HasBottom as _, JoinSemiLattice as _}, + execution::traversal::TraversalLattice, + }; + + let lattice = TraversalLattice::new(vertex); + let per_block_paths = BasicBlockVec::from_domain_derive_in( + |block_id, _| { + traversals + .of(block_id) + .iter() + .fold(lattice.bottom(), |lhs: TraversalPathBitSet, rhs| { + lattice.join_owned(lhs, rhs) + }) + }, + &body.basic_blocks, + Global, + ); - for island_id in islands.ids() { - let island = &islands[island_id]; - let members: Vec<_> = island.iter().collect(); - assert_eq!(members.len(), island.count()); + let targets = make_targets(&[TargetId::Interpreter, TargetId::Interpreter]); + let islands = IslandPlacement::new().run(&body, vertex, &targets, &per_block_paths, Global); - for &block in &members { - assert!(island.contains(block)); - } - } + assert_eq!(islands.len(), 1); + let island = &islands[IslandId::new(0)]; + let traversal_paths = island.traversals(); + let joined = traversal_paths.as_entity().expect("entity vertex"); + assert!(joined.contains(EntityPath::Properties)); + 
assert!(joined.contains(EntityPath::ProvenanceEdition)); } diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index 0230aea33b0..491aeb310aa 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -70,7 +70,7 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { unreachable!("unsupported graph read target") }; - let mut traversals = TraversalAnalysis::traversal_analysis_in(context, body, &self.scratch); + let traversals = TraversalAnalysis::new(vertex).traversal_analysis_in(body, &self.scratch); let mut statement_costs: TargetArray<_> = TargetArray::from_fn(|_| None); @@ -96,9 +96,6 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { &self.scratch, ); - // The body has been split (sequentially) and like the statement costs needs to be remapped - traversals.remap(&body.basic_blocks); - let terminators = TerminatorPlacement::new_in( TransferCostConfig::new(InformationRange::full()), &self.scratch, @@ -124,13 +121,18 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { } .build_in(body, &self.scratch); - // TODO: move to per island requirements here let mut assignment = solver.run(context, body); - let fusion = BasicBlockFusion::new_in(&self.scratch); - fusion.fuse(body, &mut assignment); + let fusion = BasicBlockFusion::new_in(traversals, &self.scratch); + let traversals = fusion.fuse_in(body, &mut assignment, context.heap); - let islands = IslandPlacement::new_in(&self.scratch).run(body, &assignment, context.heap); + let islands = IslandPlacement::new_in(&self.scratch).run( + body, + vertex, + &assignment, + &traversals, + context.heap, + ); (assignment, islands) } diff --git a/libs/@local/hashql/mir/src/pass/execution/splitting/mod.rs b/libs/@local/hashql/mir/src/pass/execution/splitting/mod.rs index 8ba21bdb58a..6ae7387422c 100644 --- a/libs/@local/hashql/mir/src/pass/execution/splitting/mod.rs +++ 
b/libs/@local/hashql/mir/src/pass/execution/splitting/mod.rs @@ -306,9 +306,13 @@ impl BasicBlockSplitting { self.split_in(context, body, statement_costs, Global) } - /// Splits [`Body`] blocks and returns per-block [`TargetBitSet`] affinities. + /// Splits [`Body`] blocks and returns per-block [`TargetBitSet`] affinities along with + /// the per-block region counts used during splitting. /// - /// The returned vector is indexed by the new [`BasicBlockId`]s. + /// The first element is indexed by the new [`BasicBlockId`]s. The second element maps + /// each original block to the number of blocks it was split into, which callers can use + /// to redistribute parallel data structures via + /// [`split_remap`](super::traversal::Traversals::split_remap). pub(crate) fn split_in<'heap, A: Allocator>( &self, context: &MirContext<'_, 'heap>, diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs index c99a69c18aa..924f2da2cb2 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs @@ -10,6 +10,7 @@ use crate::{ def::DefId, intern::Interner, pass::execution::{ + VertexType, statement_placement::{ InterpreterStatementPlacement, tests::{assert_placement, run_placement}, @@ -61,7 +62,8 @@ fn all_statements_supported() { diagnostics: DiagnosticIssues::new(), }; - let traversals = TraversalAnalysis::traversal_analysis_in(&context, &body, context.heap); + let traversals = + TraversalAnalysis::new(VertexType::Entity).traversal_analysis_in(&body, context.heap); let mut placement = InterpreterStatementPlacement::new(&traversals); let (body, statement_costs) = run_placement(&context, &mut placement, body); @@ -102,7 +104,8 @@ fn traversal_single_path_cost() { diagnostics: DiagnosticIssues::new(), }; - let traversals = 
TraversalAnalysis::traversal_analysis_in(&context, &body, context.heap); + let traversals = + TraversalAnalysis::new(VertexType::Entity).traversal_analysis_in(&body, context.heap); let mut placement = InterpreterStatementPlacement::new(&traversals); let (body, statement_costs) = run_placement(&context, &mut placement, body); @@ -145,7 +148,8 @@ fn traversal_multiple_paths_cost() { diagnostics: DiagnosticIssues::new(), }; - let traversals = TraversalAnalysis::traversal_analysis_in(&context, &body, context.heap); + let traversals = + TraversalAnalysis::new(VertexType::Entity).traversal_analysis_in(&body, context.heap); let mut placement = InterpreterStatementPlacement::new(&traversals); let (body, statement_costs) = run_placement(&context, &mut placement, body); @@ -189,7 +193,8 @@ fn traversal_swallowing_reduces_cost() { diagnostics: DiagnosticIssues::new(), }; - let traversals = TraversalAnalysis::traversal_analysis_in(&context, &body, context.heap); + let traversals = + TraversalAnalysis::new(VertexType::Entity).traversal_analysis_in(&body, context.heap); let mut placement = InterpreterStatementPlacement::new(&traversals); let (body, statement_costs) = run_placement(&context, &mut placement, body); @@ -232,7 +237,8 @@ fn non_traversal_unaffected_by_costs() { diagnostics: DiagnosticIssues::new(), }; - let traversals = TraversalAnalysis::traversal_analysis_in(&context, &body, context.heap); + let traversals = + TraversalAnalysis::new(VertexType::Entity).traversal_analysis_in(&body, context.heap); let mut placement = InterpreterStatementPlacement::new(&traversals); let (body, statement_costs) = run_placement(&context, &mut placement, body); @@ -277,7 +283,8 @@ fn storage_statements_zero_cost() { diagnostics: DiagnosticIssues::new(), }; - let traversals = TraversalAnalysis::traversal_analysis_in(&context, &body, context.heap); + let traversals = + TraversalAnalysis::new(VertexType::Entity).traversal_analysis_in(&body, context.heap); let mut placement = 
InterpreterStatementPlacement::new(&traversals); let (body, statement_costs) = run_placement(&context, &mut placement, body); diff --git a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs index dd8d8e6d1ff..3206c794d35 100644 --- a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs @@ -241,7 +241,7 @@ pub(crate) struct TerminatorCostVec(BlockPartitionedVec TerminatorCostVec { /// Creates a cost vector sized for `blocks`, with all transitions initially disallowed. pub(crate) fn new(blocks: &BasicBlocks, alloc: A) -> Self { - Self(BlockPartitionedVec::new( + Self(BlockPartitionedVec::new_in( blocks.iter().map(|block| Self::successor_count(block)), TransMatrix::new(), alloc, diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs index 8f740f4b010..bb7a6f8bd6e 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs @@ -10,7 +10,7 @@ use super::TraversalPathBitSet; use crate::{ body::{ Body, Source, - basic_block::BasicBlockId, + basic_block::{BasicBlockId, BasicBlockVec}, basic_blocks::BasicBlocks, local::Local, location::Location, @@ -22,12 +22,68 @@ use crate::{ }, visit::{self, Visitor}, }; +/// Per-block aggregated traversal paths. +/// +/// Stores a single [`TraversalPathBitSet`] per basic block, representing the union of all +/// vertex field accesses across every statement and terminator in that block. 
+pub(crate) struct BlockTraversals { + vertex: VertexType, + inner: BasicBlockVec, +} + +impl BlockTraversals { + pub(crate) fn new_in(len: usize, vertex: VertexType, alloc: A) -> Self { + Self { + vertex, + inner: BasicBlockVec::from_elem_in(TraversalPathBitSet::empty(vertex), len, alloc), + } + } + + #[inline] + pub(crate) fn len(&self) -> usize { + self.inner.len() + } +} + +impl Index for BlockTraversals { + type Output = TraversalPathBitSet; + + #[inline] + fn index(&self, index: BasicBlockId) -> &Self::Output { + &self.inner[index] + } +} + +impl IndexMut for BlockTraversals { + #[inline] + fn index_mut(&mut self, index: BasicBlockId) -> &mut Self::Output { + &mut self.inner[index] + } +} + +impl Index for BlockTraversals { + type Output = TraversalPathBitSet; + + #[inline] + fn index(&self, index: Location) -> &Self::Output { + &self.inner[index.block] + } +} + +impl IndexMut for BlockTraversals { + #[inline] + fn index_mut(&mut self, index: Location) -> &mut Self::Output { + &mut self.inner[index.block] + } +} + /// Per-location resolved traversal paths for a graph read filter body. /// /// Stores a [`TraversalPathBitSet`] for every statement and terminator position, recording /// which vertex fields each location accesses. Indexed by [`Location`] (1-based statement /// index, with the terminator at `statements.len() + 1`). pub(crate) struct Traversals { + vertex: VertexType, inner: BlockPartitionedVec, } @@ -38,7 +94,8 @@ impl Traversals { #[expect(clippy::cast_possible_truncation)] pub(crate) fn new_in(blocks: &BasicBlocks, vertex: VertexType, alloc: A) -> Self { Self { - inner: BlockPartitionedVec::new( + vertex, + inner: BlockPartitionedVec::new_in( blocks .iter() .map(|block| (block.statements.len() + 1) as u32), @@ -53,13 +110,15 @@ impl Traversals { /// Returns the traversal path sets for all statements in `block`. /// /// The returned slice is indexed by statement position (0-based within the block). 
+ #[inline] pub(crate) fn of(&self, block: BasicBlockId) -> &[TraversalPathBitSet] { self.inner.of(block) } - /// Returns a mutable slice of traversal path sets for all statements in `block`. - pub(crate) fn of_mut(&mut self, block: BasicBlockId) -> &mut [TraversalPathBitSet] { - self.inner.of_mut(block) + /// Returns the vertex type of the traversal. + #[inline] + pub(crate) const fn vertex(&self) -> VertexType { + self.vertex } /// Returns the number of vertex paths accessed by the statement at `location`. @@ -68,22 +127,6 @@ impl Traversals { pub(crate) fn path_count(&self, location: Location) -> usize { self[location].len() } - - /// Rebuilds the offset table for a new block layout. - /// - /// Call after transforms that change statement counts per block. Does not resize or clear - /// the data; callers must ensure the total statement count remains unchanged. - #[expect(clippy::cast_possible_truncation)] - pub(crate) fn remap(&mut self, blocks: &BasicBlocks) - where - A: Clone, - { - self.inner.remap( - blocks - .iter() - .map(|block| (block.statements.len() + 1) as u32), - ); - } } impl Index for Traversals { @@ -100,12 +143,15 @@ impl IndexMut for Traversals { } } -struct TraversalAnalysisVisitor { +struct TraversalAnalysisVisitor { vertex: VertexType, - traversals: Traversals, + traversals: T, } -impl<'heap, A: Allocator> Visitor<'heap> for TraversalAnalysisVisitor { +impl<'heap, T> Visitor<'heap> for TraversalAnalysisVisitor +where + T: IndexMut, +{ type Result = Result<(), !>; fn visit_place( @@ -148,29 +194,43 @@ impl<'heap, A: Allocator> Visitor<'heap> for TraversalAnalysisVisitor { } } -pub(crate) struct TraversalAnalysis; +pub(crate) struct TraversalAnalysis { + vertex: VertexType, +} impl TraversalAnalysis { + pub(crate) fn new(vertex: VertexType) -> Self { + Self { vertex } + } + pub(crate) fn traversal_analysis_in<'heap, A: Allocator + Clone>( - context: &MirContext<'_, 'heap>, + &self, body: &Body<'heap>, alloc: A, ) -> Traversals { - match 
body.source { - Source::GraphReadFilter(_) => {} - Source::Ctor(_) | Source::Closure(..) | Source::Thunk(..) | Source::Intrinsic(_) => { - panic!("traversal analysis may only be called on graph related operations") - } - } + let traversals = Traversals::new_in(&body.basic_blocks, self.vertex, alloc); - let Some(vertex) = VertexType::from_local(context.env, &body.local_decls[Local::VERTEX]) - else { - unimplemented!("lookup for declared type") + let mut visitor = TraversalAnalysisVisitor { + vertex: self.vertex, + traversals, }; + Ok(()) = visitor.visit_body(body); - let traversals = Traversals::new_in(&body.basic_blocks, vertex, alloc); + visitor.traversals + } + + pub(crate) fn coarse_traversal_analysis_in<'heap, A: Allocator + Clone>( + &self, + body: &Body<'heap>, + alloc: A, + ) -> BlockTraversals { + let traversals = BlockTraversals::new_in(body.basic_blocks.len(), self.vertex, alloc); + + let mut visitor = TraversalAnalysisVisitor { + vertex: self.vertex, + traversals, + }; - let mut visitor = TraversalAnalysisVisitor { vertex, traversals }; Ok(()) = visitor.visit_body(body); visitor.traversals diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_joins_traversal_paths.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_joins_traversal_paths.snap new file mode 100644 index 00000000000..339822d2ecb --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_joins_traversal_paths.snap @@ -0,0 +1,19 @@ +--- +source: libs/@local/hashql/mir/src/pass/execution/fusion/tests.rs +expression: output +--- +fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> ? { + let %2: ? + let %3: ? 
+ + bb0(): { + %2 = %1.properties + %3 = %1.metadata.provenance.edition + + return %3 + } +} + +================= Block Targets ================== + +bb0: interpreter From dac3186f3a7876030bb62332050dfb6b5cac911b Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sun, 1 Mar 2026 22:36:00 +0100 Subject: [PATCH 21/32] fix: remove the old traversals intermediary --- .../mir/src/pass/execution/fusion/mod.rs | 61 +----- .../mir/src/pass/execution/island/mod.rs | 18 +- .../hashql/mir/src/pass/execution/mod.rs | 20 +- .../statement_placement/interpret/mod.rs | 40 ++-- .../pass/execution/statement_placement/mod.rs | 16 +- .../pass/execution/traversal/analysis/mod.rs | 198 +++--------------- .../src/pass/execution/traversal/entity.rs | 1 + .../mir/src/pass/execution/traversal/mod.rs | 11 +- 8 files changed, 90 insertions(+), 275 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/execution/fusion/mod.rs b/libs/@local/hashql/mir/src/pass/execution/fusion/mod.rs index 32e14bdd6c4..563a57c0fb1 100644 --- a/libs/@local/hashql/mir/src/pass/execution/fusion/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/fusion/mod.rs @@ -23,7 +23,7 @@ use core::{alloc::Allocator, convert::Infallible, mem}; use hashql_core::{graph::Predecessors as _, id::Id as _}; -use super::{target::TargetId, traversal::Traversals}; +use super::target::TargetId; use crate::{ body::{ Body, @@ -31,10 +31,6 @@ use crate::{ location::Location, terminator::TerminatorKind, }, - pass::{ - analysis::dataflow::lattice::{HasBottom as _, JoinSemiLattice as _}, - execution::traversal::{TraversalLattice, TraversalPathBitSet}, - }, visit::{VisitorMut, r#mut::filter}, }; @@ -135,8 +131,6 @@ fn fuse_blocks( scratch: S, body: &mut Body<'_>, targets: &mut BasicBlockVec, - per_block_paths: &mut BasicBlockVec, - lattice: TraversalLattice, ) { let reverse_postorder = body .basic_blocks @@ -181,11 +175,6 @@ fn fuse_blocks( // The tail block is now dead tail_block.terminator.kind = TerminatorKind::Unreachable; - - // We 
effectively do the same we've done for the block and simply join the head with the - // joined tail paths. We dot need to do that with the targets, as the targets are the same. - let tail_paths = per_block_paths[block_id]; - lattice.join(&mut per_block_paths[block_head], &tail_paths); } // Phase 3: compaction. @@ -223,12 +212,10 @@ fn fuse_blocks( body.basic_blocks.as_mut().swap(old_id, new_id); targets.swap(old_id, new_id); - per_block_paths.swap(old_id, new_id); } body.basic_blocks.as_mut().truncate(new_len); targets.truncate(new_len); - per_block_paths.truncate(new_len); } /// Fuses adjacent MIR [`BasicBlock`]s that share the same execution target. @@ -240,7 +227,6 @@ fn fuse_blocks( /// [`BasicBlock`]: crate::body::basic_block::BasicBlock /// [`BasicBlockSplitting`]: super::splitting::BasicBlockSplitting pub(crate) struct BasicBlockFusion { - traversals: Traversals, scratch: S, } @@ -248,63 +234,32 @@ impl BasicBlockFusion { /// Creates a new pass using the global allocator. #[must_use] #[cfg(test)] - pub(crate) const fn new(traversals: Traversals) -> Self { - Self::new_in(traversals, Global) + pub(crate) const fn new() -> Self { + Self::new_in(Global) } } impl BasicBlockFusion { /// Creates a new pass using the provided allocator. - pub(crate) const fn new_in(traversals: Traversals, scratch: S) -> Self { - Self { - traversals, - scratch, - } - } - - #[cfg(test)] - pub(crate) fn fuse( - &self, - body: &mut Body<'_>, - targets: &mut BasicBlockVec, - ) -> BasicBlockVec { - self.fuse_in(body, targets, Global) + pub(crate) const fn new_in(scratch: S) -> Self { + Self { scratch } } /// Fuses blocks in `body` that share the same target assignment. /// /// Modifies both `body` and `targets` in place. The `targets` vec is compacted to match /// the new block layout. 
- pub(crate) fn fuse_in( + pub(crate) fn fuse( &self, body: &mut Body<'_>, targets: &mut BasicBlockVec, - alloc: A, - ) -> BasicBlockVec { + ) { debug_assert_eq!( body.basic_blocks.len(), targets.len(), "target vec length must match basic block count" ); - let vertex = self.traversals.vertex(); - let lattice = TraversalLattice::new(vertex); - - let mut per_block_paths = BasicBlockVec::from_domain_derive_in( - |id, _| { - self.traversals - .of(id) - .iter() - .fold(lattice.bottom(), |lhs: TraversalPathBitSet, rhs| { - lattice.join_owned(lhs, rhs) - }) - }, - &body.basic_blocks, - alloc, - ); - - fuse_blocks(&self.scratch, body, targets, &mut per_block_paths, lattice); - - per_block_paths + fuse_blocks(&self.scratch, body, targets); } } diff --git a/libs/@local/hashql/mir/src/pass/execution/island/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/mod.rs index 49f192e096e..be767150b88 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/mod.rs @@ -18,14 +18,15 @@ use hashql_core::{ use super::{ VertexType, target::TargetId, - traversal::{TraversalLattice, TraversalPathBitSet}, + traversal::{TraversalAnalysisVisitor, TraversalLattice, TraversalPathBitSet, TraversalResult}, }; use crate::{ body::{ Body, basic_block::{BasicBlockId, BasicBlockSlice, BasicBlockUnionFind, BasicBlockVec}, }, - pass::analysis::dataflow::lattice::{HasBottom as _, JoinSemiLattice as _}, + pass::analysis::dataflow::lattice::HasBottom as _, + visit::Visitor as _, }; #[cfg(test)] @@ -133,7 +134,6 @@ impl IslandPlacement { vertex: VertexType, targets: &BasicBlockSlice, - traversals: &BasicBlockSlice, alloc: A, ) -> IslandVec @@ -167,7 +167,17 @@ impl IslandPlacement { }); islands[index].members.insert(bb); - lattice.join(&mut islands[index].traversals, &traversals[bb]); + } + + for island in &mut islands { + let mut visitor = TraversalAnalysisVisitor::new(vertex, |_, result| match result { + TraversalResult::Path(path) 
=> island.traversals.insert(path), + TraversalResult::Complete => island.traversals.insert_all(), + }); + + for id in &island.members { + Ok(()) = visitor.visit_basic_block(id, &body.basic_blocks[id]); + } } islands diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index 491aeb310aa..d117d99c07f 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -38,7 +38,7 @@ use self::{ statement_placement::{StatementPlacement as _, TargetPlacementStatement}, target::TargetArray, terminator_placement::TerminatorPlacement, - traversal::{TransferCostConfig, TraversalAnalysis}, + traversal::TransferCostConfig, }; use super::{analysis::size_estimation::BodyFootprint, transform::Traversals}; use crate::{ @@ -70,16 +70,13 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { unreachable!("unsupported graph read target") }; - let traversals = TraversalAnalysis::new(vertex).traversal_analysis_in(body, &self.scratch); - let mut statement_costs: TargetArray<_> = TargetArray::from_fn(|_| None); let mut targets = TargetId::all(); targets.reverse(); // We reverse the order, so that earlier targets (aka the interpreter) can have access to traversal costs for target in targets { - let mut statement = - TargetPlacementStatement::new_in(target, &traversals, &self.scratch); + let mut statement = TargetPlacementStatement::new_in(target, &self.scratch); let statement_cost = statement.statement_placement_in(context, body, vertex, &self.scratch); @@ -123,16 +120,11 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { let mut assignment = solver.run(context, body); - let fusion = BasicBlockFusion::new_in(traversals, &self.scratch); - let traversals = fusion.fuse_in(body, &mut assignment, context.heap); + let fusion = BasicBlockFusion::new_in(&self.scratch); + fusion.fuse(body, &mut assignment); - let islands = 
IslandPlacement::new_in(&self.scratch).run( - body, - vertex, - &assignment, - &traversals, - context.heap, - ); + let islands = + IslandPlacement::new_in(&self.scratch).run(body, vertex, &assignment, context.heap); (assignment, islands) } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs index 9a8627a1f4b..23308aef483 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs @@ -11,7 +11,7 @@ use crate::{ pass::execution::{ VertexType, cost::{Cost, StatementCostVec}, - traversal::Traversals, + traversal::{TraversalAnalysisVisitor, TraversalPathBitSet, TraversalResult}, }, visit::Visitor, }; @@ -19,15 +19,15 @@ use crate::{ #[cfg(test)] mod tests; -struct CostVisitor<'ctx, A: Allocator, S: Allocator> { +struct CostVisitor { cost: Cost, + vertex: VertexType, traversal_overhead: Cost, statement_costs: StatementCostVec, - traversals: &'ctx Traversals, } -impl<'heap, A: Allocator, S: Allocator> Visitor<'heap> for CostVisitor<'_, A, S> { +impl<'heap, A: Allocator> Visitor<'heap> for CostVisitor { type Result = Result<(), !>; fn visit_statement( @@ -37,7 +37,7 @@ impl<'heap, A: Allocator, S: Allocator> Visitor<'heap> for CostVisitor<'_, A, S> ) -> Self::Result { // All statements are supported; TraversalAnalysis provides backend data access match &statement.kind { - StatementKind::Assign(Assign { lhs, rhs: _ }) => { + StatementKind::Assign(Assign { lhs, rhs }) => { // If it's a traversal load (aka we add the interpreter cost, as well as the cost to // load the statement). We assume worst case for the traversal. 
#[expect( @@ -45,10 +45,15 @@ impl<'heap, A: Allocator, S: Allocator> Visitor<'heap> for CostVisitor<'_, A, S> reason = "variant count is under u32::MAX" )] let cost = if lhs.projections.is_empty() { - self.cost.saturating_add( - self.traversal_overhead - .saturating_mul(self.traversals.path_count(location) as u32), - ) + let mut bitset = TraversalPathBitSet::empty(self.vertex); + Ok(()) = TraversalAnalysisVisitor::new(self.vertex, |_, result| match result { + TraversalResult::Path(path) => bitset.insert(path), + TraversalResult::Complete => bitset.insert_all(), + }) + .visit_rvalue(location, rhs); + + self.cost + .saturating_add(self.traversal_overhead.saturating_mul(bitset.len() as u32)) } else { self.cost }; @@ -68,31 +73,26 @@ impl<'heap, A: Allocator, S: Allocator> Visitor<'heap> for CostVisitor<'_, A, S> /// target. /// /// Supports all statements unconditionally, serving as the universal fallback. -pub(crate) struct InterpreterStatementPlacement<'ctx, S: Allocator> { +pub(crate) struct InterpreterStatementPlacement { traversal_overhead: Cost, statement_cost: Cost, - - traversals: &'ctx Traversals, } -impl<'ctx, S: Allocator> InterpreterStatementPlacement<'ctx, S> { - pub(crate) const fn new(traversals: &'ctx Traversals) -> Self { +impl InterpreterStatementPlacement { + pub(crate) const fn new() -> Self { Self { traversal_overhead: cost!(4), statement_cost: cost!(8), - traversals, } } } -impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> - for InterpreterStatementPlacement<'_, S> -{ +impl<'heap, A: Allocator + Clone> StatementPlacement<'heap, A> for InterpreterStatementPlacement { fn statement_placement_in( &mut self, _: &MirContext<'_, 'heap>, body: &Body<'heap>, - _: VertexType, + vertex: VertexType, alloc: A, ) -> StatementCostVec { let statement_costs = StatementCostVec::new_in(&body.basic_blocks, alloc); @@ -108,7 +108,7 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> cost: self.statement_cost, 
statement_costs, traversal_overhead: self.traversal_overhead, - traversals: self.traversals, + vertex, }; visitor.visit_body(body); diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs index b73d6896bac..3b9ca7852be 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs @@ -23,7 +23,7 @@ pub(crate) use self::{ embedding::EmbeddingStatementPlacement, interpret::InterpreterStatementPlacement, postgres::PostgresStatementPlacement, }; -use super::{VertexType, target::TargetId, traversal::Traversals}; +use super::{VertexType, target::TargetId}; use crate::{body::Body, context::MirContext, pass::execution::cost::StatementCostVec}; /// Computes statement placement costs for a specific execution target. @@ -53,19 +53,17 @@ pub(crate) trait StatementPlacement<'heap, A: Allocator> { ) -> StatementCostVec; } -pub(crate) enum TargetPlacementStatement<'ctx, 'heap, S: Allocator> { - Interpreter(InterpreterStatementPlacement<'ctx, S>), +pub(crate) enum TargetPlacementStatement<'heap, S: Allocator> { + Interpreter(InterpreterStatementPlacement), Postgres(PostgresStatementPlacement<'heap, S>), Embedding(EmbeddingStatementPlacement), } -impl<'ctx, S: Allocator + Clone> TargetPlacementStatement<'ctx, '_, S> { +impl TargetPlacementStatement<'_, S> { #[must_use] - pub(crate) fn new_in(target: TargetId, traversals: &'ctx Traversals, scratch: S) -> Self { + pub(crate) fn new_in(target: TargetId, scratch: S) -> Self { match target { - TargetId::Interpreter => { - Self::Interpreter(InterpreterStatementPlacement::new(traversals)) - } + TargetId::Interpreter => Self::Interpreter(InterpreterStatementPlacement::new()), TargetId::Postgres => Self::Postgres(PostgresStatementPlacement::new_in(scratch)), TargetId::Embedding => Self::Embedding(EmbeddingStatementPlacement::new_in(scratch)), } @@ -73,7 
+71,7 @@ impl<'ctx, S: Allocator + Clone> TargetPlacementStatement<'ctx, '_, S> { } impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> - for TargetPlacementStatement<'_, 'heap, S> + for TargetPlacementStatement<'heap, S> { #[inline] fn statement_placement_in( diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs index bb7a6f8bd6e..07fc7bb501b 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs @@ -6,151 +6,47 @@ use core::{ ops::{Index, IndexMut}, }; -use super::TraversalPathBitSet; +use super::{TraversalPath, TraversalPathBitSet}; use crate::{ body::{ - Body, Source, basic_block::{BasicBlockId, BasicBlockVec}, - basic_blocks::BasicBlocks, local::Local, location::Location, place::{DefUse, Place, PlaceContext}, }, - context::MirContext, - pass::execution::{ - VertexType, block_partitioned_vec::BlockPartitionedVec, traversal::EntityPath, - }, + pass::execution::{VertexType, traversal::EntityPath}, visit::{self, Visitor}, }; -/// Per-block aggregated traversal paths. -/// -/// Stores a single [`TraversalPathBitSet`] per basic block, representing the union of all -/// vertex field accesses across every statement and terminator in that block. 
-pub(crate) struct BlockTraversals { - vertex: VertexType, - inner: BasicBlockVec, -} - -impl BlockTraversals { - pub(crate) fn new_in(len: usize, vertex: VertexType, alloc: A) -> Self { - Self { - vertex, - inner: BasicBlockVec::from_elem_in(TraversalPathBitSet::empty(vertex), len, alloc), - } - } - - #[inline] - pub(crate) fn len(&self) -> usize { - self.inner.len() - } -} - -impl Index for BlockTraversals { - type Output = TraversalPathBitSet; - - #[inline] - fn index(&self, index: BasicBlockId) -> &Self::Output { - &self.inner[index] - } -} - -impl IndexMut for BlockTraversals { - #[inline] - fn index_mut(&mut self, index: BasicBlockId) -> &mut Self::Output { - &mut self.inner[index] - } -} - -impl Index for BlockTraversals { - type Output = TraversalPathBitSet; - - #[inline] - fn index(&self, index: Location) -> &Self::Output { - &self.inner[index.block] - } -} -impl IndexMut for BlockTraversals { - #[inline] - fn index_mut(&mut self, index: Location) -> &mut Self::Output { - &mut self.inner[index.block] - } +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub(crate) enum TraversalResult { + Path(TraversalPath), + Complete, } -/// Per-location resolved traversal paths for a graph read filter body. -/// -/// Stores a [`TraversalPathBitSet`] for every statement and terminator position, recording -/// which vertex fields each location accesses. Indexed by [`Location`] (1-based statement -/// index, with the terminator at `statements.len() + 1`). -pub(crate) struct Traversals { +// TODO: Each consumer (statement placement per target, island placement) resolves traversal paths +// independently. Consider caching resolved paths per body to avoid redundant work. +// See: https://linear.app/hash/issue/BE-435 +pub(crate) struct TraversalAnalysisVisitor { vertex: VertexType, - inner: BlockPartitionedVec, + on_traversal: F, } -impl Traversals { - /// Creates a traversal map with space for all statements and terminators in the given blocks. 
- /// - /// All positions are initialized to an empty bitset for the given vertex type. - #[expect(clippy::cast_possible_truncation)] - pub(crate) fn new_in(blocks: &BasicBlocks, vertex: VertexType, alloc: A) -> Self { +impl TraversalAnalysisVisitor { + pub(crate) const fn new(vertex: VertexType, on_traversal: F) -> Self + where + F: FnMut(Location, TraversalResult), + { Self { vertex, - inner: BlockPartitionedVec::new_in( - blocks - .iter() - .map(|block| (block.statements.len() + 1) as u32), - TraversalPathBitSet::empty(vertex), - alloc, - ), + on_traversal, } } } -impl Traversals { - /// Returns the traversal path sets for all statements in `block`. - /// - /// The returned slice is indexed by statement position (0-based within the block). - #[inline] - pub(crate) fn of(&self, block: BasicBlockId) -> &[TraversalPathBitSet] { - self.inner.of(block) - } - - /// Returns the vertex type of the traversal. - #[inline] - pub(crate) const fn vertex(&self) -> VertexType { - self.vertex - } - - /// Returns the number of vertex paths accessed by the statement at `location`. 
- #[inline] - #[must_use] - pub(crate) fn path_count(&self, location: Location) -> usize { - self[location].len() - } -} - -impl Index for Traversals { - type Output = TraversalPathBitSet; - - fn index(&self, index: Location) -> &Self::Output { - &self.inner.of(index.block)[index.statement_index - 1] - } -} - -impl IndexMut for Traversals { - fn index_mut(&mut self, index: Location) -> &mut Self::Output { - &mut self.inner.of_mut(index.block)[index.statement_index - 1] - } -} - -struct TraversalAnalysisVisitor { - vertex: VertexType, - traversals: T, -} - -impl<'heap, T> Visitor<'heap> for TraversalAnalysisVisitor +impl<'heap, F> Visitor<'heap> for TraversalAnalysisVisitor where - T: IndexMut, + F: FnMut(Location, TraversalResult), { type Result = Result<(), !>; @@ -172,20 +68,17 @@ where match self.vertex { VertexType::Entity => { - let current = self.traversals[location] - .as_entity_mut() - .unwrap_or_else(|| { - unreachable!("a graph body cannot traverse over multiple types") - }); - let path = EntityPath::resolve(&place.projections); if let Some((path, _)) = path { - current.insert(path); + (self.on_traversal)( + location, + TraversalResult::Path(TraversalPath::Entity(path)), + ); } else { // The path leads to "nothing", indicating that we must hydrate the entire // entity. 
- current.insert_all(); + (self.on_traversal)(location, TraversalResult::Complete); } } } @@ -193,46 +86,3 @@ where visit::r#ref::walk_place(self, location, context, place) } } - -pub(crate) struct TraversalAnalysis { - vertex: VertexType, -} - -impl TraversalAnalysis { - pub(crate) fn new(vertex: VertexType) -> Self { - Self { vertex } - } - - pub(crate) fn traversal_analysis_in<'heap, A: Allocator + Clone>( - &self, - body: &Body<'heap>, - alloc: A, - ) -> Traversals { - let traversals = Traversals::new_in(&body.basic_blocks, self.vertex, alloc); - - let mut visitor = TraversalAnalysisVisitor { - vertex: self.vertex, - traversals, - }; - Ok(()) = visitor.visit_body(body); - - visitor.traversals - } - - pub(crate) fn coarse_traversal_analysis_in<'heap, A: Allocator + Clone>( - &self, - body: &Body<'heap>, - alloc: A, - ) -> BlockTraversals { - let traversals = BlockTraversals::new_in(body.basic_blocks.len(), self.vertex, alloc); - - let mut visitor = TraversalAnalysisVisitor { - vertex: self.vertex, - traversals, - }; - - Ok(()) = visitor.visit_body(body); - - visitor.traversals - } -} diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs index af9b75ae0b5..af8f380e795 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs @@ -453,6 +453,7 @@ impl EntityPathBitSet { } } + #[inline] pub(crate) const fn insert_all(&mut self) { *self = Self::TOP; } diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs index 3953354bfaa..183d15eaf86 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs @@ -14,7 +14,7 @@ mod analysis; #[cfg(test)] mod tests; -pub(crate) use analysis::{TraversalAnalysis, Traversals}; +pub(crate) use 
analysis::{TraversalAnalysisVisitor, TraversalResult}; pub use self::entity::{EntityPath, EntityPathBitSet}; pub(crate) use self::{access::Access, entity::TransferCostConfig}; @@ -120,7 +120,16 @@ impl TraversalPathBitSet { } } + /// Inserts all possible paths into the set. + #[inline] + pub const fn insert_all(&mut self) { + match self { + Self::Entity(bitset) => bitset.insert_all(), + } + } + /// Sums the [`transfer_size`](EntityPath::transfer_size) of every path in this set. + #[inline] pub(crate) fn transfer_size(self, config: &TransferCostConfig) -> InformationRange { match self { Self::Entity(entity_paths) => entity_paths.transfer_size(config), From d3fc684604fc8eb0bac1992f0335a52b72ae1d53 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sun, 1 Mar 2026 22:49:02 +0100 Subject: [PATCH 22/32] fix: remove the old traversals intermediary --- libs/@local/hashql/mir/benches/execution.rs | 35 +--- .../mir/src/pass/execution/fusion/tests.rs | 111 ++---------- .../mir/src/pass/execution/island/tests.rs | 70 ++------ .../hashql/mir/src/pass/execution/mod.rs | 3 +- .../statement_placement/interpret/tests.rs | 34 +--- .../execution/statement_placement/tests.rs | 5 +- .../hashql/mir/src/pass/execution/tests.rs | 13 +- .../pass/execution/traversal/analysis/mod.rs | 8 +- .../execution/traversal/analysis/tests.rs | 166 +++++------------- ...ed_postgres_embedding_interpreter.snap.new | 10 ++ 10 files changed, 93 insertions(+), 362 deletions(-) create mode 100644 libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap.new diff --git a/libs/@local/hashql/mir/benches/execution.rs b/libs/@local/hashql/mir/benches/execution.rs index eb92c4812d4..52c4122d4ec 100644 --- a/libs/@local/hashql/mir/benches/execution.rs +++ b/libs/@local/hashql/mir/benches/execution.rs @@ -13,10 +13,7 @@ use hashql_mir::{ builder::body, def::DefIdSlice, intern::Interner, - pass::{ - Changed, GlobalAnalysisPass as _, TransformPass as _, - 
analysis::size_estimation::SizeEstimationAnalysis, transform::TraversalExtraction, - }, + pass::{GlobalAnalysisPass as _, analysis::size_estimation::SizeEstimationAnalysis}, }; use self::run::run_bencher; @@ -98,22 +95,12 @@ fn execution_analysis(criterion: &mut Criterion) { group.bench_function("simple", |bencher| { run_bencher(bencher, create_simple, |context, [body], scratch| { - let mut extraction = TraversalExtraction::new_in(&mut *scratch); - let _: Changed = extraction.run(context, body); - let traversals = extraction - .take_traversals() - .expect("expected GraphReadFilter body"); - scratch.reset(); - let mut size_analysis = SizeEstimationAnalysis::new_in(&*scratch); size_analysis.run(context, DefIdSlice::from_raw(core::slice::from_ref(&*body))); let footprints = size_analysis.finish(); scratch.reset(); - let bodies = [Some(traversals)]; - let analysis = hashql_mir::pass::execution::ExecutionAnalysis { - traversals: DefIdSlice::from_raw(&bodies), footprints: &footprints, scratch: &mut *scratch, }; @@ -127,22 +114,12 @@ fn execution_analysis(criterion: &mut Criterion) { bencher, create_entity_projections, |context, [body], scratch| { - let mut extraction = TraversalExtraction::new_in(&mut *scratch); - let _: Changed = extraction.run(context, body); - let traversals = extraction - .take_traversals() - .expect("expected GraphReadFilter body"); - scratch.reset(); - let mut size_analysis = SizeEstimationAnalysis::new_in(&*scratch); size_analysis.run(context, DefIdSlice::from_raw(core::slice::from_ref(&*body))); let footprints = size_analysis.finish(); scratch.reset(); - let bodies = [Some(traversals)]; - let analysis = hashql_mir::pass::execution::ExecutionAnalysis { - traversals: DefIdSlice::from_raw(&bodies), footprints: &footprints, scratch: &mut *scratch, }; @@ -154,22 +131,12 @@ fn execution_analysis(criterion: &mut Criterion) { group.bench_function("diamond_cfg", |bencher| { run_bencher(bencher, create_diamond_cfg, |context, [body], scratch| { - let mut 
extraction = TraversalExtraction::new_in(&mut *scratch); - let _: Changed = extraction.run(context, body); - let traversals = extraction - .take_traversals() - .expect("expected GraphReadFilter body"); - scratch.reset(); - let mut size_analysis = SizeEstimationAnalysis::new_in(&scratch); size_analysis.run(context, DefIdSlice::from_raw(core::slice::from_ref(&*body))); let footprints = size_analysis.finish(); scratch.reset(); - let bodies = [Some(traversals)]; - let analysis = hashql_mir::pass::execution::ExecutionAnalysis { - traversals: DefIdSlice::from_raw(&bodies), footprints: &footprints, scratch: &mut *scratch, }; diff --git a/libs/@local/hashql/mir/src/pass/execution/fusion/tests.rs b/libs/@local/hashql/mir/src/pass/execution/fusion/tests.rs index e6be262de90..1fad31b2a09 100644 --- a/libs/@local/hashql/mir/src/pass/execution/fusion/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/fusion/tests.rs @@ -8,7 +8,6 @@ use std::{io::Write as _, path::PathBuf}; use hashql_core::{ heap::Heap, pretty::Formatter, - symbol::sym, r#type::{TypeFormatter, TypeFormatterOptions, environment::Environment}, }; use hashql_diagnostics::DiagnosticIssues; @@ -24,11 +23,7 @@ use crate::{ builder::body, context::MirContext, intern::Interner, - pass::execution::{ - VertexType, - target::TargetId, - traversal::{EntityPath, TraversalAnalysis, Traversals}, - }, + pass::execution::target::TargetId, pretty::TextFormatOptions, }; @@ -40,10 +35,6 @@ fn make_targets(assignments: &[TargetId]) -> BasicBlockVec { targets } -fn empty_traversals(body: &Body<'_>) -> Traversals { - Traversals::new_in(&body.basic_blocks, VertexType::Entity, Global) -} - #[track_caller] fn assert_fusion<'heap>( name: &'static str, @@ -247,12 +238,10 @@ fn fuse_no_changes_needed() { let mut targets = make_targets(&[TargetId::Interpreter]); - let traversals = empty_traversals(&body); - let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); + BasicBlockFusion::new().fuse(&mut body, 
&mut targets); assert_eq!(body.basic_blocks.len(), 1); assert_eq!(targets.len(), 1); - assert_eq!(per_block_paths.len(), body.basic_blocks.len()); assert_fusion("fuse_no_changes_needed", &context, &body, &targets); } @@ -284,12 +273,10 @@ fn fuse_two_same_target_blocks() { let mut targets = make_targets(&[TargetId::Interpreter, TargetId::Interpreter]); - let traversals = empty_traversals(&body); - let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); + BasicBlockFusion::new().fuse(&mut body, &mut targets); assert_eq!(body.basic_blocks.len(), 1); assert_eq!(targets.len(), 1); - assert_eq!(per_block_paths.len(), body.basic_blocks.len()); assert_eq!(targets[BasicBlockId::START], TargetId::Interpreter); assert_matches!( body.basic_blocks[BasicBlockId::START].terminator.kind, @@ -330,12 +317,10 @@ fn fuse_chain_of_three() { let mut targets = make_targets(&[TargetId::Postgres, TargetId::Postgres, TargetId::Postgres]); - let traversals = empty_traversals(&body); - let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); + BasicBlockFusion::new().fuse(&mut body, &mut targets); assert_eq!(body.basic_blocks.len(), 1); assert_eq!(targets.len(), 1); - assert_eq!(per_block_paths.len(), body.basic_blocks.len()); assert_eq!(targets[BasicBlockId::START], TargetId::Postgres); assert_eq!(body.basic_blocks[BasicBlockId::START].statements.len(), 3); assert_fusion("fuse_chain_of_three", &context, &body, &targets); @@ -378,12 +363,10 @@ fn fuse_preserves_different_targets() { TargetId::Postgres, ]); - let traversals = empty_traversals(&body); - let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); + BasicBlockFusion::new().fuse(&mut body, &mut targets); assert_eq!(body.basic_blocks.len(), 2); assert_eq!(targets.len(), 2); - assert_eq!(per_block_paths.len(), body.basic_blocks.len()); assert_eq!(targets[BasicBlockId::new(0)], TargetId::Interpreter); assert_eq!(targets[BasicBlockId::new(1)], 
TargetId::Postgres); assert_fusion( @@ -436,12 +419,10 @@ fn fuse_partial_chain() { TargetId::Postgres, ]); - let traversals = empty_traversals(&body); - let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); + BasicBlockFusion::new().fuse(&mut body, &mut targets); assert_eq!(body.basic_blocks.len(), 2); assert_eq!(targets.len(), 2); - assert_eq!(per_block_paths.len(), body.basic_blocks.len()); assert_eq!(targets[BasicBlockId::new(0)], TargetId::Interpreter); assert_eq!(targets[BasicBlockId::new(1)], TargetId::Postgres); assert_eq!(body.basic_blocks[BasicBlockId::new(0)].statements.len(), 2); @@ -491,12 +472,10 @@ fn fuse_updates_branch_references() { TargetId::Interpreter, ]); - let traversals = empty_traversals(&body); - let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); + BasicBlockFusion::new().fuse(&mut body, &mut targets); assert_eq!(body.basic_blocks.len(), 3); assert_eq!(targets.len(), 3); - assert_eq!(per_block_paths.len(), body.basic_blocks.len()); // The fused bb0 should have a SwitchInt terminator pointing to remapped bb2→bb1 and bb3→bb2 let fused = &body.basic_blocks[BasicBlockId::START]; @@ -546,12 +525,10 @@ fn fuse_does_not_fuse_join_points() { TargetId::Interpreter, ]); - let traversals = empty_traversals(&body); - let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); + BasicBlockFusion::new().fuse(&mut body, &mut targets); assert_eq!(body.basic_blocks.len(), 4); assert_eq!(targets.len(), 4); - assert_eq!(per_block_paths.len(), body.basic_blocks.len()); assert_fusion("fuse_does_not_fuse_join_points", &context, &body, &targets); } @@ -582,13 +559,11 @@ fn fuse_goto_with_args_not_fused() { let mut targets = make_targets(&[TargetId::Interpreter, TargetId::Interpreter]); - let traversals = empty_traversals(&body); - let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); + BasicBlockFusion::new().fuse(&mut body, &mut 
targets); // Both blocks survive because the Goto carries arguments. assert_eq!(body.basic_blocks.len(), 2); assert_eq!(targets.len(), 2); - assert_eq!(per_block_paths.len(), body.basic_blocks.len()); assert_fusion("fuse_goto_with_args_not_fused", &context, &body, &targets); } @@ -651,29 +626,16 @@ fn fuse_diamond_non_monotonic_rpo() { TargetId::Interpreter, ]); - let traversals = empty_traversals(&body); - let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); + BasicBlockFusion::new().fuse(&mut body, &mut targets); // Surviving: bb0(→0), bb1(→1), bb2(→2), bb5(→3) assert_eq!(body.basic_blocks.len(), 4); assert_eq!(targets.len(), 4); - assert_eq!(per_block_paths.len(), body.basic_blocks.len()); assert_eq!(targets[BasicBlockId::new(0)], TargetId::Interpreter); assert_eq!(targets[BasicBlockId::new(1)], TargetId::Postgres); assert_eq!(targets[BasicBlockId::new(2)], TargetId::Interpreter); assert_eq!(targets[BasicBlockId::new(3)], TargetId::Interpreter); - // Verify per_block_paths survived swap-based compaction at the correct indices. - // With empty traversals all entries are empty, but this exercises the swap codepath. - for index in 0..4 { - assert!( - per_block_paths[BasicBlockId::new(index)] - .as_entity() - .expect("entity vertex") - .is_empty(), - ); - } - // bb1 absorbed bb4's statements. assert_eq!(body.basic_blocks[BasicBlockId::new(1)].statements.len(), 2); // bb2 absorbed bb3's statements. @@ -726,13 +688,11 @@ fn fuse_backward_chain() { TargetId::Interpreter, ]); - let traversals = empty_traversals(&body); - let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); + BasicBlockFusion::new().fuse(&mut body, &mut targets); // Surviving: bb0(→0), bb2(→1), bb3(→2). bb1 fused into bb2. assert_eq!(body.basic_blocks.len(), 3); assert_eq!(targets.len(), 3); - assert_eq!(per_block_paths.len(), body.basic_blocks.len()); // bb2 absorbed bb1's return terminator. 
assert_matches!( @@ -748,52 +708,3 @@ fn fuse_backward_chain() { assert_fusion("fuse_backward_chain", &context, &body, &targets); } - -#[test] -fn fuse_joins_traversal_paths() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - let mut body = body!(interner, env; [graph::read::filter]@0/2 -> ? { - decl env: (), vertex: [Opaque sym::path::Entity; ?], val1: ?, val2: ?; - @proj props = vertex.properties: ?, - metadata = vertex.metadata: ?, - prov = metadata.provenance: ?, - edition = prov.edition: ?; - - bb0() { - val1 = load props; - goto bb1(); - }, - bb1() { - val2 = load edition; - return val2; - } - }); - - let context = MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let mut targets = make_targets(&[TargetId::Interpreter, TargetId::Interpreter]); - - let traversals = - TraversalAnalysis::new(VertexType::Entity).traversal_analysis_in(&body, Global); - let per_block_paths = BasicBlockFusion::new(traversals).fuse(&mut body, &mut targets); - - assert_eq!(body.basic_blocks.len(), 1); - assert_eq!(targets.len(), 1); - assert_eq!(per_block_paths.len(), body.basic_blocks.len()); - - let fused = per_block_paths[BasicBlockId::START] - .as_entity() - .expect("entity vertex"); - assert!(fused.contains(EntityPath::Properties)); - assert!(fused.contains(EntityPath::ProvenanceEdition)); - - assert_fusion("fuse_joins_traversal_paths", &context, &body, &targets); -} diff --git a/libs/@local/hashql/mir/src/pass/execution/island/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/tests.rs index ffae265c17b..d24ef1b7f8e 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/tests.rs @@ -4,18 +4,16 @@ use alloc::alloc::Global; use hashql_core::{heap::Heap, symbol::sym, r#type::environment::Environment}; -use hashql_diagnostics::DiagnosticIssues; use crate::{ 
body::basic_block::{BasicBlockId, BasicBlockVec}, builder::body, - context::MirContext, intern::Interner, pass::execution::{ VertexType, island::{IslandId, IslandPlacement}, target::TargetId, - traversal::{EntityPath, TraversalAnalysis, TraversalPathBitSet}, + traversal::EntityPath, }, }; @@ -27,14 +25,6 @@ fn make_targets(assignments: &[TargetId]) -> BasicBlockVec { targets } -fn empty_per_block_paths(block_count: usize) -> BasicBlockVec { - BasicBlockVec::from_elem_in( - TraversalPathBitSet::empty(VertexType::Entity), - block_count, - Global, - ) -} - /// Single block — produces exactly one island containing that block. #[test] fn single_block() { @@ -52,8 +42,7 @@ fn single_block() { }); let targets = make_targets(&[TargetId::Interpreter]); - let paths = empty_per_block_paths(body.basic_blocks.len()); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, &paths, Global); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 1); assert_eq!(islands[IslandId::new(0)].target(), TargetId::Interpreter); @@ -82,8 +71,7 @@ fn same_target_chain() { }); let targets = make_targets(&[TargetId::Postgres, TargetId::Postgres]); - let paths = empty_per_block_paths(body.basic_blocks.len()); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, &paths, Global); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 1); assert_eq!(islands[IslandId::new(0)].target(), TargetId::Postgres); @@ -113,8 +101,7 @@ fn different_targets() { }); let targets = make_targets(&[TargetId::Interpreter, TargetId::Postgres]); - let paths = empty_per_block_paths(body.basic_blocks.len()); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, &paths, Global); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 2); @@ -166,8 +153,7 @@ fn 
diamond_same_target() { TargetId::Interpreter, TargetId::Interpreter, ]); - let paths = empty_per_block_paths(body.basic_blocks.len()); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, &paths, Global); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 1); assert_eq!(islands[IslandId::new(0)].target(), TargetId::Interpreter); @@ -212,8 +198,7 @@ fn diamond_mixed_targets() { TargetId::Embedding, TargetId::Interpreter, ]); - let paths = empty_per_block_paths(body.basic_blocks.len()); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, &paths, Global); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); // bb0 alone, bb1 alone, bb2 alone, bb3 alone — 4 islands, since no same-target // edges exist between any pair of connected blocks. @@ -265,8 +250,7 @@ fn alternating_targets() { TargetId::Interpreter, TargetId::Postgres, ]); - let paths = empty_per_block_paths(body.basic_blocks.len()); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, &paths, Global); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 4); for island_id in islands.ids() { @@ -283,11 +267,11 @@ fn alternating_targets() { let bb0_island = islands .ids() .find(|&id| islands[id].contains(BasicBlockId::new(0))) - .unwrap(); + .expect("bb0 is present"); let bb2_island = islands .ids() .find(|&id| islands[id].contains(BasicBlockId::new(2))) - .unwrap(); + .expect("bb2 is present"); assert_ne!(bb0_island, bb2_island); } @@ -316,8 +300,7 @@ fn transitive_same_target_chain() { }); let targets = make_targets(&[TargetId::Postgres, TargetId::Postgres, TargetId::Postgres]); - let paths = empty_per_block_paths(body.basic_blocks.len()); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, &paths, Global); + let islands = 
IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 1); assert_eq!(islands[IslandId::new(0)].count(), 3); @@ -353,39 +336,8 @@ fn island_joins_traversal_paths() { } }); - let context = MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let traversals = - TraversalAnalysis::new(VertexType::Entity).traversal_analysis_in(&body, Global); - let vertex = traversals.vertex(); - - // Fold per-location traversals into per-block bitsets (same logic as fusion's fuse_in). - use crate::pass::{ - analysis::dataflow::lattice::{HasBottom as _, JoinSemiLattice as _}, - execution::traversal::TraversalLattice, - }; - - let lattice = TraversalLattice::new(vertex); - let per_block_paths = BasicBlockVec::from_domain_derive_in( - |block_id, _| { - traversals - .of(block_id) - .iter() - .fold(lattice.bottom(), |lhs: TraversalPathBitSet, rhs| { - lattice.join_owned(lhs, rhs) - }) - }, - &body.basic_blocks, - Global, - ); - let targets = make_targets(&[TargetId::Interpreter, TargetId::Interpreter]); - let islands = IslandPlacement::new().run(&body, vertex, &targets, &per_block_paths, Global); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 1); let island = &islands[IslandId::new(0)]; diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index d117d99c07f..fbeac816c75 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -40,7 +40,7 @@ use self::{ terminator_placement::TerminatorPlacement, traversal::TransferCostConfig, }; -use super::{analysis::size_estimation::BodyFootprint, transform::Traversals}; +use super::analysis::size_estimation::BodyFootprint; use crate::{ body::{Body, Source, basic_block::BasicBlockVec, local::Local}, context::MirContext, @@ -49,7 +49,6 @@ use crate::{ }; pub struct 
ExecutionAnalysis<'ctx, 'heap, S: Allocator> { - pub traversals: &'ctx DefIdSlice>>, pub footprints: &'ctx DefIdSlice>, pub scratch: S, } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs index 924f2da2cb2..21bf9f0f0c9 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs @@ -9,13 +9,9 @@ use crate::{ context::MirContext, def::DefId, intern::Interner, - pass::execution::{ - VertexType, - statement_placement::{ - InterpreterStatementPlacement, - tests::{assert_placement, run_placement}, - }, - traversal::TraversalAnalysis, + pass::execution::statement_placement::{ + InterpreterStatementPlacement, + tests::{assert_placement, run_placement}, }, }; @@ -62,9 +58,7 @@ fn all_statements_supported() { diagnostics: DiagnosticIssues::new(), }; - let traversals = - TraversalAnalysis::new(VertexType::Entity).traversal_analysis_in(&body, context.heap); - let mut placement = InterpreterStatementPlacement::new(&traversals); + let mut placement = InterpreterStatementPlacement::new(); let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( @@ -104,9 +98,7 @@ fn traversal_single_path_cost() { diagnostics: DiagnosticIssues::new(), }; - let traversals = - TraversalAnalysis::new(VertexType::Entity).traversal_analysis_in(&body, context.heap); - let mut placement = InterpreterStatementPlacement::new(&traversals); + let mut placement = InterpreterStatementPlacement::new(); let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( @@ -148,9 +140,7 @@ fn traversal_multiple_paths_cost() { diagnostics: DiagnosticIssues::new(), }; - let traversals = - TraversalAnalysis::new(VertexType::Entity).traversal_analysis_in(&body, context.heap); - let mut placement = 
InterpreterStatementPlacement::new(&traversals); + let mut placement = InterpreterStatementPlacement::new(); let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( @@ -193,9 +183,7 @@ fn traversal_swallowing_reduces_cost() { diagnostics: DiagnosticIssues::new(), }; - let traversals = - TraversalAnalysis::new(VertexType::Entity).traversal_analysis_in(&body, context.heap); - let mut placement = InterpreterStatementPlacement::new(&traversals); + let mut placement = InterpreterStatementPlacement::new(); let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( @@ -237,9 +225,7 @@ fn non_traversal_unaffected_by_costs() { diagnostics: DiagnosticIssues::new(), }; - let traversals = - TraversalAnalysis::new(VertexType::Entity).traversal_analysis_in(&body, context.heap); - let mut placement = InterpreterStatementPlacement::new(&traversals); + let mut placement = InterpreterStatementPlacement::new(); let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( @@ -283,9 +269,7 @@ fn storage_statements_zero_cost() { diagnostics: DiagnosticIssues::new(), }; - let traversals = - TraversalAnalysis::new(VertexType::Entity).traversal_analysis_in(&body, context.heap); - let mut placement = InterpreterStatementPlacement::new(&traversals); + let mut placement = InterpreterStatementPlacement::new(); let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs index 3cbc04c0608..cf254b59d4b 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs @@ -25,7 +25,6 @@ use crate::{ statement_placement::{ EmbeddingStatementPlacement, InterpreterStatementPlacement, PostgresStatementPlacement, 
}, - traversal::Traversals, }, pretty::{TextFormatAnnotations, TextFormatOptions}, }; @@ -144,10 +143,8 @@ fn non_graph_read_filter_returns_empty() { diagnostics: DiagnosticIssues::new(), }; - let traversals = Traversals::new_in(&body.basic_blocks, VertexType::Entity, &heap); - let mut postgres = PostgresStatementPlacement::new_in(Global); - let mut interpreter = InterpreterStatementPlacement::new(&traversals); + let mut interpreter = InterpreterStatementPlacement::new(); let mut embedding = EmbeddingStatementPlacement::new_in(Global); let vertex = VertexType::Entity; diff --git a/libs/@local/hashql/mir/src/pass/execution/tests.rs b/libs/@local/hashql/mir/src/pass/execution/tests.rs index 36300a48500..05f191d07a8 100644 --- a/libs/@local/hashql/mir/src/pass/execution/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/tests.rs @@ -21,10 +21,9 @@ use crate::{ def::{DefId, DefIdSlice}, intern::Interner, pass::{ - Changed, GlobalAnalysisPass as _, TransformPass as _, + GlobalAnalysisPass as _, analysis::size_estimation::SizeEstimationAnalysis, execution::{ExecutionAnalysis, island::IslandVec, target::TargetId}, - transform::TraversalExtraction, }, }; @@ -75,22 +74,12 @@ fn run_execution<'heap>( BasicBlockVec, IslandVec, ) { - let mut extraction = TraversalExtraction::new_in(Global); - let _: Changed = extraction.run(context, body); - let traversals = extraction - .take_traversals() - .expect("expected GraphReadFilter body"); - - let traversals = [Some(traversals)]; - let traversals_slice = DefIdSlice::from_raw(&traversals); - let mut size_analysis = SizeEstimationAnalysis::new_in(Global); size_analysis.run(context, DefIdSlice::from_raw(core::slice::from_ref(body))); let footprints = size_analysis.finish(); let mut scratch = Scratch::new(); let analysis = ExecutionAnalysis { - traversals: traversals_slice, footprints: &footprints, scratch: &mut scratch, }; diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs 
b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs index 07fc7bb501b..27dce8e9ef3 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs @@ -1,15 +1,9 @@ #[cfg(test)] mod tests; -use core::{ - alloc::Allocator, - ops::{Index, IndexMut}, -}; - -use super::{TraversalPath, TraversalPathBitSet}; +use super::TraversalPath; use crate::{ body::{ - basic_block::{BasicBlockId, BasicBlockVec}, local::Local, location::Location, place::{DefUse, Place, PlaceContext}, diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/tests.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/tests.rs index d5a5a5a7d1c..7bcb3f53e08 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/tests.rs @@ -1,20 +1,46 @@ #![expect(clippy::min_ident_chars)] use hashql_core::{heap::Heap, id::Id as _, symbol::sym, r#type::environment::Environment}; -use hashql_diagnostics::DiagnosticIssues; +use super::{TraversalAnalysisVisitor, TraversalResult}; use crate::{ body::{Body, basic_block::BasicBlockId, location::Location}, builder::body, - context::MirContext, intern::Interner, - pass::execution::traversal::{ - EntityPath, - analysis::{TraversalAnalysis, Traversals}, + pass::execution::{ + VertexType, + traversal::{EntityPath, TraversalPathBitSet}, }, + visit::Visitor as _, }; -fn analyze<'heap>(context: &MirContext<'_, 'heap>, body: &Body<'heap>) -> Traversals<&'heap Heap> { - TraversalAnalysis::traversal_analysis_in(context, body, context.heap) +struct TestTraversals(Vec>); + +impl core::ops::Index for TestTraversals { + type Output = TraversalPathBitSet; + + fn index(&self, index: Location) -> &TraversalPathBitSet { + &self.0[index.block.as_usize()][index.statement_index - 1] + } +} + +fn analyze(body: &Body<'_>) -> TestTraversals { + let vertex = 
VertexType::Entity; + let mut result: Vec> = body + .basic_blocks + .iter() + .map(|block| vec![TraversalPathBitSet::empty(vertex); block.statements.len() + 1]) + .collect(); + + let mut visitor = TraversalAnalysisVisitor::new(vertex, |location: Location, trav_result| { + let entry = &mut result[location.block.as_usize()][location.statement_index - 1]; + match trav_result { + TraversalResult::Path(path) => entry.insert(path), + TraversalResult::Complete => entry.insert_all(), + } + }); + let Ok(()) = visitor.visit_body(body); + + TestTraversals(result) } fn location(block: usize, statement_index: usize) -> Location { @@ -41,14 +67,7 @@ fn single_leaf_path() { } }); - let context = MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let traversals = analyze(&context, &body); + let traversals = analyze(&body); // statement 0: props = load _1.properties let stmt = traversals[location(0, 1)] @@ -81,14 +100,7 @@ fn multi_segment_path() { } }); - let context = MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let traversals = analyze(&context, &body); + let traversals = analyze(&body); let stmt = traversals[location(0, 1)] .as_entity() @@ -113,14 +125,7 @@ fn bare_vertex_sets_all_bits() { } }); - let context = MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let traversals = analyze(&context, &body); + let traversals = analyze(&body); let stmt = traversals[location(0, 1)] .as_entity() @@ -156,14 +161,7 @@ fn multiple_paths_same_statement() { } }); - let context = MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let traversals = analyze(&context, &body); + let traversals = analyze(&body); let stmt = traversals[location(0, 1)] .as_entity() @@ -189,14 +187,7 @@ fn terminator_vertex_access() { } }); - let context = MirContext { 
- heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let traversals = analyze(&context, &body); + let traversals = analyze(&body); // 0 statements, terminator at index 1 let term = traversals[location(0, 1)] @@ -223,14 +214,7 @@ fn non_vertex_access_ignored() { } }); - let context = MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let traversals = analyze(&context, &body); + let traversals = analyze(&body); let stmt = traversals[location(0, 1)] .as_entity() @@ -261,14 +245,7 @@ fn composite_path_recorded() { } }); - let context = MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let traversals = analyze(&context, &body); + let traversals = analyze(&body); let stmt = traversals[location(0, 1)] .as_entity() @@ -295,14 +272,7 @@ fn embedding_path_recorded() { } }); - let context = MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let traversals = analyze(&context, &body); + let traversals = analyze(&body); let stmt = traversals[location(0, 1)] .as_entity() @@ -339,14 +309,7 @@ fn paths_across_blocks() { } }); - let context = MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let traversals = analyze(&context, &body); + let traversals = analyze(&body); // bb0[0]: props = load _1.properties let bb0_s0 = traversals[location(0, 1)] @@ -401,14 +364,7 @@ fn paths_recorded_independently_per_statement() { } }); - let context = MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let traversals = analyze(&context, &body); + let traversals = analyze(&body); // Each statement records independently let stmt0 = traversals[location(0, 1)] @@ -444,14 +400,7 @@ fn unresolvable_projection_sets_all_bits() { } }); - let context = 
MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let traversals = analyze(&context, &body); + let traversals = analyze(&body); let stmt = traversals[location(0, 1)] .as_entity() @@ -481,14 +430,7 @@ fn link_data_path_recorded() { } }); - let context = MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let traversals = analyze(&context, &body); + let traversals = analyze(&body); let stmt = traversals[location(0, 1)] .as_entity() @@ -519,14 +461,7 @@ fn temporal_versioning_swallowing_through_analysis() { } }); - let context = MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let traversals = analyze(&context, &body); + let traversals = analyze(&body); let stmt = traversals[location(0, 1)] .as_entity() @@ -556,14 +491,7 @@ fn swallowing_within_statement() { } }); - let context = MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }; - - let traversals = analyze(&context, &body); + let traversals = analyze(&body); // Both operands reference _1. WebId is inserted first, then RecordId swallows it. 
let stmt = traversals[location(0, 1)] diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap.new b/libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap.new new file mode 100644 index 00000000000..91b4141ab42 --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap.new @@ -0,0 +1,10 @@ +--- +source: libs/@local/hashql/mir/src/pass/execution/tests.rs +assertion_line: 65 +expression: output +--- +Assignment: + bb0: interpreter + +Islands: + 0: target=interpreter, blocks=[BasicBlockId(0)] From eb83e706eec413d8a6818f1ecfefa69cd884b2a8 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sun, 1 Mar 2026 22:58:38 +0100 Subject: [PATCH 23/32] chore: remove old version --- .../suite/mir_pass_transform_post_inline.rs | 2 +- libs/@local/hashql/mir/benches/transform.rs | 12 +- .../hashql/mir/src/pass/execution/mod.rs | 37 ++ .../hashql/mir/src/pass/transform/mod.rs | 4 +- .../mir/src/pass/transform/post_inline.rs | 53 +- .../transform/traversal_extraction/mod.rs | 401 --------------- .../transform/traversal_extraction/tests.rs | 457 ------------------ ...ed_postgres_embedding_interpreter.snap.new | 10 - .../duplicate_different_blocks.snap | 64 --- .../duplicate_same_block_deduped.snap | 39 -- .../mixed_statement_and_terminator.snap | 55 --- .../multiple_distinct_projections.snap | 51 -- .../nested_projection_extracted.snap | 31 -- .../no_projections_from_target.snap | 27 -- .../non_graph_filter_unchanged.snap | 25 - .../pre_existing_load_recorded.snap | 33 -- .../projection_from_non_target_unchanged.snap | 27 -- .../single_projection_extracted.snap | 31 -- .../terminator_operand_extraction.snap | 25 - .../traversals_lookup_correct.snap | 42 -- 20 files changed, 52 insertions(+), 1374 deletions(-) delete mode 100644 libs/@local/hashql/mir/src/pass/transform/traversal_extraction/mod.rs delete mode 100644 
libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap.new delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/duplicate_different_blocks.snap delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/duplicate_same_block_deduped.snap delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/mixed_statement_and_terminator.snap delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/multiple_distinct_projections.snap delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/nested_projection_extracted.snap delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/no_projections_from_target.snap delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/non_graph_filter_unchanged.snap delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/pre_existing_load_recorded.snap delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/projection_from_non_target_unchanged.snap delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/single_projection_extracted.snap delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/terminator_operand_extraction.snap delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/traversals_lookup_correct.snap diff --git a/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_post_inline.rs b/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_post_inline.rs index 1dc2b95be8d..fb9180ae1ac 100644 --- a/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_post_inline.rs +++ b/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_post_inline.rs @@ -54,7 +54,7 @@ pub(crate) fn mir_pass_transform_post_inline<'heap>( diagnostics: DiagnosticIssues::new(), }; - let 
mut pass = PostInline::new_in(heap, &mut scratch); + let mut pass = PostInline::new_in(&mut scratch); let _: Changed = pass.run( &mut context, &mut GlobalTransformState::new_in(&bodies, heap), diff --git a/libs/@local/hashql/mir/benches/transform.rs b/libs/@local/hashql/mir/benches/transform.rs index b99f2bcbef4..38d7e74e684 100644 --- a/libs/@local/hashql/mir/benches/transform.rs +++ b/libs/@local/hashql/mir/benches/transform.rs @@ -512,8 +512,7 @@ fn pipeline(criterion: &mut Criterion) { changed |= Inline::new_in(InlineConfig::default(), &mut *scratch) .run(context, &mut state, bodies); scratch.reset(); - changed |= - PostInline::new_in(context.heap, &mut *scratch).run(context, &mut state, bodies); + changed |= PostInline::new_in(&mut *scratch).run(context, &mut state, bodies); scratch.reset(); changed }); @@ -528,8 +527,7 @@ fn pipeline(criterion: &mut Criterion) { changed |= Inline::new_in(InlineConfig::default(), &mut *scratch) .run(context, &mut state, bodies); scratch.reset(); - changed |= - PostInline::new_in(context.heap, &mut *scratch).run(context, &mut state, bodies); + changed |= PostInline::new_in(&mut *scratch).run(context, &mut state, bodies); scratch.reset(); changed }); @@ -544,8 +542,7 @@ fn pipeline(criterion: &mut Criterion) { changed |= Inline::new_in(InlineConfig::default(), &mut *scratch) .run(context, &mut state, bodies); scratch.reset(); - changed |= - PostInline::new_in(context.heap, &mut *scratch).run(context, &mut state, bodies); + changed |= PostInline::new_in(&mut *scratch).run(context, &mut state, bodies); scratch.reset(); changed }); @@ -560,8 +557,7 @@ fn pipeline(criterion: &mut Criterion) { changed |= Inline::new_in(InlineConfig::default(), &mut *scratch) .run(context, &mut state, bodies); scratch.reset(); - changed |= - PostInline::new_in(context.heap, &mut *scratch).run(context, &mut state, bodies); + changed |= PostInline::new_in(&mut *scratch).run(context, &mut state, bodies); scratch.reset(); changed }); diff --git 
a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index fbeac816c75..7aef62ffa01 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -85,6 +85,14 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { let mut statement_costs = statement_costs.map(|cost| cost.unwrap_or_else(|| unreachable!())); + // DEBUG: statement costs before splitting + for target in TargetId::all() { + eprintln!("=== Statement costs for {target} ==="); + for (block_id, block) in body.basic_blocks.iter_enumerated() { + eprintln!(" {block_id:?}: {} statements", block.statements.len()); + } + } + let mut possibilities = BasicBlockSplitting::new_in(&self.scratch).split_in( context, body, @@ -92,6 +100,15 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { &self.scratch, ); + // DEBUG: possibilities after splitting + eprintln!( + "=== Possibilities after splitting ({} blocks) ===", + body.basic_blocks.len() + ); + for (block_id, _) in body.basic_blocks.iter_enumerated() { + eprintln!(" {block_id:?}: {:?}", possibilities[block_id]); + } + let terminators = TerminatorPlacement::new_in( TransferCostConfig::new(InformationRange::full()), &self.scratch, @@ -104,12 +121,32 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { &self.scratch, ); + // DEBUG: terminator costs + eprintln!("=== Terminator costs ==="); + for (block_id, _) in body.basic_blocks.iter_enumerated() { + let matrices = terminator_costs.of(block_id); + for (edge_idx, matrix) in matrices.iter().enumerate() { + eprintln!(" {block_id:?} edge {edge_idx}:"); + for (from, to, cost) in matrix.iter() { + if let Some(cost) = cost { + eprintln!(" {from} -> {to}: {cost}"); + } + } + } + } + ArcConsistency { blocks: &mut possibilities, terminators: &mut terminator_costs, } .run_in(body, &self.scratch); + // DEBUG: after arc consistency + eprintln!("=== Possibilities after arc consistency 
==="); + for (block_id, _) in body.basic_blocks.iter_enumerated() { + eprintln!(" {block_id:?}: {:?}", possibilities[block_id]); + } + let mut solver = PlacementSolverContext { assignment: &possibilities, statements: &statement_costs, diff --git a/libs/@local/hashql/mir/src/pass/transform/mod.rs b/libs/@local/hashql/mir/src/pass/transform/mod.rs index cff6fb9c773..c6819a45d9f 100644 --- a/libs/@local/hashql/mir/src/pass/transform/mod.rs +++ b/libs/@local/hashql/mir/src/pass/transform/mod.rs @@ -12,7 +12,6 @@ mod inst_simplify; mod post_inline; mod pre_inline; mod ssa_repair; -mod traversal_extraction; pub use self::{ administrative_reduction::AdministrativeReduction, @@ -25,8 +24,7 @@ pub use self::{ forward_substitution::ForwardSubstitution, inline::{Inline, InlineConfig, InlineCostEstimationConfig, InlineHeuristicsConfig}, inst_simplify::InstSimplify, - post_inline::{PostInline, PostInlineResidual}, + post_inline::PostInline, pre_inline::PreInline, ssa_repair::SsaRepair, - traversal_extraction::{TraversalExtraction, Traversals}, }; diff --git a/libs/@local/hashql/mir/src/pass/transform/post_inline.rs b/libs/@local/hashql/mir/src/pass/transform/post_inline.rs index bcdad82c183..2c0a274c374 100644 --- a/libs/@local/hashql/mir/src/pass/transform/post_inline.rs +++ b/libs/@local/hashql/mir/src/pass/transform/post_inline.rs @@ -7,20 +7,16 @@ use core::alloc::Allocator; -use hashql_core::heap::{BumpAllocator, Heap}; +use hashql_core::heap::BumpAllocator; -use super::{Canonicalization, CanonicalizationConfig, TraversalExtraction, Traversals}; +use super::{Canonicalization, CanonicalizationConfig}; use crate::{ body::Body, context::MirContext, - def::{DefIdSlice, DefIdVec}, - pass::{Changed, GlobalTransformPass, GlobalTransformState, TransformPass as _}, + def::DefIdSlice, + pass::{Changed, GlobalTransformPass, GlobalTransformState}, }; -pub struct PostInlineResidual<'heap> { - pub traversals: DefIdVec>, &'heap Heap>, -} - /// Post-inlining optimization driver. 
/// /// A thin wrapper around [`Canonicalization`] configured for post-inlining optimization. By running @@ -34,63 +30,32 @@ pub struct PostInlineResidual<'heap> { /// more optimization opportunities that may require additional passes to fully resolve. /// /// See [`Canonicalization`] for details on the pass ordering and implementation. -pub struct PostInline<'heap, A: Allocator> { +pub struct PostInline { canonicalization: Canonicalization, - - traversals: DefIdVec>, &'heap Heap>, } -impl<'heap, A: BumpAllocator> PostInline<'heap, A> { +impl PostInline { /// Creates a new post-inlining pass with the given allocator. /// /// The allocator is used for temporary data structures within sub-passes and is reset /// between pass invocations. - pub const fn new_in(heap: &'heap Heap, alloc: A) -> Self { + pub const fn new_in(alloc: A) -> Self { Self { canonicalization: Canonicalization::new_in( CanonicalizationConfig { max_iterations: 16 }, alloc, ), - traversals: DefIdVec::new_in(heap), - } - } - - /// Consumes the pass and returns accumulated results. - /// - /// The returned [`PostInlineResidual`] contains traversal maps for each graph read filter - /// body processed during the pass run. 
- pub fn finish(self) -> PostInlineResidual<'heap> { - PostInlineResidual { - traversals: self.traversals, } } } -impl<'env, 'heap, A: BumpAllocator> GlobalTransformPass<'env, 'heap> for PostInline<'heap, A> { +impl<'env, 'heap, A: BumpAllocator> GlobalTransformPass<'env, 'heap> for PostInline { fn run( &mut self, context: &mut MirContext<'env, 'heap>, state: &mut GlobalTransformState<'_>, bodies: &mut DefIdSlice>, ) -> Changed { - let mut changed = Changed::No; - changed |= self.canonicalization.run(context, state, bodies); - - self.canonicalization.allocator_mut().scoped(|alloc| { - let mut extraction = TraversalExtraction::new_in(alloc); - - for (id, body) in bodies.iter_enumerated_mut() { - let changed_body = extraction.run(context, body); - - if let Some(traversal) = extraction.take_traversals() { - self.traversals.insert(id, traversal); - } - - state.mark(id, changed_body); - changed |= changed_body; - } - }); - - changed + self.canonicalization.run(context, state, bodies) } } diff --git a/libs/@local/hashql/mir/src/pass/transform/traversal_extraction/mod.rs b/libs/@local/hashql/mir/src/pass/transform/traversal_extraction/mod.rs deleted file mode 100644 index ef6020770a4..00000000000 --- a/libs/@local/hashql/mir/src/pass/transform/traversal_extraction/mod.rs +++ /dev/null @@ -1,401 +0,0 @@ -//! Traversal extraction transformation pass. -//! -//! This pass extracts projections from a target local into separate bindings, creating explicit -//! intermediate assignments. It is the inverse of projection forwarding — rather than inlining -//! projections, it materializes them as distinct locals. -//! -//! # Pipeline Integration -//! -//! Traversal extraction runs as the final phase of [`super::PostInline`], after -//! [`super::Canonicalization`] has cleaned up redundancy from inlining: -//! -//! ```text -//! Post-Inline -//! ├── Canonicalization (fixpoint loop) -//! └── TraversalExtraction (single pass) -//! ``` -//! -//! 
The pass only operates on [`Source::GraphReadFilter`] bodies; other body types are skipped -//! with [`Changed::No`]. This placement ensures canonicalization has already simplified the MIR -//! before extraction, minimizing the number of projections that need materialization. -//! -//! # Purpose -//! -//! The primary use case is preparing graph read filters for entity traversal. When reading from -//! the graph, the filter body receives a vertex as its second argument (`Local::new(1)`). -//! Projections like `vertex.2.1` (accessing nested properties) need to be extracted so the graph -//! executor can track which paths through the vertex are actually accessed. -//! -//! # Algorithm -//! -//! The pass operates by: -//! -//! 1. Walking all operands in the MIR body -//! 2. For each place operand projecting from the target local, creating a new local and load -//! 3. Replacing the original operand with a reference to the new local -//! 4. Recording the projection path in a [`Traversals`] map for later consumption -//! -//! Deduplication is scoped to the current basic block — if the same projection appears multiple -//! times within a block, it reuses the existing extracted local rather than creating duplicates. -//! -//! Pre-existing loads (e.g., `b = a.2.1`) are detected via [`VisitorMut::visit_statement_assign`] -//! and recorded in the traversal map without generating new statements. -//! -//! # Example -//! -//! Before: -//! ```text -//! bb0: -//! _2 = input() -//! _3 = eq(_1.0.1, _2) -//! _4 = eq(_1.0.1, _1.2) -//! return and(_3, _4) -//! ``` -//! -//! After: -//! ```text -//! bb0: -//! _2 = input() -//! _5 = _1.0.1 -//! _3 = eq(_5, _2) -//! _6 = _1.2 -//! _4 = eq(_5, _6) -//! return and(_3, _4) -//! ``` -//! -//! The [`Traversals`] map records `_5 → _1.0.1` and `_6 → _1.2` for the graph executor to use. 
-#[cfg(test)] -mod tests; - -use core::{alloc::Allocator, convert::Infallible}; - -use hashql_core::{ - heap::Heap, - id::{Id as _, bit_vec::DenseBitSet}, - span::SpanId, -}; - -use crate::{ - body::{ - Body, Source, - basic_block::{BasicBlock, BasicBlockId}, - local::{Local, LocalDecl, LocalVec}, - location::Location, - operand::Operand, - place::Place, - rvalue::RValue, - statement::{Assign, Statement, StatementKind}, - terminator::Terminator, - }, - context::MirContext, - intern::Interner, - pass::{Changed, TransformPass}, - visit::{self, VisitorMut, r#mut::filter}, -}; - -/// Maps extracted locals back to their original projection paths. -/// -/// Produced by [`TraversalExtraction`] and consumed by the graph executor to determine which -/// property paths were accessed on the vertex local. -pub struct Traversals<'heap> { - /// The source local from which projections were extracted (typically the vertex, `_1`). - source: Local, - /// Sparse map from extracted local to its original projection path. - derivations: LocalVec>, &'heap Heap>, -} - -impl<'heap> Traversals<'heap> { - pub(crate) fn with_capacity_in(source: Local, capacity: usize, heap: &'heap Heap) -> Self { - Self { - source, - derivations: LocalVec::with_capacity_in(capacity, heap), - } - } - - pub(crate) fn insert(&mut self, local: Local, place: Place<'heap>) { - debug_assert_eq!(place.local, self.source); - - self.derivations.insert(local, place); - } - - /// Returns the original projection path for `local`, if it was extracted from the source. - #[must_use] - #[inline] - pub fn lookup(&self, local: Local) -> Option<&Place<'heap>> { - self.derivations.lookup(local) - } - - /// Returns `true` if `local` is a registered traversal destination. - #[must_use] - pub fn contains(&self, local: Local) -> bool { - self.derivations.contains(local) - } - - /// Returns the source local from which all projections were extracted. 
- #[must_use] - pub const fn source(&self) -> Local { - self.source - } - - /// Returns a bitset of all locals that are traversal destinations. - #[must_use] - pub fn enabled(&self, body: &Body<'heap>) -> DenseBitSet { - let mut set = DenseBitSet::new_empty(body.local_decls.len()); - - for (local, place) in self.derivations.iter_enumerated() { - if place.is_some() { - set.insert(local); - } - } - - set - } -} - -/// Visitor that extracts projections from a target local into separate bindings. -struct TraversalExtractionVisitor<'env, 'heap, A: Allocator> { - /// The local we're extracting projections from (the vertex). - target: Local, - /// Declaration of the target local, used to derive types for extracted locals. - target_decl: LocalDecl<'heap>, - - /// Span of the current statement/terminator being visited. - current_span: SpanId, - - /// Bound of existing locals before extraction (new locals start from here). - total_locals: Local, - - /// New local declarations to append to the body after visiting. - pending_locals: Vec, A>, - /// Index into `pending_locals` marking the start of the current basic block's locals. - /// Used to scope deduplication to the current block. - pending_locals_offset: usize, - /// New load statements to insert before the current statement. - pending_statements: Vec, A>, - - /// Accumulated traversal mappings. 
- traversals: Traversals<'heap>, - changed: Changed, - interner: &'env Interner<'heap>, -} - -impl<'heap, A: Allocator> VisitorMut<'heap> for TraversalExtractionVisitor<'_, 'heap, A> { - type Filter = filter::Deep; - type Residual = Result; - type Result - = Result - where - T: 'heap; - - fn interner(&self) -> &Interner<'heap> { - self.interner - } - - fn visit_operand(&mut self, _: Location, operand: &mut Operand<'heap>) -> Self::Result<()> { - let Some(place) = operand.as_place() else { - return Ok(()); - }; - - if place.local != self.target { - return Ok(()); - } - - let r#type = place.type_id_unchecked(&self.target_decl); - - // Check if we already extracted this projection in the current basic block. - let new_local = if let Some(offset) = - (self.pending_locals_offset..self.pending_locals.len()).find(|&index| { - self.traversals - .lookup(self.total_locals.plus(index)) - .is_some_and(|pending| pending.projections == place.projections) - }) { - self.total_locals.plus(offset) - } else { - let new_local = self.total_locals.plus(self.pending_locals.len()); - self.traversals.insert(new_local, *place); - - self.pending_locals.push(LocalDecl { - span: self.target_decl.span, - r#type, - name: None, - }); - self.pending_statements.push(Statement { - span: self.current_span, - kind: StatementKind::Assign(Assign { - lhs: Place::local(new_local), - rhs: RValue::Load(Operand::Place(*place)), - }), - }); - - new_local - }; - - *operand = Operand::Place(Place::local(new_local)); - - Ok(()) - } - - fn visit_rvalue(&mut self, location: Location, rvalue: &mut RValue<'heap>) -> Self::Result<()> { - // Skip loads — they're recorded by `visit_statement_assign` to avoid double-processing. 
- if matches!(rvalue, RValue::Load(_)) { - return Ok(()); - } - - visit::r#mut::walk_rvalue(self, location, rvalue) - } - - fn visit_statement_assign( - &mut self, - location: Location, - assign: &mut Assign<'heap>, - ) -> Self::Result<()> { - Ok(()) = visit::r#mut::walk_statement_assign(self, location, assign); - - let Assign { lhs, rhs } = assign; - - if !lhs.projections.is_empty() { - return Ok(()); - } - - let RValue::Load(Operand::Place(rhs)) = rhs else { - return Ok(()); - }; - - if rhs.local != self.target { - return Ok(()); - } - - // Record pre-existing load as a traversal (e.g., `_2 = _1.0.1` already in the MIR). - self.traversals.insert(lhs.local, *rhs); - - Ok(()) - } - - fn visit_statement( - &mut self, - location: Location, - statement: &mut Statement<'heap>, - ) -> Self::Result<()> { - self.current_span = statement.span; - - visit::r#mut::walk_statement(self, location, statement) - } - - fn visit_terminator( - &mut self, - location: Location, - terminator: &mut Terminator<'heap>, - ) -> Self::Result<()> { - self.current_span = terminator.span; - visit::r#mut::walk_terminator(self, location, terminator) - } - - fn visit_basic_block( - &mut self, - id: BasicBlockId, - BasicBlock { - params, - statements, - terminator, - }: &mut BasicBlock<'heap>, - ) -> Self::Result<()> { - let mut location = Location { - block: id, - statement_index: 0, - }; - - self.pending_locals_offset = self.pending_locals.len(); - - self.visit_basic_block_params(location, params)?; - - location.statement_index += 1; - - // statement_index is 1-indexed (0 is block params). - while location.statement_index <= statements.len() { - let index = location.statement_index - 1; - - let statement = &mut statements[index]; - Ok(()) = self.visit_statement(location, statement); - - location.statement_index += 1; - if self.pending_statements.is_empty() { - continue; - } - - // Skip over the statements we're about to insert — they're already recorded. 
- location.statement_index += self.pending_statements.len(); - - statements.splice(index..index, self.pending_statements.drain(..)); - self.changed = Changed::Yes; - } - - self.visit_terminator(location, terminator)?; - - // Insert any remaining statements from terminator operands at the block end. - #[expect(clippy::extend_with_drain, reason = "differing allocator")] - if !self.pending_statements.is_empty() { - statements.extend(self.pending_statements.drain(..)); - self.changed = Changed::Yes; - } - - Ok(()) - } -} - -/// Extracts projections from the vertex local in graph read filter bodies. -/// -/// This pass only runs on [`Source::GraphReadFilter`] bodies. After running, call -/// [`take_traversals`](Self::take_traversals) to retrieve the mapping of extracted locals to -/// their original projection paths. -pub struct TraversalExtraction<'heap, A: Allocator> { - alloc: A, - traversals: Option>, -} - -impl<'heap, A: Allocator> TraversalExtraction<'heap, A> { - /// Creates a new pass using `alloc` for temporary allocations. - pub const fn new_in(alloc: A) -> Self { - Self { - alloc, - traversals: None, - } - } - - /// Takes the traversal map from the last pass run. - /// - /// Returns [`None`] if the pass hasn't run or if the body wasn't a graph read filter. 
- pub const fn take_traversals(&mut self) -> Option> { - self.traversals.take() - } -} - -impl<'env, 'heap, A: Allocator> TransformPass<'env, 'heap> for TraversalExtraction<'heap, A> { - fn run(&mut self, context: &mut MirContext<'env, 'heap>, body: &mut Body<'heap>) -> Changed { - if !matches!(body.source, Source::GraphReadFilter(_)) { - self.traversals = None; - return Changed::No; - } - - debug_assert_eq!(body.args, 2); - let vertex = Local::new(1); - - let mut visitor = TraversalExtractionVisitor { - target: vertex, - target_decl: body.local_decls[vertex], - current_span: SpanId::SYNTHETIC, - total_locals: body.local_decls.bound(), - pending_locals_offset: 0, - pending_locals: Vec::new_in(&self.alloc), - pending_statements: Vec::new_in(&self.alloc), - traversals: Traversals::with_capacity_in(vertex, body.local_decls.len(), context.heap), - changed: Changed::No, - interner: context.interner, - }; - Ok(()) = visitor.visit_body_preserving_cfg(body); - - body.local_decls.extend(visitor.pending_locals); - - self.traversals = Some(visitor.traversals); - visitor.changed - } -} diff --git a/libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs b/libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs deleted file mode 100644 index 0b227dbb1ed..00000000000 --- a/libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs +++ /dev/null @@ -1,457 +0,0 @@ -#![expect(clippy::min_ident_chars, reason = "tests")] - -use alloc::alloc::Global; -use std::{io::Write as _, path::PathBuf}; - -use bstr::ByteVec as _; -use hashql_core::{ - heap::Heap, - pretty::Formatter, - r#type::{TypeFormatter, TypeFormatterOptions, environment::Environment}, -}; -use hashql_diagnostics::DiagnosticIssues; -use insta::{Settings, assert_snapshot}; - -use crate::{ - body::Body, - builder::body, - context::MirContext, - def::DefIdSlice, - intern::Interner, - pass::{TransformPass as _, transform::traversal_extraction::TraversalExtraction}, - 
pretty::TextFormatOptions, -}; - -#[track_caller] -fn assert_traversal_pass<'heap>( - name: &'static str, - body: Body<'heap>, - mut context: MirContext<'_, 'heap>, -) { - let formatter = Formatter::new(context.heap); - let mut formatter = TypeFormatter::new( - &formatter, - context.env, - TypeFormatterOptions::terse().with_qualified_opaque_names(true), - ); - let mut text_format = TextFormatOptions { - writer: Vec::new(), - indent: 4, - sources: (), - types: &mut formatter, - annotations: (), - } - .build(); - - let mut bodies = [body]; - - text_format - .format(DefIdSlice::from_raw(&bodies), &[]) - .expect("should be able to write bodies"); - - let mut pass = TraversalExtraction::new_in(Global); - let changed = pass.run(&mut context, &mut bodies[0]); - - write!( - text_format.writer, - "\n\n{:=^50}\n\n", - format!(" Changed: {changed:?} ") - ) - .expect("infallible"); - - text_format - .format(DefIdSlice::from_raw(&bodies), &[]) - .expect("should be able to write bodies"); - - // Include traversals info if available - if let Some(traversals) = pass.take_traversals() { - write!(text_format.writer, "\n\n{:=^50}\n\n", " Traversals ").expect("infallible"); - - for local in bodies[0].local_decls.ids() { - if let Some(place) = traversals.lookup(local) { - writeln!(text_format.writer, "{local} → {place}").expect("infallible"); - } - } - } - - let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - let mut settings = Settings::clone_current(); - settings.set_snapshot_path(dir.join("tests/ui/pass/traversal_extraction")); - settings.set_prepend_module_to_snapshot(false); - - let _drop = settings.bind_to_scope(); - - let value = text_format.writer.into_string_lossy(); - assert_snapshot!(name, value); -} - -#[test] -fn non_graph_filter_unchanged() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Regular fn body, not GraphReadFilter - should return Changed::No - let body = body!(interner, env; fn@0/2 -> Bool { - 
decl env: (), vertex: (Int, Int), result: Bool; - @proj vertex_0 = vertex.0: Int; - - bb0() { - result = bin.== vertex_0 42; - return result; - } - }); - - assert_traversal_pass( - "non_graph_filter_unchanged", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn no_projections_from_target() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // GraphReadFilter but no projections from vertex (_1) - let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (), vertex: (Int, Int), result: Bool; - - bb0() { - result = load true; - return result; - } - }); - - assert_traversal_pass( - "no_projections_from_target", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn single_projection_extracted() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Single projection from vertex.0 should be extracted - let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (), vertex: (Int, Int), result: Bool; - @proj vertex_0 = vertex.0: Int; - - bb0() { - result = bin.== vertex_0 42; - return result; - } - }); - - assert_traversal_pass( - "single_projection_extracted", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn nested_projection_extracted() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Nested projection vertex.0.1 should be extracted - let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (), vertex: ((Int, Int), Int), result: Bool; - @proj vertex_0 = vertex.0: (Int, Int), vertex_0_1 = vertex_0.1: Int; - - bb0() { - result = bin.== vertex_0_1 42; - return result; - } 
- }); - - assert_traversal_pass( - "nested_projection_extracted", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn duplicate_same_block_deduped() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Same projection used twice in one block - should reuse extracted local - let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (), vertex: (Int, Int), r1: Bool, r2: Bool, result: Bool; - @proj vertex_0 = vertex.0: Int; - - bb0() { - r1 = bin.== vertex_0 42; - r2 = bin.== vertex_0 100; - result = bin.& r1 r2; - return result; - } - }); - - assert_traversal_pass( - "duplicate_same_block_deduped", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn duplicate_different_blocks() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Same projection in different blocks - should create separate locals (no cross-block dedup) - let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (), vertex: (Int, Int), r1: Bool, r2: Bool; - @proj vertex_0 = vertex.0: Int; - - bb0() { - if true then bb1() else bb2(); - }, - bb1() { - r1 = bin.== vertex_0 42; - goto bb3(r1); - }, - bb2() { - r2 = bin.== vertex_0 100; - goto bb3(r2); - }, - bb3(r1) { - return r1; - } - }); - - assert_traversal_pass( - "duplicate_different_blocks", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn multiple_distinct_projections() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Multiple different projections - each gets its own extracted local - let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl 
env: (), vertex: (Int, Int, Int), r1: Bool, r2: Bool, r3: Bool, result: Bool; - @proj vertex_0 = vertex.0: Int, vertex_1 = vertex.1: Int, vertex_2 = vertex.2: Int; - - bb0() { - r1 = bin.== vertex_0 1; - r2 = bin.== vertex_1 2; - r3 = bin.== vertex_2 3; - result = bin.& r1 r2; - result = bin.& result r3; - return result; - } - }); - - assert_traversal_pass( - "multiple_distinct_projections", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn pre_existing_load_recorded() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Pre-existing load statement should be recorded without generating new statements - let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (), vertex: (Int, Int), extracted: Int, result: Bool; - @proj vertex_0 = vertex.0: Int; - - bb0() { - extracted = load vertex_0; - result = bin.== extracted 42; - return result; - } - }); - - assert_traversal_pass( - "pre_existing_load_recorded", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn terminator_operand_extraction() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Projection used in terminator should be extracted at block end - let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { - decl env: (), vertex: (Int, Int); - @proj vertex_0 = vertex.0: Int; - - bb0() { - return vertex_0; - } - }); - - assert_traversal_pass( - "terminator_operand_extraction", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn mixed_statement_and_terminator() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Projections in both statements and 
terminator - let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { - decl env: (), vertex: (Int, Int), cond: Bool; - @proj vertex_0 = vertex.0: Int, vertex_1 = vertex.1: Int; - - bb0() { - cond = bin.== vertex_0 42; - if cond then bb1() else bb2(); - }, - bb1() { - return vertex_0; - }, - bb2() { - return vertex_1; - } - }); - - assert_traversal_pass( - "mixed_statement_and_terminator", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn projection_from_non_target_unchanged() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Projection from env (_0) should not be extracted - only vertex (_1) is target - let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (Int, Int), vertex: (Int, Int), result: Bool; - @proj env_0 = env.0: Int; - - bb0() { - result = bin.== env_0 42; - return result; - } - }); - - assert_traversal_pass( - "projection_from_non_target_unchanged", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn traversals_lookup_correct() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Verify traversals.lookup() returns correct projection paths - let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (), vertex: (Int, Int, Int), r1: Bool, r2: Bool, result: Bool; - @proj vertex_0 = vertex.0: Int, vertex_2 = vertex.2: Int; - - bb0() { - r1 = bin.== vertex_0 1; - r2 = bin.== vertex_2 3; - result = bin.& r1 r2; - return result; - } - }); - - assert_traversal_pass( - "traversals_lookup_correct", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} diff --git 
a/libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap.new b/libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap.new deleted file mode 100644 index 91b4141ab42..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap.new +++ /dev/null @@ -1,10 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/execution/tests.rs -assertion_line: 65 -expression: output ---- -Assignment: - bb0: interpreter - -Islands: - 0: target=interpreter, blocks=[BasicBlockId(0)] diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/duplicate_different_blocks.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/duplicate_different_blocks.snap deleted file mode 100644 index 8913c267a70..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/duplicate_different_blocks.snap +++ /dev/null @@ -1,64 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - let %3: Boolean - - bb0(): { - switchInt(1) -> [0: bb2(), 1: bb1()] - } - - bb1(): { - %2 = %1.0 == 42 - - goto -> bb3(%2) - } - - bb2(): { - %3 = %1.0 == 100 - - goto -> bb3(%3) - } - - bb3(%2): { - return %2 - } -} - -================== Changed: Yes ================== - -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - let %3: Boolean - let %4: Integer - let %5: Integer - - bb0(): { - switchInt(1) -> [0: bb2(), 1: bb1()] - } - - bb1(): { - %4 = %1.0 - %2 = %4 == 42 - - goto -> bb3(%2) - } - - bb2(): { - %5 = %1.0 - %3 = %5 == 100 - - goto -> bb3(%3) - } - - bb3(%2): { - return %2 - } -} - -=================== Traversals =================== - -%4 → %1.0 -%5 → %1.0 diff --git 
a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/duplicate_same_block_deduped.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/duplicate_same_block_deduped.snap deleted file mode 100644 index 58ba673793b..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/duplicate_same_block_deduped.snap +++ /dev/null @@ -1,39 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - let %3: Boolean - let %4: Boolean - - bb0(): { - %2 = %1.0 == 42 - %3 = %1.0 == 100 - %4 = %2 & %3 - - return %4 - } -} - -================== Changed: Yes ================== - -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - let %3: Boolean - let %4: Boolean - let %5: Integer - - bb0(): { - %5 = %1.0 - %2 = %5 == 42 - %3 = %5 == 100 - %4 = %2 & %3 - - return %4 - } -} - -=================== Traversals =================== - -%5 → %1.0 diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/mixed_statement_and_terminator.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/mixed_statement_and_terminator.snap deleted file mode 100644 index 9e1f0561d97..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/mixed_statement_and_terminator.snap +++ /dev/null @@ -1,55 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Integer { - let %2: Boolean - - bb0(): { - %2 = %1.0 == 42 - - switchInt(%2) -> [0: bb2(), 1: bb1()] - } - - bb1(): { - return %1.0 - } - - bb2(): { - return %1.1 - } -} - -================== Changed: Yes ================== - -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Integer { - let %2: Boolean - let %3: Integer - let 
%4: Integer - let %5: Integer - - bb0(): { - %3 = %1.0 - %2 = %3 == 42 - - switchInt(%2) -> [0: bb2(), 1: bb1()] - } - - bb1(): { - %4 = %1.0 - - return %4 - } - - bb2(): { - %5 = %1.1 - - return %5 - } -} - -=================== Traversals =================== - -%3 → %1.0 -%4 → %1.0 -%5 → %1.1 diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/multiple_distinct_projections.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/multiple_distinct_projections.snap deleted file mode 100644 index 13467e78d4e..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/multiple_distinct_projections.snap +++ /dev/null @@ -1,51 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer, Integer)) -> Boolean { - let %2: Boolean - let %3: Boolean - let %4: Boolean - let %5: Boolean - - bb0(): { - %2 = %1.0 == 1 - %3 = %1.1 == 2 - %4 = %1.2 == 3 - %5 = %2 & %3 - %5 = %5 & %4 - - return %5 - } -} - -================== Changed: Yes ================== - -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer, Integer)) -> Boolean { - let %2: Boolean - let %3: Boolean - let %4: Boolean - let %5: Boolean - let %6: Integer - let %7: Integer - let %8: Integer - - bb0(): { - %6 = %1.0 - %2 = %6 == 1 - %7 = %1.1 - %3 = %7 == 2 - %8 = %1.2 - %4 = %8 == 3 - %5 = %2 & %3 - %5 = %5 & %4 - - return %5 - } -} - -=================== Traversals =================== - -%6 → %1.0 -%7 → %1.1 -%8 → %1.2 diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/nested_projection_extracted.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/nested_projection_extracted.snap deleted file mode 100644 index 06da1f6fbf3..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/nested_projection_extracted.snap +++ /dev/null @@ -1,31 +0,0 @@ ---- -source: 
libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: ((Integer, Integer), Integer)) -> Boolean { - let %2: Boolean - - bb0(): { - %2 = %1.0.1 == 42 - - return %2 - } -} - -================== Changed: Yes ================== - -fn {graph::read::filter@4294967040}(%0: (), %1: ((Integer, Integer), Integer)) -> Boolean { - let %2: Boolean - let %3: Integer - - bb0(): { - %3 = %1.0.1 - %2 = %3 == 42 - - return %2 - } -} - -=================== Traversals =================== - -%3 → %1.0.1 diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/no_projections_from_target.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/no_projections_from_target.snap deleted file mode 100644 index 526b9dbeffb..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/no_projections_from_target.snap +++ /dev/null @@ -1,27 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - - bb0(): { - %2 = 1 - - return %2 - } -} - -================== Changed: No =================== - -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - - bb0(): { - %2 = 1 - - return %2 - } -} - -=================== Traversals =================== diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/non_graph_filter_unchanged.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/non_graph_filter_unchanged.snap deleted file mode 100644 index 87955e126ad..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/non_graph_filter_unchanged.snap +++ /dev/null @@ -1,25 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {closure@4294967040}(%0: (), %1: (Integer, 
Integer)) -> Boolean { - let %2: Boolean - - bb0(): { - %2 = %1.0 == 42 - - return %2 - } -} - -================== Changed: No =================== - -fn {closure@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - - bb0(): { - %2 = %1.0 == 42 - - return %2 - } -} diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/pre_existing_load_recorded.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/pre_existing_load_recorded.snap deleted file mode 100644 index 99dfe813624..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/pre_existing_load_recorded.snap +++ /dev/null @@ -1,33 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Integer - let %3: Boolean - - bb0(): { - %2 = %1.0 - %3 = %2 == 42 - - return %3 - } -} - -================== Changed: No =================== - -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Integer - let %3: Boolean - - bb0(): { - %2 = %1.0 - %3 = %2 == 42 - - return %3 - } -} - -=================== Traversals =================== - -%2 → %1.0 diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/projection_from_non_target_unchanged.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/projection_from_non_target_unchanged.snap deleted file mode 100644 index 10ca4a6879b..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/projection_from_non_target_unchanged.snap +++ /dev/null @@ -1,27 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (Integer, Integer), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - - bb0(): { - %2 = %0.0 == 42 - - return %2 - } -} - -================== Changed: No =================== 
- -fn {graph::read::filter@4294967040}(%0: (Integer, Integer), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - - bb0(): { - %2 = %0.0 == 42 - - return %2 - } -} - -=================== Traversals =================== diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/single_projection_extracted.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/single_projection_extracted.snap deleted file mode 100644 index 0b8e097b1cc..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/single_projection_extracted.snap +++ /dev/null @@ -1,31 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - - bb0(): { - %2 = %1.0 == 42 - - return %2 - } -} - -================== Changed: Yes ================== - -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - let %3: Integer - - bb0(): { - %3 = %1.0 - %2 = %3 == 42 - - return %2 - } -} - -=================== Traversals =================== - -%3 → %1.0 diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/terminator_operand_extraction.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/terminator_operand_extraction.snap deleted file mode 100644 index f4a95e38a5c..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/terminator_operand_extraction.snap +++ /dev/null @@ -1,25 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Integer { - bb0(): { - return %1.0 - } -} - -================== Changed: Yes ================== - -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Integer { - let %2: Integer - - bb0(): { - %2 = %1.0 - - return %2 - } -} - 
-=================== Traversals =================== - -%2 → %1.0 diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/traversals_lookup_correct.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/traversals_lookup_correct.snap deleted file mode 100644 index d701ba43613..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/traversals_lookup_correct.snap +++ /dev/null @@ -1,42 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer, Integer)) -> Boolean { - let %2: Boolean - let %3: Boolean - let %4: Boolean - - bb0(): { - %2 = %1.0 == 1 - %3 = %1.2 == 3 - %4 = %2 & %3 - - return %4 - } -} - -================== Changed: Yes ================== - -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer, Integer)) -> Boolean { - let %2: Boolean - let %3: Boolean - let %4: Boolean - let %5: Integer - let %6: Integer - - bb0(): { - %5 = %1.0 - %2 = %5 == 1 - %6 = %1.2 - %3 = %6 == 3 - %4 = %2 & %3 - - return %4 - } -} - -=================== Traversals =================== - -%5 → %1.0 -%6 → %1.2 From ef8e439c821e3fdd0b63e136e5ceddd5baba1eb8 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Mon, 2 Mar 2026 18:55:28 +0100 Subject: [PATCH 24/32] feat: sketch out the cost vec --- .../hashql/core/src/id/bit_vec/finite.rs | 102 +++++++++++++++++ libs/@local/hashql/mir/benches/interpret.rs | 2 +- .../pass/analysis/size_estimation/range.rs | 8 ++ .../hashql/mir/src/pass/execution/cost.rs | 107 +++++++++++++++++- .../hashql/mir/src/pass/execution/mod.rs | 37 ------ .../statement_placement/interpret/mod.rs | 36 +----- .../hashql/mir/src/pass/execution/target.rs | 1 + .../execution/terminator_placement/mod.rs | 26 +---- .../src/pass/execution/traversal/entity.rs | 57 +++++++++- ...ed_postgres_embedding_interpreter.snap.new | 10 ++ 10 files changed, 291 insertions(+), 95 deletions(-) create 
mode 100644 libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap.new diff --git a/libs/@local/hashql/core/src/id/bit_vec/finite.rs b/libs/@local/hashql/core/src/id/bit_vec/finite.rs index a49eec92f1a..48472ffc9bc 100644 --- a/libs/@local/hashql/core/src/id/bit_vec/finite.rs +++ b/libs/@local/hashql/core/src/id/bit_vec/finite.rs @@ -293,6 +293,28 @@ impl FiniteBitSet { } } + /// Flips all bits within the domain, turning set bits off and unset bits on. + /// + /// # Panics + /// + /// Panics if `domain_size` is greater than `T::MAX_DOMAIN_SIZE`. + #[inline] + pub const fn negate(&mut self, domain_size: u32) + where + I: [const] Id, + T: [const] FiniteBitSetIntegral, + { + assert!(domain_size <= T::MAX_DOMAIN_SIZE); + + let mask = if domain_size == T::MAX_DOMAIN_SIZE { + !T::EMPTY + } else { + (T::ONE << T::from_u32(domain_size)) - T::ONE + }; + + self.store = !self.store & mask; + } + /// Returns an iterator over the indices of set bits. #[inline] pub fn iter(&self) -> FiniteBitIter { @@ -705,4 +727,84 @@ mod tests { assert!(a.intersect(&b)); assert!(a.is_empty()); } + + #[test] + fn negate_empty_set() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(4); + set.negate(4); + + assert!(set.contains(TestId::from_usize(0))); + assert!(set.contains(TestId::from_usize(1))); + assert!(set.contains(TestId::from_usize(2))); + assert!(set.contains(TestId::from_usize(3))); + assert!(!set.contains(TestId::from_usize(4))); + assert_eq!(set.len(), 4); + } + + #[test] + fn negate_full_set() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(4); + set.insert_range(TestId::from_usize(0)..=TestId::from_usize(3), 4); + set.negate(4); + + assert!(set.is_empty()); + } + + #[test] + fn negate_partial_set() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); + set.insert(TestId::from_usize(1)); + set.insert(TestId::from_usize(3)); + set.insert(TestId::from_usize(5)); + set.negate(8); + + assert!(set.contains(TestId::from_usize(0))); + 
assert!(!set.contains(TestId::from_usize(1))); + assert!(set.contains(TestId::from_usize(2))); + assert!(!set.contains(TestId::from_usize(3))); + assert!(set.contains(TestId::from_usize(4))); + assert!(!set.contains(TestId::from_usize(5))); + assert!(set.contains(TestId::from_usize(6))); + assert!(set.contains(TestId::from_usize(7))); + assert_eq!(set.len(), 5); + } + + #[test] + fn negate_masks_above_domain() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(3); + set.negate(3); + + assert!(set.contains(TestId::from_usize(0))); + assert!(set.contains(TestId::from_usize(1))); + assert!(set.contains(TestId::from_usize(2))); + assert!(!set.contains(TestId::from_usize(3))); + assert_eq!(set.len(), 3); + } + + #[test] + fn negate_is_involution() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(6); + set.insert(TestId::from_usize(2)); + set.insert(TestId::from_usize(4)); + + let original = set; + set.negate(6); + set.negate(6); + + assert_eq!(set, original); + } + + #[test] + fn negate_full_width() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); + set.insert(TestId::from_usize(0)); + set.insert(TestId::from_usize(7)); + set.negate(8); + + assert!(!set.contains(TestId::from_usize(0))); + assert!(set.contains(TestId::from_usize(1))); + assert!(set.contains(TestId::from_usize(6))); + assert!(!set.contains(TestId::from_usize(7))); + assert_eq!(set.len(), 6); + } } diff --git a/libs/@local/hashql/mir/benches/interpret.rs b/libs/@local/hashql/mir/benches/interpret.rs index 0c62ba0eab1..cc65af23087 100644 --- a/libs/@local/hashql/mir/benches/interpret.rs +++ b/libs/@local/hashql/mir/benches/interpret.rs @@ -78,7 +78,7 @@ fn create_fibonacci_body<'heap>( let _: Changed = inline.run(&mut context, &mut state.as_mut(), bodies_mut); scratch.reset(); - let mut post = PostInline::new_in(context.heap, &mut scratch); + let mut post = PostInline::new_in(&mut scratch); let _: Changed = post.run(&mut context, &mut state.as_mut(), bodies_mut); scratch.reset(); diff 
--git a/libs/@local/hashql/mir/src/pass/analysis/size_estimation/range.rs b/libs/@local/hashql/mir/src/pass/analysis/size_estimation/range.rs index a6cab78d8a5..60acadb6d21 100644 --- a/libs/@local/hashql/mir/src/pass/analysis/size_estimation/range.rs +++ b/libs/@local/hashql/mir/src/pass/analysis/size_estimation/range.rs @@ -203,6 +203,14 @@ macro_rules! range { // Could become empty if min > max (no overlap) Self { min, max: max.map(<$inner>::new) } } + + pub fn midpoint(self) -> Option<$inner> { + let min = self.min.raw; + let max = self.inclusive_max()?.raw; + + let avg = min.midpoint(max); + Some(<$inner>::new(avg)) + } } impl Debug for $name { diff --git a/libs/@local/hashql/mir/src/pass/execution/cost.rs b/libs/@local/hashql/mir/src/pass/execution/cost.rs index d6b14c77bfd..3295738e00f 100644 --- a/libs/@local/hashql/mir/src/pass/execution/cost.rs +++ b/libs/@local/hashql/mir/src/pass/execution/cost.rs @@ -8,14 +8,31 @@ use core::{ alloc::Allocator, fmt, iter::Sum, + mem, ops::{Add, AddAssign, Index, IndexMut, Mul, MulAssign}, }; use std::f32; -use super::block_partitioned_vec::BlockPartitionedVec; +use hashql_core::id::{IdSlice, bit_vec::FiniteBitSet}; + +use super::{ + TargetId, VertexType, block_partitioned_vec::BlockPartitionedVec, target::TargetArray, + traversal::TransferCostConfig, +}; use crate::{ - body::{basic_block::BasicBlockId, basic_blocks::BasicBlocks, location::Location}, + body::{ + basic_block::{BasicBlock, BasicBlockId, BasicBlockSlice, BasicBlockVec}, + basic_blocks::BasicBlocks, + location::Location, + }, macros::{forward_ref_binop, forward_ref_op_assign}, + pass::{ + analysis::size_estimation::{InformationRange, range::SaturatingMul}, + execution::traversal::{ + EntityPathBitSet, TraversalAnalysisVisitor, TraversalPathBitSet, TraversalResult, + }, + }, + visit::Visitor, }; /// Execution cost for a statement on a particular target. 
@@ -393,6 +410,92 @@ impl IndexMut for StatementCostVec { } } +#[derive(Debug, Copy, Clone)] +pub struct BlockCost { + base: ApproxCost, + load: TargetArray, +} + +impl BlockCost { + fn zero() -> Self { + Self { + base: ApproxCost::ZERO, + load: TargetArray::from_raw([ApproxCost::ZERO; _]), + } + } + + fn of(&self, target: TargetId) -> ApproxCost { + self.base + self.load[target] + } +} + +pub struct BlockCostVec { + inner: BasicBlockVec, +} + +impl BlockCostVec { + pub fn new_in<'heap>( + vertex: VertexType, + blocks: &BasicBlockSlice>, + statement: &StatementCostVec, + config: &TransferCostConfig, + alloc: A, + ) -> Self { + let inner = BasicBlockVec::from_domain_derive_in( + |id, block| { + let base = statement.sum_approx(id); + + let mut bitset = TraversalPathBitSet::empty(vertex); + let mut visitor = TraversalAnalysisVisitor::new(vertex, |_, result| match result { + TraversalResult::Path(path) => bitset.insert(path), + TraversalResult::Complete => bitset.insert_all(), + }); + Ok(()) = visitor.visit_basic_block(id, block); + + let mut load = TargetArray::from_raw([InformationRange::zero(); _]); + + match bitset { + TraversalPathBitSet::Entity(bitset) => { + let leafs = bitset.to_leaves(); + + for leaf in &leafs { + let mut remote = leaf.origin(); + remote.negate(TargetId::VARIANT_COUNT_U32); + + for remote in &remote { + let cost = leaf + .transfer_size(config) + .saturating_mul(config.target_multiplier[remote].get()); + + load[remote] += cost; + } + } + } + } + + // for each load calculate the cost, assuming a cardinality of 1 (entity size does + // not effect the cost analytics, because each entity is processed individually in + // the closure context) + #[expect(clippy::cast_precision_loss)] + let load = load.map(|range| { + let Some(average) = range.midpoint() else { + return ApproxCost::INF; + }; + + ApproxCost::new(average.as_u32() as f32) + .unwrap_or_else(|| unreachable!("the value is always non-NaN")) + }); + + BlockCost { base, load } + }, + blocks, + 
alloc, + ); + + Self { inner } + } +} + #[cfg(test)] mod tests { use alloc::alloc::Global; diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index 7aef62ffa01..fbeac816c75 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -85,14 +85,6 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { let mut statement_costs = statement_costs.map(|cost| cost.unwrap_or_else(|| unreachable!())); - // DEBUG: statement costs before splitting - for target in TargetId::all() { - eprintln!("=== Statement costs for {target} ==="); - for (block_id, block) in body.basic_blocks.iter_enumerated() { - eprintln!(" {block_id:?}: {} statements", block.statements.len()); - } - } - let mut possibilities = BasicBlockSplitting::new_in(&self.scratch).split_in( context, body, @@ -100,15 +92,6 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { &self.scratch, ); - // DEBUG: possibilities after splitting - eprintln!( - "=== Possibilities after splitting ({} blocks) ===", - body.basic_blocks.len() - ); - for (block_id, _) in body.basic_blocks.iter_enumerated() { - eprintln!(" {block_id:?}: {:?}", possibilities[block_id]); - } - let terminators = TerminatorPlacement::new_in( TransferCostConfig::new(InformationRange::full()), &self.scratch, @@ -121,32 +104,12 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { &self.scratch, ); - // DEBUG: terminator costs - eprintln!("=== Terminator costs ==="); - for (block_id, _) in body.basic_blocks.iter_enumerated() { - let matrices = terminator_costs.of(block_id); - for (edge_idx, matrix) in matrices.iter().enumerate() { - eprintln!(" {block_id:?} edge {edge_idx}:"); - for (from, to, cost) in matrix.iter() { - if let Some(cost) = cost { - eprintln!(" {from} -> {to}: {cost}"); - } - } - } - } - ArcConsistency { blocks: &mut possibilities, terminators: &mut terminator_costs, } .run_in(body, 
&self.scratch); - // DEBUG: after arc consistency - eprintln!("=== Possibilities after arc consistency ==="); - for (block_id, _) in body.basic_blocks.iter_enumerated() { - eprintln!(" {block_id:?}: {:?}", possibilities[block_id]); - } - let mut solver = PlacementSolverContext { assignment: &possibilities, statements: &statement_costs, diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs index 23308aef483..419cccded41 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs @@ -5,13 +5,12 @@ use crate::{ body::{ Body, Source, location::Location, - statement::{Assign, Statement, StatementKind}, + statement::{Statement, StatementKind}, }, context::MirContext, pass::execution::{ VertexType, cost::{Cost, StatementCostVec}, - traversal::{TraversalAnalysisVisitor, TraversalPathBitSet, TraversalResult}, }, visit::Visitor, }; @@ -21,8 +20,6 @@ mod tests; struct CostVisitor { cost: Cost, - vertex: VertexType, - traversal_overhead: Cost, statement_costs: StatementCostVec, } @@ -35,30 +32,9 @@ impl<'heap, A: Allocator> Visitor<'heap> for CostVisitor { location: Location, statement: &Statement<'heap>, ) -> Self::Result { - // All statements are supported; TraversalAnalysis provides backend data access match &statement.kind { - StatementKind::Assign(Assign { lhs, rhs }) => { - // If it's a traversal load (aka we add the interpreter cost, as well as the cost to - // load the statement). We assume worst case for the traversal. 
- #[expect( - clippy::cast_possible_truncation, - reason = "variant count is under u32::MAX" - )] - let cost = if lhs.projections.is_empty() { - let mut bitset = TraversalPathBitSet::empty(self.vertex); - Ok(()) = TraversalAnalysisVisitor::new(self.vertex, |_, result| match result { - TraversalResult::Path(path) => bitset.insert(path), - TraversalResult::Complete => bitset.insert_all(), - }) - .visit_rvalue(location, rhs); - - self.cost - .saturating_add(self.traversal_overhead.saturating_mul(bitset.len() as u32)) - } else { - self.cost - }; - - self.statement_costs[location] = Some(cost); + StatementKind::Assign(_) => { + self.statement_costs[location] = Some(self.cost); } StatementKind::StorageDead(_) | StatementKind::StorageLive(_) | StatementKind::Nop => { self.statement_costs[location] = Some(cost!(0)); @@ -74,14 +50,12 @@ impl<'heap, A: Allocator> Visitor<'heap> for CostVisitor { /// /// Supports all statements unconditionally, serving as the universal fallback. pub(crate) struct InterpreterStatementPlacement { - traversal_overhead: Cost, statement_cost: Cost, } impl InterpreterStatementPlacement { pub(crate) const fn new() -> Self { Self { - traversal_overhead: cost!(4), statement_cost: cost!(8), } } @@ -92,7 +66,7 @@ impl<'heap, A: Allocator + Clone> StatementPlacement<'heap, A> for InterpreterSt &mut self, _: &MirContext<'_, 'heap>, body: &Body<'heap>, - vertex: VertexType, + _: VertexType, alloc: A, ) -> StatementCostVec { let statement_costs = StatementCostVec::new_in(&body.basic_blocks, alloc); @@ -107,8 +81,6 @@ impl<'heap, A: Allocator + Clone> StatementPlacement<'heap, A> for InterpreterSt let mut visitor = CostVisitor { cost: self.statement_cost, statement_costs, - traversal_overhead: self.traversal_overhead, - vertex, }; visitor.visit_body(body); diff --git a/libs/@local/hashql/mir/src/pass/execution/target.rs b/libs/@local/hashql/mir/src/pass/execution/target.rs index c7c9a50ab06..304e7a39762 100644 --- 
a/libs/@local/hashql/mir/src/pass/execution/target.rs +++ b/libs/@local/hashql/mir/src/pass/execution/target.rs @@ -10,6 +10,7 @@ use hashql_core::id::{Id, IdArray, bit_vec::FiniteBitSet}; /// estimation during placement. The interpreter is evaluated last so it can incorporate traversal /// costs computed by the other backends. #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Id)] +#[id(const)] pub enum TargetId { /// In-process evaluator that supports all MIR operations. /// diff --git a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs index 3206c794d35..f9903d47b10 100644 --- a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs @@ -98,13 +98,13 @@ impl TransMatrix { } #[inline] - fn offset(from: TargetId, to: TargetId) -> usize { + const fn offset(from: TargetId, to: TargetId) -> usize { from.as_usize() * TargetId::VARIANT_COUNT + to.as_usize() } #[inline] #[expect(clippy::integer_division, clippy::integer_division_remainder_used)] - fn from_offset(offset: usize) -> (TargetId, TargetId) { + const fn from_offset(offset: usize) -> (TargetId, TargetId) { let from = TargetId::from_usize(offset / TargetId::VARIANT_COUNT); let to = TargetId::from_usize(offset % TargetId::VARIANT_COUNT); (from, to) @@ -113,13 +113,13 @@ impl TransMatrix { /// Returns the cost for transitioning from `from` to `to`, or `None` if disallowed. 
#[inline] #[must_use] - pub(crate) fn get(&self, from: TargetId, to: TargetId) -> Option { + pub(crate) const fn get(&self, from: TargetId, to: TargetId) -> Option { self.matrix[Self::offset(from, to)] } #[inline] #[must_use] - pub(crate) fn contains(&self, from: TargetId, to: TargetId) -> bool { + pub(crate) const fn contains(&self, from: TargetId, to: TargetId) -> bool { self.matrix[Self::offset(from, to)].is_some() } @@ -538,28 +538,14 @@ impl TerminatorPlacement { live_in: &BasicBlockSlice<(DenseBitSet, TraversalPathBitSet)>, successor: BasicBlockId, ) -> Cost { - let (locals, paths) = &live_in[successor]; + let (locals, _) = &live_in[successor]; required_locals.clone_from(locals); for ¶m in body.basic_blocks[successor].params { required_locals.insert(param); } - let local_cost = self.sum_local_sizes(footprint, required_locals); - - if paths.is_empty() { - return local_cost; - } - - let path_range = paths.transfer_size(&self.transfer_config); - - let Some(max) = path_range.inclusive_max() else { - return Cost::MAX; - }; - - let path_cost = Cost::new_saturating(path_range.min().midpoint(max).as_u32()); - - local_cost.saturating_add(path_cost) + self.sum_local_sizes(footprint, required_locals) } /// Sums the estimated sizes of all locals in the set. 
diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs index af8f380e795..f9e7426ca4e 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs @@ -14,9 +14,12 @@ use super::{ }; use crate::{ body::place::{Projection, ProjectionKind}, - pass::analysis::{ - dataflow::lattice::{HasBottom, HasTop, JoinSemiLattice}, - size_estimation::{InformationRange, InformationUnit}, + pass::{ + analysis::{ + dataflow::lattice::{HasBottom, HasTop, JoinSemiLattice}, + size_estimation::{InformationRange, InformationUnit}, + }, + execution::target::{TargetArray, TargetBitSet, TargetId}, }, }; @@ -137,6 +140,11 @@ pub(crate) struct TransferCostConfig { /// This is a placeholder until the confirmed entity type set is available, at which point /// the metadata size can be computed directly from the property key count. pub property_metadata_divisor: NonZero, + /// Multiplier for the cost of transferring an entity to a target. + /// + /// For example, if the multiplier for Postgres is 2, then transferring an entity to Postgres + /// costs twice as much as transferring it to the interpreter. + pub target_multiplier: TargetArray>, } impl TransferCostConfig { @@ -159,6 +167,7 @@ impl TransferCostConfig { Bound::Included(InformationUnit::new(10)), ), property_metadata_divisor: NonZero::new(4).expect("infallible"), + target_multiplier: TargetArray::from_raw([NonZero::new(1).expect("infallible"); _]), } } } @@ -177,6 +186,18 @@ impl EntityPath { resolve(projections) } + /// Returns the set of execution targets that natively serve this path. 
+    pub(crate) const fn origin(self) -> TargetBitSet {
+        let mut set = TargetBitSet::new_empty(TargetId::VARIANT_COUNT_U32);
+
+        match self.access() {
+            Access::Postgres(_) => set.insert(TargetId::Postgres),
+            Access::Embedding(_) => set.insert(TargetId::Embedding),
+        }
+
+        set
+    }
+
     /// Returns the backend access mode for this path.
     pub(crate) const fn access(self) -> Access {
         match self {
@@ -458,6 +479,36 @@ impl EntityPathBitSet {
         *self = Self::TOP;
     }
 
+    /// Expands composite paths to their leaf descendants.
+    ///
+    /// Composites like [`RecordId`](EntityPath::RecordId) are replaced by their leaf children
+    /// (e.g. [`WebId`](EntityPath::WebId), [`EntityUuid`](EntityPath::EntityUuid), etc.).
+    /// Leaf paths are kept as-is.
+    ///
+    /// This works because the costs are purely cumulative, therefore any composite path can be
+    /// expanded to its leaf descendants without affecting the total cost. This allows for greater
+    /// precision during cost estimation, since the cost can be attributed even when something is
+    /// available on multiple backends.
+    #[expect(clippy::cast_possible_truncation)]
+    pub(crate) fn to_leaves(self) -> FiniteBitSet {
+        let mut result = FiniteBitSet::new_empty(core::mem::variant_count::() as u32);
+
+        for path in &self.0 {
+            if path.children().is_empty() {
+                result.insert(path);
+                continue;
+            }
+
+            for &child in path.children() {
+                if child.children().is_empty() {
+                    result.insert(child);
+                }
+            }
+        }
+
+        result
+    }
+
     /// Sums the [`transfer_size`](EntityPath::transfer_size) of every path in this set.
    pub(crate) fn transfer_size(self, config: &TransferCostConfig) -> InformationRange {
        let mut total = InformationRange::zero();
diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap.new b/libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap.new
new file mode 100644
index 00000000000..91b4141ab42
--- /dev/null
+++ b/libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap.new
@@ -0,0 +1,10 @@
+---
+source: libs/@local/hashql/mir/src/pass/execution/tests.rs
+assertion_line: 65
+expression: output
+---
+Assignment:
+  bb0: interpreter
+
+Islands:
+  0: target=interpreter, blocks=[BasicBlockId(0)]

From 47894922908b94bf411e6e56d3ed685f2f0ffbe5 Mon Sep 17 00:00:00 2001
From: Bilal Mahmoud
Date: Mon, 2 Mar 2026 21:06:27 +0100
Subject: [PATCH 25/32] feat: checkpoint

---
 .../mir/src/pass/execution/cost/analysis.rs   | 547 ++++++++++++++++++
 .../pass/execution/{cost.rs => cost/mod.rs}   | 202 +------
 .../mir/src/pass/execution/cost/tests.rs      |  78 +++
 .../hashql/mir/src/pass/execution/mod.rs      |  35 +-
 .../pass/execution/placement/solve/csp/mod.rs |   9 +-
 .../execution/placement/solve/estimate/mod.rs |  14 +-
 .../src/pass/execution/placement/solve/mod.rs |   8 +-
 .../src/pass/execution/traversal/entity.rs    |  43 +-
 .../mir/src/pass/execution/traversal/mod.rs   |  35 +-
 .../mir/src/pass/execution/traversal/tests.rs |   8 +-
 10 files changed, 711 insertions(+), 268 deletions(-)
 create mode 100644 libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs
 rename libs/@local/hashql/mir/src/pass/execution/{cost.rs => cost/mod.rs} (66%)
 create mode 100644 libs/@local/hashql/mir/src/pass/execution/cost/tests.rs

diff --git a/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs b/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs
new file mode 100644
index 00000000000..89452833a59
--- /dev/null
+++ b/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs
@@ -0,0 +1,547 @@
+use core::alloc::Allocator;
+
+use super::{ApproxCost, StatementCostVec};
+use crate::{
+    body::basic_block::{BasicBlock, BasicBlockId, BasicBlockSlice, BasicBlockVec},
+    pass::{
+        analysis::size_estimation::{InformationRange, range::SaturatingMul as _},
+        execution::{
+            TargetId, VertexType,
+            target::{TargetArray, TargetBitSet},
+            traversal::{
+                TransferCostConfig, TraversalAnalysisVisitor, TraversalPathBitSet, TraversalResult,
+            },
+        },
+    },
+    visit::Visitor as _,
+};
+
+#[derive(Debug, Copy, Clone)]
+struct BasicBlockTargetCost {
+    base: ApproxCost,
+    load: ApproxCost,
+}
+
+impl BasicBlockTargetCost {
+    const ZERO: Self = Self {
+        base: ApproxCost::ZERO,
+        load: ApproxCost::ZERO,
+    };
+
+    fn total(self) -> ApproxCost {
+        self.base + self.load
+    }
+}
+
+#[derive(Debug, Copy, Clone)]
+struct BasicBlockCost {
+    targets: TargetBitSet,
+    costs: TargetArray,
+}
+
+#[derive(Debug)]
+pub(crate) struct BasicBlockCostVec {
+    inner: BasicBlockVec,
+}
+
+impl BasicBlockCostVec {
+    pub(crate) fn assignments(&self, block: BasicBlockId) -> TargetBitSet {
+        self.inner[block].targets
+    }
+
+    pub(crate) fn cost(&self, block: BasicBlockId, target: TargetId) -> ApproxCost {
+        let entry = &self.inner[block];
+
+        debug_assert!(
+            entry.targets.contains(target),
+            "target {target:?} is not in the domain of block {block:?}"
+        );
+
+        entry.costs[target].total()
+    }
+}
+
+pub(crate) struct BasicBlockCostAnalysis<'ctx, A: Allocator> {
+    pub vertex: VertexType,
+    pub assignments: &'ctx BasicBlockSlice,
+    pub costs: &'ctx TargetArray>,
+}
+
+impl BasicBlockCostAnalysis<'_, A> {
+    fn analyze_basic_block_target(
+        &self,
+        config: &TransferCostConfig,
+        id: BasicBlockId,
+        target: TargetId,
+        traversals: TraversalPathBitSet,
+    ) -> BasicBlockTargetCost {
+        let base = self.costs[target].sum_approx(id);
+
+        let mut range = InformationRange::zero();
+
+        // For any path that does not originate on this target, add its estimated size to the
+        // total range.
+        for path in &traversals {
+            if !path.origin().contains(target) {
+                range += path.estimate_size(config);
+            }
+        }
+
+        let load = range
+            .saturating_mul(config.target_multiplier[target].get())
+            .midpoint()
+            .map_or(ApproxCost::INF, From::from);
+
+        BasicBlockTargetCost { base, load }
+    }
+
+    fn analyze_basic_block(
+        &self,
+        config: &TransferCostConfig,
+        id: BasicBlockId,
+        block: &BasicBlock<'_>,
+    ) -> BasicBlockCost {
+        let targets = self.assignments[id];
+        let mut costs = TargetArray::from_raw([BasicBlockTargetCost::ZERO; _]);
+
+        // We do not expand to the leaf nodes on purpose, we work under the assumption that any
+        // composite path that is given is more efficient than its individual components and will
+        // always be fetched together, therefore the cost of the parent must be used to accurately
+        // describe the cost. If a node can be used in multiple places at the same time, then
+        // fetching from the composite will always be preferred.
+        let mut traversals = TraversalPathBitSet::empty(self.vertex);
+        let mut visitor = TraversalAnalysisVisitor::new(self.vertex, |_, result| match result {
+            TraversalResult::Path(path) => traversals.insert(path),
+            TraversalResult::Complete => traversals.insert_all(),
+        });
+        Ok(()) = visitor.visit_basic_block(id, block);
+
+        for target in &targets {
+            costs[target] = self.analyze_basic_block_target(config, id, target, traversals);
+        }
+
+        BasicBlockCost { targets, costs }
+    }
+
+    pub(crate) fn analyze_in(
+        &self,
+        config: &TransferCostConfig,
+        blocks: &BasicBlockSlice>,
+        alloc: A,
+    ) -> BasicBlockCostVec {
+        let inner = BasicBlockVec::from_domain_derive_in(
+            |id, block| self.analyze_basic_block(config, id, block),
+            blocks,
+            alloc,
+        );
+
+        BasicBlockCostVec { inner }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use alloc::alloc::Global;
+
+    use hashql_core::{heap::Heap, symbol::sym, r#type::environment::Environment};
+
+    use super::*;
+    use crate::{
+        body::basic_block::BasicBlockId,
+        builder::body,
intern::Interner, + pass::{ + analysis::size_estimation::InformationRange, execution::traversal::TransferCostConfig, + }, + }; + + fn all_targets() -> TargetBitSet { + let mut set = TargetBitSet::new_empty(TargetId::VARIANT_COUNT_U32); + for target in TargetId::all() { + set.insert(target); + } + set + } + + fn default_config() -> TransferCostConfig { + TransferCostConfig::new(InformationRange::full()) + } + + fn make_targets(body: &crate::body::Body<'_>, domain: TargetBitSet) -> Vec { + body.basic_blocks.iter().map(|_| domain).collect() + } + + /// A block with no vertex accesses has zero load cost on every target. + #[test] + fn no_vertex_access_zero_load() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (Int), vertex: [Opaque sym::path::Entity; ?], val: Int; + @proj env_0 = env.0: Int; + + bb0() { + val = load env_0; + return val; + } + }); + + let targets = make_targets(&body, all_targets()); + let targets = BasicBlockSlice::from_raw(&targets); + + let costs: TargetArray> = + TargetArray::from_fn(|_| StatementCostVec::from_iter([1].into_iter(), Global)); + + let analysis = BasicBlockCostAnalysis { + vertex: VertexType::Entity, + assignments: targets, + costs: &costs, + }; + + let result = analysis.analyze_in(&default_config(), &body.basic_blocks, Global); + let bb0 = BasicBlockId::new(0); + + for target in TargetId::all() { + let total = result.cost(bb0, target); + let base = costs[target].sum_approx(bb0); + assert_eq!(total, base, "target {target:?} should have zero load cost"); + } + } + + /// Accessing Vectors (Embedding-origin) charges load on Interpreter and Postgres + /// but not on Embedding. + #[test] + fn vectors_path_charges_non_origin_targets() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj encodings = vertex.encodings: ?, + vectors = encodings.vectors: ?; + + bb0() { + val = load vectors; + return val; + } + }); + + let targets = make_targets(&body, all_targets()); + let targets = BasicBlockSlice::from_raw(&targets); + + let costs: TargetArray> = + TargetArray::from_fn(|_| StatementCostVec::from_iter([1].into_iter(), Global)); + + let analysis = BasicBlockCostAnalysis { + vertex: VertexType::Entity, + assignments: targets, + costs: &costs, + }; + + let config = default_config(); + let result = analysis.analyze_in(&config, &body.basic_blocks, Global); + let bb0 = BasicBlockId::new(0); + + let embedding_cost = result.cost(bb0, TargetId::Embedding); + let embedding_base = costs[TargetId::Embedding].sum_approx(bb0); + assert_eq!( + embedding_cost, embedding_base, + "Embedding is origin for Vectors; no load premium" + ); + + let interpreter_cost = result.cost(bb0, TargetId::Interpreter); + let interpreter_base = costs[TargetId::Interpreter].sum_approx(bb0); + assert!( + interpreter_cost > interpreter_base, + "Interpreter should pay load premium for Vectors" + ); + + let postgres_cost = result.cost(bb0, TargetId::Postgres); + let postgres_base = costs[TargetId::Postgres].sum_approx(bb0); + assert!( + postgres_cost > postgres_base, + "Postgres should pay load premium for Vectors" + ); + } + + /// Accessing Archived (Postgres-origin) charges load on non-Postgres targets. 
+ #[test] + fn postgres_path_charges_non_postgres_targets() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: Bool; + @proj metadata = vertex.metadata: ?, archived = metadata.archived: Bool; + + bb0() { + val = load archived; + return val; + } + }); + + let targets = make_targets(&body, all_targets()); + let targets = BasicBlockSlice::from_raw(&targets); + + let costs: TargetArray> = + TargetArray::from_fn(|_| StatementCostVec::from_iter([1].into_iter(), Global)); + + let analysis = BasicBlockCostAnalysis { + vertex: VertexType::Entity, + assignments: targets, + costs: &costs, + }; + + let result = analysis.analyze_in(&default_config(), &body.basic_blocks, Global); + let bb0 = BasicBlockId::new(0); + + let postgres_cost = result.cost(bb0, TargetId::Postgres); + let postgres_base = costs[TargetId::Postgres].sum_approx(bb0); + assert_eq!( + postgres_cost, postgres_base, + "Postgres is origin for Archived; no load premium" + ); + + let interpreter_cost = result.cost(bb0, TargetId::Interpreter); + let interpreter_base = costs[TargetId::Interpreter].sum_approx(bb0); + assert!( + interpreter_cost > interpreter_base, + "Interpreter should pay load premium for Archived" + ); + + let embedding_cost = result.cost(bb0, TargetId::Embedding); + let embedding_base = costs[TargetId::Embedding].sum_approx(bb0); + assert!( + embedding_cost > embedding_base, + "Embedding should pay load premium for Archived" + ); + } + + /// Properties (Postgres) + Vectors (Embedding) in one block: Interpreter pays both + /// premiums, Postgres pays only Vectors, Embedding pays only Properties. + #[test] + fn multiple_paths_accumulate_load() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> (?, ?) 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], result: (?, ?); + @proj properties = vertex.properties: ?, + encodings = vertex.encodings: ?, + vectors = encodings.vectors: ?; + + bb0() { + result = tuple properties, vectors; + return result; + } + }); + + let targets = make_targets(&body, all_targets()); + let targets = BasicBlockSlice::from_raw(&targets); + + let costs: TargetArray> = + TargetArray::from_fn(|_| StatementCostVec::from_iter([1].into_iter(), Global)); + + let analysis = BasicBlockCostAnalysis { + vertex: VertexType::Entity, + assignments: targets, + costs: &costs, + }; + + let result = analysis.analyze_in(&default_config(), &body.basic_blocks, Global); + let bb0 = BasicBlockId::new(0); + + let interpreter_cost = result.cost(bb0, TargetId::Interpreter); + let postgres_cost = result.cost(bb0, TargetId::Postgres); + let embedding_cost = result.cost(bb0, TargetId::Embedding); + + // Interpreter pays both premiums, so it's the most expensive + assert!( + interpreter_cost > postgres_cost, + "Interpreter pays both premiums, Postgres only Vectors" + ); + assert!( + interpreter_cost > embedding_cost, + "Interpreter pays both premiums, Embedding only Properties" + ); + + // Both Postgres and Embedding pay above their base + let postgres_base = costs[TargetId::Postgres].sum_approx(bb0); + let embedding_base = costs[TargetId::Embedding].sum_approx(bb0); + assert!(postgres_cost > postgres_base); + assert!(embedding_cost > embedding_base); + } + + /// RecordId (composite) expands to leaf descendants. All leaves are Postgres-origin, + /// so Postgres pays no premium and Interpreter does. + #[test] + fn composite_path_expands_to_leaves() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj metadata = vertex.metadata: ?, + record_id = metadata.record_id: ?; + + bb0() { + val = load record_id; + return val; + } + }); + + let targets = make_targets(&body, all_targets()); + let targets = BasicBlockSlice::from_raw(&targets); + + let costs: TargetArray> = + TargetArray::from_fn(|_| StatementCostVec::from_iter([1].into_iter(), Global)); + + // Use zero properties size so Properties path doesn't contribute noise + let config = TransferCostConfig::new(InformationRange::zero()); + let analysis = BasicBlockCostAnalysis { + vertex: VertexType::Entity, + assignments: targets, + costs: &costs, + }; + + let result = analysis.analyze_in(&config, &body.basic_blocks, Global); + let bb0 = BasicBlockId::new(0); + + let postgres_cost = result.cost(bb0, TargetId::Postgres); + let postgres_base = costs[TargetId::Postgres].sum_approx(bb0); + assert_eq!( + postgres_cost, postgres_base, + "Postgres is origin for all RecordId leaves" + ); + + let interpreter_cost = result.cost(bb0, TargetId::Interpreter); + let interpreter_base = costs[TargetId::Interpreter].sum_approx(bb0); + assert!( + interpreter_cost > interpreter_base, + "Interpreter should pay load premium for RecordId leaves" + ); + } + + /// With a restricted target domain, only available targets are analyzed. 
+ #[test] + fn restricted_target_domain() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: Bool; + @proj metadata = vertex.metadata: ?, archived = metadata.archived: Bool; + + bb0() { + val = load archived; + return val; + } + }); + + let mut restricted = TargetBitSet::new_empty(TargetId::VARIANT_COUNT_U32); + restricted.insert(TargetId::Postgres); + restricted.insert(TargetId::Interpreter); + let targets = make_targets(&body, restricted); + let targets = BasicBlockSlice::from_raw(&targets); + + let costs: TargetArray> = + TargetArray::from_fn(|_| StatementCostVec::from_iter([1].into_iter(), Global)); + + let analysis = BasicBlockCostAnalysis { + vertex: VertexType::Entity, + assignments: targets, + costs: &costs, + }; + + let result = analysis.analyze_in(&default_config(), &body.basic_blocks, Global); + let bb0 = BasicBlockId::new(0); + + let postgres_cost = result.cost(bb0, TargetId::Postgres); + let postgres_base = costs[TargetId::Postgres].sum_approx(bb0); + assert_eq!(postgres_cost, postgres_base); + + let interpreter_cost = result.cost(bb0, TargetId::Interpreter); + let interpreter_base = costs[TargetId::Interpreter].sum_approx(bb0); + assert!(interpreter_cost > interpreter_base); + } + + /// Paths across multiple blocks are analyzed independently per block. + #[test] + fn paths_across_blocks_independent() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], + props: ?, val: Bool, cond: Bool; + @proj properties = vertex.properties: ?, + encodings = vertex.encodings: ?, + vectors = encodings.vectors: ?; + + bb0() { + props = load properties; + cond = load true; + if cond then bb1() else bb2(); + }, + bb1() { + val = load vectors; + return val; + }, + bb2() { + return cond; + } + }); + + let targets = make_targets(&body, all_targets()); + let targets = BasicBlockSlice::from_raw(&targets); + + let costs: TargetArray> = + TargetArray::from_fn(|_| StatementCostVec::from_iter([2, 1, 0].into_iter(), Global)); + + let config = default_config(); + let analysis = BasicBlockCostAnalysis { + vertex: VertexType::Entity, + assignments: targets, + costs: &costs, + }; + + let result = analysis.analyze_in(&config, &body.basic_blocks, Global); + + let bb0 = BasicBlockId::new(0); + let bb1 = BasicBlockId::new(1); + let bb2 = BasicBlockId::new(2); + + // bb0 accesses Properties (Postgres-origin): Postgres no premium, others pay + let bb0_postgres = result.cost(bb0, TargetId::Postgres); + let bb0_postgres_base = costs[TargetId::Postgres].sum_approx(bb0); + assert_eq!(bb0_postgres, bb0_postgres_base); + + let bb0_interpreter = result.cost(bb0, TargetId::Interpreter); + let bb0_interpreter_base = costs[TargetId::Interpreter].sum_approx(bb0); + assert!(bb0_interpreter > bb0_interpreter_base); + + // bb1 accesses Vectors (Embedding-origin): Embedding no premium, others pay + let bb1_embedding = result.cost(bb1, TargetId::Embedding); + let bb1_embedding_base = costs[TargetId::Embedding].sum_approx(bb1); + assert_eq!(bb1_embedding, bb1_embedding_base); + + let bb1_postgres = result.cost(bb1, TargetId::Postgres); + let bb1_postgres_base = costs[TargetId::Postgres].sum_approx(bb1); + assert!(bb1_postgres > bb1_postgres_base); + + // bb2 has no vertex accesses: all targets equal base + for target in TargetId::all() { + let cost = result.cost(bb2, target); + let base = 
costs[target].sum_approx(bb2); + assert_eq!(cost, base, "bb2 target {target:?} should have zero load"); + } + } +} diff --git a/libs/@local/hashql/mir/src/pass/execution/cost.rs b/libs/@local/hashql/mir/src/pass/execution/cost/mod.rs similarity index 66% rename from libs/@local/hashql/mir/src/pass/execution/cost.rs rename to libs/@local/hashql/mir/src/pass/execution/cost/mod.rs index 3295738e00f..62b3b200a40 100644 --- a/libs/@local/hashql/mir/src/pass/execution/cost.rs +++ b/libs/@local/hashql/mir/src/pass/execution/cost/mod.rs @@ -8,33 +8,22 @@ use core::{ alloc::Allocator, fmt, iter::Sum, - mem, ops::{Add, AddAssign, Index, IndexMut, Mul, MulAssign}, }; use std::f32; -use hashql_core::id::{IdSlice, bit_vec::FiniteBitSet}; - -use super::{ - TargetId, VertexType, block_partitioned_vec::BlockPartitionedVec, target::TargetArray, - traversal::TransferCostConfig, -}; +pub(crate) use self::analysis::{BasicBlockCostAnalysis, BasicBlockCostVec}; +use super::block_partitioned_vec::BlockPartitionedVec; use crate::{ - body::{ - basic_block::{BasicBlock, BasicBlockId, BasicBlockSlice, BasicBlockVec}, - basic_blocks::BasicBlocks, - location::Location, - }, + body::{basic_block::BasicBlockId, basic_blocks::BasicBlocks, location::Location}, macros::{forward_ref_binop, forward_ref_op_assign}, - pass::{ - analysis::size_estimation::{InformationRange, range::SaturatingMul}, - execution::traversal::{ - EntityPathBitSet, TraversalAnalysisVisitor, TraversalPathBitSet, TraversalResult, - }, - }, - visit::Visitor, + pass::analysis::size_estimation::InformationUnit, }; +mod analysis; +#[cfg(test)] +mod tests; + /// Execution cost for a statement on a particular target. /// /// Lower values indicate cheaper execution. 
When multiple targets can execute a statement, the @@ -248,6 +237,13 @@ impl From for ApproxCost { } } +impl From for ApproxCost { + fn from(value: InformationUnit) -> Self { + #[expect(clippy::cast_precision_loss)] + Self(value.as_u32() as f32) + } +} + impl fmt::Display for ApproxCost { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(&self.0, fmt) @@ -409,171 +405,3 @@ impl IndexMut for StatementCostVec { &mut self.0.of_mut(index.block)[index.statement_index - 1] } } - -#[derive(Debug, Copy, Clone)] -pub struct BlockCost { - base: ApproxCost, - load: TargetArray, -} - -impl BlockCost { - fn zero() -> Self { - Self { - base: ApproxCost::ZERO, - load: TargetArray::from_raw([ApproxCost::ZERO; _]), - } - } - - fn of(&self, target: TargetId) -> ApproxCost { - self.base + self.load[target] - } -} - -pub struct BlockCostVec { - inner: BasicBlockVec, -} - -impl BlockCostVec { - pub fn new_in<'heap>( - vertex: VertexType, - blocks: &BasicBlockSlice>, - statement: &StatementCostVec, - config: &TransferCostConfig, - alloc: A, - ) -> Self { - let inner = BasicBlockVec::from_domain_derive_in( - |id, block| { - let base = statement.sum_approx(id); - - let mut bitset = TraversalPathBitSet::empty(vertex); - let mut visitor = TraversalAnalysisVisitor::new(vertex, |_, result| match result { - TraversalResult::Path(path) => bitset.insert(path), - TraversalResult::Complete => bitset.insert_all(), - }); - Ok(()) = visitor.visit_basic_block(id, block); - - let mut load = TargetArray::from_raw([InformationRange::zero(); _]); - - match bitset { - TraversalPathBitSet::Entity(bitset) => { - let leafs = bitset.to_leaves(); - - for leaf in &leafs { - let mut remote = leaf.origin(); - remote.negate(TargetId::VARIANT_COUNT_U32); - - for remote in &remote { - let cost = leaf - .transfer_size(config) - .saturating_mul(config.target_multiplier[remote].get()); - - load[remote] += cost; - } - } - } - } - - // for each load calculate the cost, assuming a cardinality 
of 1 (entity size does - // not effect the cost analytics, because each entity is processed individually in - // the closure context) - #[expect(clippy::cast_precision_loss)] - let load = load.map(|range| { - let Some(average) = range.midpoint() else { - return ApproxCost::INF; - }; - - ApproxCost::new(average.as_u32() as f32) - .unwrap_or_else(|| unreachable!("the value is always non-NaN")) - }); - - BlockCost { base, load } - }, - blocks, - alloc, - ); - - Self { inner } - } -} - -#[cfg(test)] -mod tests { - use alloc::alloc::Global; - - use super::{Cost, StatementCostVec}; - use crate::body::{basic_block::BasicBlockId, location::Location}; - - /// `Cost::new` succeeds for valid values (0 and 100). - #[test] - fn cost_new_valid_values() { - let zero = Cost::new(0); - assert!(zero.is_some()); - - let hundred = Cost::new(100); - assert!(hundred.is_some()); - } - - /// `Cost::new(u32::MAX)` returns `None` (reserved as niche for `Option`). - #[test] - fn cost_new_max_returns_none() { - let max = Cost::new(u32::MAX); - assert!(max.is_none()); - } - - /// `Cost::new(u32::MAX - 1)` succeeds (largest valid cost value). - #[test] - fn cost_new_max_minus_one_valid() { - let max_valid = Cost::new(u32::MAX - 1); - assert!(max_valid.is_some()); - } - - /// `Cost::new_unchecked` with valid values works correctly. - /// - /// This test exercises unsafe code and should be run under Miri. - #[test] - #[expect(unsafe_code)] - fn cost_new_unchecked_valid() { - // SAFETY: 0 is not u32::MAX - let zero = unsafe { Cost::new_unchecked(0) }; - assert_eq!(Cost::new(0), Some(zero)); - - // SAFETY: 100 is not u32::MAX - let hundred = unsafe { Cost::new_unchecked(100) }; - assert_eq!(Cost::new(100), Some(hundred)); - } - - /// `StatementCostVec` uses 1-based `Location` indexing to address the underlying - /// 0-based `BlockPartitionedVec`. 
- #[test] - fn statement_cost_vec_location_indexing() { - let mut costs = StatementCostVec::from_iter([2, 3].into_iter(), Global); - - let loc_0_1 = Location { - block: BasicBlockId::new(0), - statement_index: 1, - }; - let loc_0_2 = Location { - block: BasicBlockId::new(0), - statement_index: 2, - }; - let loc_1_2 = Location { - block: BasicBlockId::new(1), - statement_index: 2, - }; - - costs[loc_0_1] = Some(cost!(10)); - costs[loc_0_2] = Some(cost!(20)); - costs[loc_1_2] = Some(cost!(30)); - - assert_eq!(costs.get(loc_0_1), Some(cost!(10))); - assert_eq!(costs.get(loc_0_2), Some(cost!(20))); - assert_eq!(costs.get(loc_1_2), Some(cost!(30))); - - // Unassigned location returns None - let loc_1_1 = Location { - block: BasicBlockId::new(1), - statement_index: 1, - }; - assert_eq!(costs.get(loc_1_1), None); - } -} diff --git a/libs/@local/hashql/mir/src/pass/execution/cost/tests.rs b/libs/@local/hashql/mir/src/pass/execution/cost/tests.rs new file mode 100644 index 00000000000..6f9588f2f00 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/cost/tests.rs @@ -0,0 +1,78 @@ +use alloc::alloc::Global; + +use super::{Cost, StatementCostVec}; +use crate::body::{basic_block::BasicBlockId, location::Location}; + +/// `Cost::new` succeeds for valid values (0 and 100). +#[test] +fn cost_new_valid_values() { + let zero = Cost::new(0); + assert!(zero.is_some()); + + let hundred = Cost::new(100); + assert!(hundred.is_some()); +} + +/// `Cost::new(u32::MAX)` returns `None` (reserved as niche for `Option`). +#[test] +fn cost_new_max_returns_none() { + let max = Cost::new(u32::MAX); + assert!(max.is_none()); +} + +/// `Cost::new(u32::MAX - 1)` succeeds (largest valid cost value). +#[test] +fn cost_new_max_minus_one_valid() { + let max_valid = Cost::new(u32::MAX - 1); + assert!(max_valid.is_some()); +} + +/// `Cost::new_unchecked` with valid values works correctly. +/// +/// This test exercises unsafe code and should be run under Miri. 
+#[test] +#[expect(unsafe_code)] +fn cost_new_unchecked_valid() { + // SAFETY: 0 is not u32::MAX + let zero = unsafe { Cost::new_unchecked(0) }; + assert_eq!(Cost::new(0), Some(zero)); + + // SAFETY: 100 is not u32::MAX + let hundred = unsafe { Cost::new_unchecked(100) }; + assert_eq!(Cost::new(100), Some(hundred)); +} + +/// `StatementCostVec` uses 1-based `Location` indexing to address the underlying +/// 0-based `BlockPartitionedVec`. +#[test] +fn statement_cost_vec_location_indexing() { + let mut costs = StatementCostVec::from_iter([2, 3].into_iter(), Global); + + let loc_0_1 = Location { + block: BasicBlockId::new(0), + statement_index: 1, + }; + let loc_0_2 = Location { + block: BasicBlockId::new(0), + statement_index: 2, + }; + let loc_1_2 = Location { + block: BasicBlockId::new(1), + statement_index: 2, + }; + + costs[loc_0_1] = Some(cost!(10)); + costs[loc_0_2] = Some(cost!(20)); + costs[loc_1_2] = Some(cost!(30)); + + assert_eq!(costs.get(loc_0_1), Some(cost!(10))); + assert_eq!(costs.get(loc_0_2), Some(cost!(20))); + assert_eq!(costs.get(loc_1_2), Some(cost!(30))); + + // Unassigned location returns None + let loc_1_1 = Location { + block: BasicBlockId::new(1), + statement_index: 1, + }; + assert_eq!(costs.get(loc_1_1), None); +} diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index fbeac816c75..22647ab3cee 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -23,14 +23,8 @@ use core::{alloc::Allocator, assert_matches}; use hashql_core::heap::{BumpAllocator, Heap}; -pub use self::{ - cost::{ApproxCost, Cost}, - island::{Island, IslandId, IslandVec}, - placement::error::PlacementDiagnosticCategory, - target::TargetId, - vertex::VertexType, -}; use self::{ + cost::BasicBlockCostAnalysis, fusion::BasicBlockFusion, island::IslandPlacement, placement::{ArcConsistency, PlacementSolverContext}, @@ -40,6 +34,13 @@ use self::{ 
terminator_placement::TerminatorPlacement, traversal::TransferCostConfig, }; +pub use self::{ + cost::{ApproxCost, Cost}, + island::{Island, IslandId, IslandVec}, + placement::error::PlacementDiagnosticCategory, + target::TargetId, + vertex::VertexType, +}; use super::analysis::size_estimation::BodyFootprint; use crate::{ body::{Body, Source, basic_block::BasicBlockVec, local::Local}, @@ -85,13 +86,24 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { let mut statement_costs = statement_costs.map(|cost| cost.unwrap_or_else(|| unreachable!())); - let mut possibilities = BasicBlockSplitting::new_in(&self.scratch).split_in( + let mut assignments = BasicBlockSplitting::new_in(&self.scratch).split_in( context, body, &mut statement_costs, &self.scratch, ); + let block_costs = BasicBlockCostAnalysis { + vertex, + assignments: &assignments, + costs: &statement_costs, + } + .analyze_in( + &TransferCostConfig::new(InformationRange::full()), + &body.basic_blocks, + &self.scratch, + ); + let terminators = TerminatorPlacement::new_in( TransferCostConfig::new(InformationRange::full()), &self.scratch, @@ -100,19 +112,18 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { body, vertex, &self.footprints[body.id], - &possibilities, + &assignments, &self.scratch, ); ArcConsistency { - blocks: &mut possibilities, + blocks: &mut assignments, terminators: &mut terminator_costs, } .run_in(body, &self.scratch); let mut solver = PlacementSolverContext { - assignment: &possibilities, - statements: &statement_costs, + blocks: &block_costs, terminators: &terminator_costs, } .build_in(body, &self.scratch); diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/mod.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/mod.rs index 6e84a40bfbd..620d27debf4 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/mod.rs @@ -156,7 +156,7 @@ 
impl<'ctx, 'parent, 'alloc, A: Allocator, S: BumpAllocator> id: member, heap: TargetHeap::new(), target: HeapElement::EMPTY, - possible: self.solver.data.assignment[member], + possible: self.solver.data.blocks.assignments(member), } } } @@ -283,7 +283,7 @@ impl<'ctx, 'parent, 'alloc, A: Allocator, S: BumpAllocator> fn replay_narrowing(&mut self, body: &Body<'_>) { // Reset unfixed domains to their original AC-3 state for block in &mut self.region.blocks[self.depth..] { - block.possible = self.solver.data.assignment[block.id]; + block.possible = self.solver.data.blocks.assignments(block.id); } self.region.fixed.clear(); @@ -418,10 +418,7 @@ impl<'ctx, 'parent, 'alloc, A: Allocator, S: BumpAllocator> let mut min_stmt = ApproxCost::INF; for target in &block.possible { - min_stmt = cmp::min( - min_stmt, - self.solver.data.statements[target].sum_approx(block.id), - ); + min_stmt = cmp::min(min_stmt, self.solver.data.blocks.cost(block.id, target)); } if min_stmt < ApproxCost::INF { diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/mod.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/mod.rs index 5c69ddd2268..6abdf91474f 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/mod.rs @@ -190,13 +190,12 @@ where let mut current_minimum = ApproxCost::INF; let mut minimum_transition_cost = None; - for target in &self.solver.data.assignment[edge.target.block] { + for target in &self.solver.data.blocks.assignments(edge.target.block) { let Some(cost) = edge.matrix.get(source, target) else { continue; }; - let mut block_cost = - self.solver.data.statements[target].sum_approx(edge.target.block); + let mut block_cost = self.solver.data.blocks.cost(edge.target.block, target); block_cost += cost; if block_cost < current_minimum { @@ -212,13 +211,12 @@ where let mut current_minimum = ApproxCost::INF; let mut 
minimum_transition_cost = None; - for source in &self.solver.data.assignment[edge.source.block] { + for source in &self.solver.data.blocks.assignments(edge.source.block) { let Some(cost) = edge.matrix.get(source, target) else { continue; }; - let mut block_cost = - self.solver.data.statements[source].sum_approx(edge.source.block); + let mut block_cost = self.solver.data.blocks.cost(edge.source.block, source); block_cost += cost; if block_cost < current_minimum { @@ -252,7 +250,7 @@ where // so that join edges get proportional influence without frequency data. // If a neighbor has no assignment yet, we optimistically assume its best local option. // Returns `None` if any assigned neighbor lacks a valid transition to this target. - let mut cost = self.solver.data.statements[target].sum_approx(block); + let mut cost = self.solver.data.blocks.cost(block, target); for pred in body.basic_blocks.predecessors(block) { if pred == block { @@ -328,7 +326,7 @@ where ) -> TargetHeap { let mut heap = TargetHeap::new(); - for target in &self.solver.data.assignment[block] { + for target in &self.solver.data.blocks.assignments(block) { if let Some(cost) = self.estimate_target(body, region, block, target) { heap.insert(target, cost); } diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/mod.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/mod.rs index cb567828f97..18eba8c9439 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/mod.rs @@ -33,9 +33,7 @@ use crate::{ }, context::MirContext, pass::execution::{ - ApproxCost, - cost::StatementCostVec, - target::{TargetArray, TargetBitSet, TargetId}, + ApproxCost, cost::BasicBlockCostVec, target::TargetId, terminator_placement::TerminatorCostVec, }, }; @@ -88,8 +86,8 @@ fn back_edge_span(body: &Body<'_>, members: &[BasicBlockId]) -> SpanId { /// (`statements`), and terminator transition costs (`terminators`). 
#[derive(Debug, Copy, Clone)] pub(crate) struct PlacementSolverContext<'ctx, A: Allocator> { - pub assignment: &'ctx BasicBlockSlice, - pub statements: &'ctx TargetArray>, + pub blocks: &'ctx BasicBlockCostVec, + pub terminators: &'ctx TerminatorCostVec, } diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs index f9e7426ca4e..666aa022682 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs @@ -309,7 +309,7 @@ impl EntityPath { /// entity schema. [`Properties`](Self::Properties) depends on the entity's type parameter. /// [`PropertyMetadata`](Self::PropertyMetadata) is estimated at 1/4 of properties size, /// since it stores lightweight per-property-key metadata rather than values. - pub(crate) fn transfer_size(self, config: &TransferCostConfig) -> InformationRange { + pub(crate) fn estimate_size(self, config: &TransferCostConfig) -> InformationRange { #[expect(clippy::match_same_arms, reason = "readability")] #[expect(clippy::integer_division)] match self { @@ -478,47 +478,6 @@ impl EntityPathBitSet { pub(crate) const fn insert_all(&mut self) { *self = Self::TOP; } - - /// Expands composite paths to their leaf descendants. - /// - /// Composites like [`RecordId`](EntityPath::RecordId) are replaced by their leaf children - /// (e.g. [`WebId`](EntityPath::WebId), [`EntityUuid`](EntityPath::EntityUuid), etc.). - /// Leaf paths are kept as-is. - /// - /// This works because the costs are purely cumulative, therefore any composite path can be - /// expanded to its leaf descendants without affecting the total cost. This allows for greater - /// precision during cost estimation, to allow for attributing the cost if something is - /// available on multiple backends. 
- #[expect(clippy::cast_possible_truncation)] - pub(crate) fn to_leaves(self) -> FiniteBitSet { - let mut result = FiniteBitSet::new_empty(core::mem::variant_count::() as u32); - - for path in &self.0 { - if path.children().is_empty() { - result.insert(path); - continue; - } - - for &child in path.children() { - if child.children().is_empty() { - result.insert(child); - } - } - } - - result - } - - /// Sums the [`transfer_size`](EntityPath::transfer_size) of every path in this set. - pub(crate) fn transfer_size(self, config: &TransferCostConfig) -> InformationRange { - let mut total = InformationRange::zero(); - - for path in &self.0 { - total += path.transfer_size(config); - } - - total - } } impl HasTop for TraversalLattice { diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs index 183d15eaf86..d0999c80df8 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs @@ -18,7 +18,7 @@ pub(crate) use analysis::{TraversalAnalysisVisitor, TraversalResult}; pub use self::entity::{EntityPath, EntityPathBitSet}; pub(crate) use self::{access::Access, entity::TransferCostConfig}; -use super::VertexType; +use super::{VertexType, target::TargetBitSet}; use crate::pass::analysis::{ dataflow::lattice::{HasBottom, HasTop, JoinSemiLattice}, size_estimation::InformationRange, @@ -128,11 +128,21 @@ impl TraversalPathBitSet { } } - /// Sums the [`transfer_size`](EntityPath::transfer_size) of every path in this set. 
+ #[must_use] #[inline] - pub(crate) fn transfer_size(self, config: &TransferCostConfig) -> InformationRange { + pub fn iter(&self) -> impl ExactSizeIterator { + self.into_iter() + } +} + +impl IntoIterator for &TraversalPathBitSet { + type Item = TraversalPath; + + type IntoIter = impl ExactSizeIterator; + + fn into_iter(self) -> Self::IntoIter { match self { - Self::Entity(entity_paths) => entity_paths.transfer_size(config), + TraversalPathBitSet::Entity(bitset) => bitset.into_iter().map(TraversalPath::Entity), } } } @@ -193,3 +203,20 @@ pub enum TraversalPath { /// A path into the entity schema. Entity(EntityPath), } + +impl TraversalPath { + #[inline] + #[must_use] + pub const fn origin(self) -> TargetBitSet { + match self { + Self::Entity(path) => path.origin(), + } + } + + #[inline] + pub(crate) fn estimate_size(self, config: &TransferCostConfig) -> InformationRange { + match self { + Self::Entity(path) => path.estimate_size(config), + } + } +} diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs index 694c280fc1d..07217d8f6d2 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs @@ -496,14 +496,14 @@ fn composite_transfer_size_matches_children() { for path in EntityPath::all() { if path.ancestors().first() == Some(&composite) { - expected += path.transfer_size(&config); + expected += path.estimate_size(&config); has_children = true; } } if has_children { assert_eq!( - composite.transfer_size(&config), + composite.estimate_size(&config), expected, "{composite:?} transfer_size doesn't match sum of immediate children" ); @@ -520,8 +520,8 @@ fn inferred_provenance_transfer_size_is_static() { let small_config = TransferCostConfig::new(InformationRange::zero()); let large_config = TransferCostConfig::new(InformationRange::value(InformationUnit::new(1000))); - let small = 
EntityPath::ProvenanceInferred.transfer_size(&small_config); - let large = EntityPath::ProvenanceInferred.transfer_size(&large_config); + let small = EntityPath::ProvenanceInferred.estimate_size(&small_config); + let large = EntityPath::ProvenanceInferred.estimate_size(&large_config); assert_eq!(small, large); assert_eq!( From bcd450e90eaa7dca4c03395166c653105074cbd6 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Mon, 2 Mar 2026 21:15:55 +0100 Subject: [PATCH 26/32] feat: checkpoint --- .../mir/src/pass/execution/cost/analysis.rs | 31 +++++ .../execution/placement/solve/csp/tests.rs | 121 ++++++++---------- .../placement/solve/estimate/tests.rs | 24 ++-- .../pass/execution/placement/solve/tests.rs | 60 ++++++--- ...ed_postgres_embedding_interpreter.snap.new | 10 -- ...non_traversal_unaffected_by_costs.snap.new | 18 +++ .../traversal_multiple_paths_cost.snap.new | 16 +++ .../traversal_single_path_cost.snap.new | 16 +++ ...traversal_swallowing_reduces_cost.snap.new | 14 ++ 9 files changed, 194 insertions(+), 116 deletions(-) delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap.new create mode 100644 libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap.new create mode 100644 libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap.new create mode 100644 libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_path_cost.snap.new create mode 100644 libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap.new diff --git a/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs b/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs index 89452833a59..94c5da5494f 100644 --- a/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs +++ 
b/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs @@ -61,6 +61,37 @@ impl BasicBlockCostVec { } } +impl BasicBlockCostVec { + /// Constructs a `BasicBlockCostVec` from per-block target domains and per-target statement + /// costs, without traversal analysis. + #[cfg(test)] + pub(crate) fn from_statements( + domains: &[TargetBitSet], + statements: &TargetArray>, + alloc: A, + ) -> Self { + let inner = BasicBlockVec::from_fn_in( + domains.len(), + |id: BasicBlockId| { + let targets = domains[id.as_usize()]; + let mut costs = TargetArray::from_raw([BasicBlockTargetCost::ZERO; _]); + + for target in &targets { + costs[target] = BasicBlockTargetCost { + base: statements[target].sum_approx(id), + load: ApproxCost::ZERO, + }; + } + + BasicBlockCost { targets, costs } + }, + alloc, + ); + + Self { inner } + } +} + pub(crate) struct BasicBlockCostAnalysis<'ctx, A: Allocator> { pub vertex: VertexType, pub assignments: &'ctx BasicBlockSlice, diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/tests.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/tests.rs index f77da3e3e38..d99d752cbc3 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/tests.rs @@ -6,7 +6,7 @@ use hashql_core::{heap::Heap, id::IdArray, r#type::environment::Environment}; use super::{super::PlacementSolver, CyclicPlacementRegion}; use crate::{ - body::{basic_block::BasicBlockSlice, location::Location}, + body::location::Location, builder::body, intern::Interner, pass::execution::{ @@ -16,7 +16,9 @@ use crate::{ PlacementRegionId, PlacementSolverContext, condensation::PlacementRegionKind, csp::ConstraintSatisfaction, - tests::{all_targets, bb, fix_block, stmt_costs, target_set, terminators}, + tests::{ + all_targets, bb, fix_block, make_block_costs, stmt_costs, target_set, terminators, + }, }, target::{TargetArray, TargetId}, 
terminator_placement::{TerminatorCostVec, TransMatrix}, @@ -78,10 +80,9 @@ fn narrow_restricts_successor_domain() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -129,10 +130,9 @@ fn narrow_restricts_predecessor_domain() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -180,10 +180,9 @@ fn narrow_to_empty_domain() { bb(1): [complete(1)] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -234,10 +233,9 @@ fn narrow_multiple_edges_intersect() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -294,10 +292,9 @@ fn replay_narrowing_resets_then_repropagates() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -373,10 +370,9 @@ fn 
lower_bound_min_statement_cost_per_block() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -429,10 +425,9 @@ fn lower_bound_min_transition_cost_per_edge() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -484,10 +479,9 @@ fn lower_bound_skips_self_loop_edges() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -538,10 +532,9 @@ fn lower_bound_fixed_successor_uses_concrete_target() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -595,10 +588,9 @@ fn lower_bound_all_fixed_returns_zero() { bb(1): [complete(1)] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -651,10 +643,9 @@ fn mrv_selects_smallest_domain() { ] } - let assignment = 
BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -702,10 +693,9 @@ fn mrv_tiebreak_by_constraint_degree() { bb(2): [complete(1)] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -756,10 +746,9 @@ fn mrv_skips_fixed_blocks() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -814,10 +803,9 @@ fn greedy_solves_two_block_loop() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -869,10 +857,9 @@ fn greedy_rollback_finds_alternative() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -927,10 +914,9 @@ fn greedy_fails_when_infeasible() { bb(1): [P->P = 0] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let 
data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -985,10 +971,9 @@ fn bnb_finds_optimal() { bb(2): [diagonal(0), I->P = 20, P->I = 20] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -1041,10 +1026,9 @@ fn bnb_retains_ranked_solutions() { bb(1): [diagonal(0)] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -1118,10 +1102,9 @@ fn bnb_pruning_preserves_optimal() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -1175,10 +1158,9 @@ fn retry_returns_ranked_solutions_in_order() { bb(1): [diagonal(0), I->P = 5, P->I = 5] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -1246,10 +1228,9 @@ fn retry_exhausts_then_perturbs() { bb(1): [diagonal(0)] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = 
PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -1314,10 +1295,9 @@ fn greedy_rollback_on_empty_heap() { bb(1): [I->P = 0, I->I = 0] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -1385,10 +1365,9 @@ fn retry_perturbation_after_ranked_exhaustion() { bb(1): [complete(0)] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/tests.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/tests.rs index 43d7b64ea49..d16e35d5165 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/tests.rs @@ -5,12 +5,12 @@ use hashql_core::{heap::Heap, id::IdArray, r#type::environment::Environment}; use super::{ super::{ PlacementSolverContext, - tests::{bb, find_region_of, stmt_costs, target_set, terminators}, + tests::{bb, find_region_of, make_block_costs, stmt_costs, target_set, terminators}, }, *, }; use crate::{ - body::{basic_block::BasicBlockSlice, location::Location}, + body::location::Location, builder::body, intern::Interner, pass::execution::{ @@ -163,10 +163,9 @@ fn self_loop_edges_excluded_from_cost() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let 
data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let solver = data.build_in(&body, &heap); @@ -228,10 +227,9 @@ fn boundary_multiplier_applied_to_cross_region_edges() { bb(1): [diagonal(0), I->P = 0, P->I = 20] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let solver = data.build_in(&body, &heap); @@ -301,10 +299,9 @@ fn infeasible_transition_returns_none() { bb(0): [I->I = 0] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let solver = data.build_in(&body, &heap); @@ -367,10 +364,9 @@ fn unassigned_neighbor_uses_heuristic_minimum() { bb(0): [diagonal(0), I->P = 10, P->I = 5] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; // bb0 is NOT assigned — determine_target returns None diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs index d075c605474..b4ff3729567 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs @@ -23,12 +23,16 @@ use crate::{ context::MirContext, error::MirDiagnosticCategory, intern::Interner, - pass::execution::{ - ApproxCost, Cost, - cost::StatementCostVec, - placement::error::PlacementDiagnosticCategory, - target::{TargetArray, 
TargetBitSet, TargetId}, - terminator_placement::{TerminatorCostVec, TransMatrix}, + pass::{ + analysis::size_estimation::InformationRange, + execution::{ + ApproxCost, Cost, VertexType, + cost::{BasicBlockCostAnalysis, BasicBlockCostVec, StatementCostVec}, + placement::error::PlacementDiagnosticCategory, + target::{TargetArray, TargetBitSet, TargetId}, + terminator_placement::{TerminatorCostVec, TransMatrix}, + traversal::TransferCostConfig, + }, }, }; @@ -113,6 +117,25 @@ pub(crate) fn bb(index: u32) -> BasicBlockId { BasicBlockId::new(index) } +pub(crate) fn make_block_costs<'heap>( + body: &Body<'_>, + domains: &[TargetBitSet], + statements: &TargetArray>, + alloc: &'heap Heap, +) -> BasicBlockCostVec<&'heap Heap> { + let assignments = BasicBlockSlice::from_raw(domains); + BasicBlockCostAnalysis { + vertex: VertexType::Entity, + assignments, + costs: statements, + } + .analyze_in( + &TransferCostConfig::new(InformationRange::full()), + &body.basic_blocks, + alloc, + ) +} + const I: TargetId = TargetId::Interpreter; const P: TargetId = TargetId::Postgres; @@ -125,10 +148,9 @@ pub(crate) fn run_solver<'heap>( terminators: &TerminatorCostVec<&'heap Heap>, ) -> BasicBlockVec { let mut context = MirContext::new(env, interner); - let assignment = BasicBlockSlice::from_raw(domains); + let block_costs = make_block_costs(body, domains, statements, env.heap); let data = PlacementSolverContext { - assignment, - statements, + blocks: &block_costs, terminators, }; let mut solver = data.build_in(body, env.heap); @@ -803,10 +825,9 @@ fn rewind_exhausts_all_regions() { bb(2): [I->P = 0, P->I = 0] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -955,10 +976,9 @@ fn backward_pass_keeps_assignment_when_csp_fails() { 
bb(3): [complete(0)] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -1119,10 +1139,9 @@ fn trivial_failure_emits_diagnostic() { bb(2): [I->P = 0, P->I = 0] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -1189,10 +1208,9 @@ fn cyclic_failure_emits_diagnostic() { bb(1): [I->P = 0] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap.new b/libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap.new deleted file mode 100644 index 91b4141ab42..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap.new +++ /dev/null @@ -1,10 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/execution/tests.rs -assertion_line: 65 -expression: output ---- -Assignment: - bb0: interpreter - -Islands: - 0: target=interpreter, blocks=[BasicBlockId(0)] diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap.new b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap.new new file mode 100644 index 
00000000000..6ae9ce9bf03 --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap.new @@ -0,0 +1,18 @@ +--- +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs +assertion_line: 92 +expression: output +--- +fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { + let %2: ? + let %3: Integer + let %4: Boolean + + bb0(): { + %2 = %1.properties // cost: 8 + %3 = 42 // cost: 8 + %4 = %3 > 10 // cost: 8 + + return %4 + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap.new b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap.new new file mode 100644 index 00000000000..e647a1c125e --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap.new @@ -0,0 +1,16 @@ +--- +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs +assertion_line: 92 +expression: output +--- +fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> (?, Boolean) { + let %2: ? 
+ let %3: (?, Boolean) + + bb0(): { + %2 = %1.properties // cost: 8 + %3 = (%1.properties, %1.metadata.archived) // cost: 8 + + return %3 + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_path_cost.snap.new b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_path_cost.snap.new new file mode 100644 index 00000000000..1d09d7ea0ce --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_path_cost.snap.new @@ -0,0 +1,16 @@ +--- +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs +assertion_line: 92 +expression: output +--- +fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { + let %2: Boolean + let %3: Boolean + + bb0(): { + %2 = %1.metadata.archived // cost: 8 + %3 = !%2 // cost: 8 + + return %3 + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap.new b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap.new new file mode 100644 index 00000000000..cccbfb950df --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap.new @@ -0,0 +1,14 @@ +--- +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs +assertion_line: 92 +expression: output +--- +fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> (?, ?) { + let %2: (?, ?) 
+ + bb0(): { + %2 = (%1.metadata.record_id.entity_id.web_id, %1.metadata.record_id) // cost: 8 + + return %2 + } +} From 34e44d3878e5cc679ac440fd43b2bdd62daf589f Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Mon, 2 Mar 2026 21:20:10 +0100 Subject: [PATCH 27/32] feat: checkpoint --- .../mir/src/pass/execution/cost/analysis.rs | 33 +------ .../execution/placement/solve/csp/tests.rs | 94 +++++++++---------- .../placement/solve/estimate/tests.rs | 18 ++-- .../pass/execution/placement/solve/tests.rs | 57 +++++------ 4 files changed, 86 insertions(+), 116 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs b/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs index 94c5da5494f..3d7412ca700 100644 --- a/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs +++ b/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs @@ -61,37 +61,6 @@ impl BasicBlockCostVec { } } -impl BasicBlockCostVec { - /// Constructs a `BasicBlockCostVec` from per-block target domains and per-target statement - /// costs, without traversal analysis. - #[cfg(test)] - pub(crate) fn from_statements( - domains: &[TargetBitSet], - statements: &TargetArray>, - alloc: A, - ) -> Self { - let inner = BasicBlockVec::from_fn_in( - domains.len(), - |id: BasicBlockId| { - let targets = domains[id.as_usize()]; - let mut costs = TargetArray::from_raw([BasicBlockTargetCost::ZERO; _]); - - for target in &targets { - costs[target] = BasicBlockTargetCost { - base: statements[target].sum_approx(id), - load: ApproxCost::ZERO, - }; - } - - BasicBlockCost { targets, costs } - }, - alloc, - ); - - Self { inner } - } -} - pub(crate) struct BasicBlockCostAnalysis<'ctx, A: Allocator> { pub vertex: VertexType, pub assignments: &'ctx BasicBlockSlice, @@ -408,7 +377,7 @@ mod tests { assert!(embedding_cost > embedding_base); } - /// RecordId (composite) expands to leaf descendants. All leaves are Postgres-origin, + /// `RecordId` (composite) expands to leaf descendants. 
All leaves are Postgres-origin, /// so Postgres pays no premium and Interpreter does. #[test] fn composite_path_expands_to_leaves() { diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/tests.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/tests.rs index d99d752cbc3..074cc7b9a48 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/tests.rs @@ -2,7 +2,7 @@ use core::mem; -use hashql_core::{heap::Heap, id::IdArray, r#type::environment::Environment}; +use hashql_core::{heap::Heap, id::IdArray, symbol::sym, r#type::environment::Environment}; use super::{super::PlacementSolver, CyclicPlacementRegion}; use crate::{ @@ -59,8 +59,8 @@ fn narrow_restricts_successor_domain() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -109,8 +109,8 @@ fn narrow_restricts_predecessor_domain() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -161,8 +161,8 @@ fn narrow_to_empty_domain() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + 
decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; cond = load true; if cond then bb1() else bb2(); }, bb1() { x = load 0; goto bb0(); }, bb2() { return x; } @@ -209,8 +209,8 @@ fn narrow_multiple_edges_intersect() { let env = Environment::new(&heap); // bb0→bb1, bb0→bb2, bb1→bb2, bb2→bb0, bb2→bb3 - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { cond = load true; if cond then bb1() else bb2(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -271,8 +271,8 @@ fn replay_narrowing_resets_then_repropagates() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -337,8 +337,8 @@ fn lower_bound_min_statement_cost_per_block() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -398,8 +398,8 @@ fn lower_bound_min_transition_cost_per_edge() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: 
(), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -456,8 +456,8 @@ fn lower_bound_skips_self_loop_edges() { let env = Environment::new(&heap); // bb0→bb0 (self-loop), bb0→bb1, bb1→bb0, bb1→bb2 (exit) - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { cond = load true; if cond then bb0() else bb1(); }, bb1() { cond = load true; if cond then bb0() else bb2(); }, bb2() { x = load 0; return x; } @@ -505,8 +505,8 @@ fn lower_bound_fixed_successor_uses_concrete_target() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -564,8 +564,8 @@ fn lower_bound_all_fixed_returns_zero() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; cond = load true; if cond then bb1() else bb2(); }, bb1() { x = load 0; goto bb0(); }, bb2() { return x; } @@ -617,8 +617,8 @@ fn mrv_selects_smallest_domain() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: 
[Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -670,8 +670,8 @@ fn mrv_tiebreak_by_constraint_degree() { let env = Environment::new(&heap); // bb0→bb1, bb0→bb2, bb1→bb0, bb2→bb0, bb0→bb3 (exit) - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { cond = load true; switch cond [0 => bb1(), 1 => bb2(), _ => bb3()]; }, bb1() { x = load 0; goto bb0(); }, bb2() { x = load 0; goto bb0(); }, @@ -720,8 +720,8 @@ fn mrv_skips_fixed_blocks() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -776,8 +776,8 @@ fn greedy_solves_two_block_loop() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { cond = load true; if cond then bb0() else bb2(); }, bb2() { return x; } @@ -830,8 +830,8 @@ fn greedy_rollback_finds_alternative() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { 
x = load 0; goto bb1(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -894,8 +894,8 @@ fn greedy_fails_when_infeasible() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; cond = load true; if cond then bb1() else bb2(); }, bb1() { x = load 0; goto bb0(); }, bb2() { return x; } @@ -940,8 +940,8 @@ fn bnb_finds_optimal() { let env = Environment::new(&heap); // bb0→bb1, bb0→bb2, bb1→bb0, bb2→bb0, bb0→bb3 (exit) - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { cond = load true; switch cond [0 => bb1(), 1 => bb2(), _ => bb3()]; }, bb1() { x = load 0; goto bb0(); }, bb2() { x = load 0; goto bb0(); }, @@ -1000,8 +1000,8 @@ fn bnb_retains_ranked_solutions() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; cond = load true; if cond then bb1() else bb2(); }, bb1() { x = load 0; goto bb0(); }, bb2() { return x; } @@ -1069,8 +1069,8 @@ fn bnb_pruning_preserves_optimal() { let env = Environment::new(&heap); // 4-block SCC: bb0→bb1→bb2→bb3→bb0, plus bb4 exit - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = load 
0; goto bb2(); }, bb2() { x = load 0; goto bb3(); }, @@ -1131,8 +1131,8 @@ fn retry_returns_ranked_solutions_in_order() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; cond = load true; if cond then bb1() else bb2(); }, bb1() { x = load 0; goto bb0(); }, bb2() { return x; } @@ -1202,8 +1202,8 @@ fn retry_exhausts_then_perturbs() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; cond = load true; if cond then bb1() else bb2(); }, bb1() { x = load 0; goto bb0(); }, bb2() { return x; } @@ -1266,8 +1266,8 @@ fn greedy_rollback_on_empty_heap() { // 2-block SCC: bb0↔bb1, bb2 exit // bb0: `if cond then bb1 else bb2` → [bb2(arm0), bb1(arm1)] - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { cond = load true; if cond then bb1() else bb2(); }, bb1() { x = load 0; goto bb0(); }, bb2() { return x; } @@ -1338,8 +1338,8 @@ fn retry_perturbation_after_ranked_exhaustion() { let env = Environment::new(&heap); // 2-block SCC: bb0↔bb1, bb2 exit - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { cond = load true; if cond then bb1() else bb2(); }, bb1() { x = load 0; goto bb0(); }, bb2() { return x; } diff --git 
a/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/tests.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/tests.rs index d16e35d5165..dde4014dd4b 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/tests.rs @@ -1,6 +1,6 @@ #![expect(clippy::min_ident_chars)] -use hashql_core::{heap::Heap, id::IdArray, r#type::environment::Environment}; +use hashql_core::{heap::Heap, id::IdArray, symbol::sym, r#type::environment::Environment}; use super::{ super::{ @@ -135,8 +135,8 @@ fn self_loop_edges_excluded_from_cost() { let env = Environment::new(&heap); // bb0: self-loop via `if cond then bb0() else bb1()`, bb1: return - let body = body!(interner, env; fn@0/0 -> Int { - decl cond: Bool, x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], cond: Bool, x: Int; bb0() { cond = load true; @@ -199,8 +199,8 @@ fn boundary_multiplier_applied_to_cross_region_edges() { let env = Environment::new(&heap); // bb0 → bb1 → bb2, three trivial SCCs - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int; bb0() { x = load 0; @@ -277,8 +277,8 @@ fn infeasible_transition_returns_none() { let env = Environment::new(&heap); // bb0 → bb1, two trivial SCCs - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int; bb0() { x = load 0; @@ -335,8 +335,8 @@ fn unassigned_neighbor_uses_heuristic_minimum() { let env = Environment::new(&heap); // bb0 → bb1, two trivial SCCs - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + 
decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int; bb0() { x = load 0; diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs index b4ff3729567..6248f2ba701 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs @@ -5,6 +5,7 @@ use core::alloc::Allocator; use hashql_core::{ heap::{BumpAllocator, Heap}, id::{IdArray, bit_vec::FiniteBitSet}, + symbol::sym, r#type::environment::Environment, }; use hashql_diagnostics::severity::Severity; @@ -207,8 +208,8 @@ fn forward_pass_assigns_all_blocks() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl cond: Bool, x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], cond: Bool, x: Int; bb0() { cond = load true; @@ -271,8 +272,8 @@ fn backward_pass_improves_suboptimal_forward() { // bb1=P look cheap. But bb3 ultimately gets I (because bb2=I with diagonal- // only forces bb3=I after backward). Backward then re-evaluates bb1 with // bb3=I known and sees P→I=50, correcting bb1 to I. - let body = body!(interner, env; fn@0/0 -> Int { - decl cond: Bool, x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], cond: Bool, x: Int; bb0() { cond = load true; @@ -349,8 +350,8 @@ fn rewind_triggers_on_join_with_conflicting_predecessors() { // bb3=I: bb1→bb3 I→I ok, bb2→bb3 I→I missing → infeasible // bb3=P: bb1→bb3 I→P missing → infeasible // bb3 heap empty → rewind flips bb2 (or bb1) to resolve the conflict. 
- let body = body!(interner, env; fn@0/0 -> Int { - decl cond: Bool, x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], cond: Bool, x: Int; bb0() { cond = load true; @@ -434,8 +435,8 @@ fn rewind_skips_exhausted_region() { // Rewind: bb2 has no alternatives (domain {I}) → skip. bb1 has alternative P. // bb1=P, resume. bb2=I (re-estimated). bb3: bb1→bb3 P→I ok, bb2→bb3 I→I ok. // bb3=I succeeds. - let body = body!(interner, env; fn@0/0 -> Int { - decl cond: Bool, x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], cond: Bool, x: Int; bb0() { x = load 0; @@ -497,8 +498,8 @@ fn single_block_trivial_region() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int; bb0() { x = load 0; @@ -533,8 +534,8 @@ fn cyclic_region_in_forward_backward() { let env = Environment::new(&heap); // bb0 → bb1, bb1 → bb2, bb2 → bb1 (loop), bb2 → bb3 (exit) - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; @@ -612,8 +613,8 @@ fn rewind_retries_cyclic_region() { // bb3=P: diagonal I→P missing → infeasible. // Rewind reaches the SCC; retry() picks all-P. With SCC=all-P: // bb3=P: diagonal P→P ok, bb3→bb4 P→I ok → feasible. 
- let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; @@ -712,8 +713,8 @@ fn rewind_skips_exhausted_cyclic_region() { // With bb0=P: // SCC: all-P (forced). bb3 predecessors: bb0=P, bb2=P. // bb3=I: bb0→bb3 P→I ok. bb2→bb3 P→I ok. bb3→bb4 I→I ok. Feasible! - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { cond = load true; @@ -792,8 +793,8 @@ fn rewind_exhausts_all_regions() { let env = Environment::new(&heap); // Diamond: bb0→bb1(then), bb0→bb2(else), bb1→bb3, bb2→bb3. All trivial SCCs. - let body = body!(interner, env; fn@0/0 -> Int { - decl cond: Bool, x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], cond: Bool, x: Int; bb0() { cond = load true; @@ -855,8 +856,8 @@ fn forward_pass_rewinds_on_cyclic_failure() { let env = Environment::new(&heap); // bb0→bb1→bb2→bb1(loop)/bb2→bb3. - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; @@ -924,8 +925,8 @@ fn backward_pass_keeps_assignment_when_csp_fails() { let env = Environment::new(&heap); // bb0→bb1→bb2→bb1(loop)/bb2→bb3→bb4. 
- let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; @@ -1030,8 +1031,8 @@ fn backward_pass_adopts_better_cyclic_solution() { let env = Environment::new(&heap); // bb0→bb1→bb2→bb1(loop)/bb2→bb3→bb4. - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; @@ -1106,8 +1107,8 @@ fn trivial_failure_emits_diagnostic() { // bb1→bb3: diagonal only. bb2→bb3: swap only (I→P, P→I). // No assignment for bb3 satisfies both predecessors simultaneously, and // rewind exhausts all alternatives. - let body = body!(interner, env; fn@0/0 -> Int { - decl cond: Bool, x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], cond: Bool, x: Int; bb0() { cond = load true; @@ -1173,8 +1174,8 @@ fn cyclic_failure_emits_diagnostic() { // bb0 branches to bb1(then) and bb2(else). bb1→bb0 closes the cycle. // bb2 is the exit. SCC = {bb0, bb1}, processed first. 
- let body = body!(interner, env; fn@0/0 -> Int { - decl cond: Bool, x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], cond: Bool, x: Int; bb0() { cond = load true; From cc32a504db9d3cfa2199ceb700401b821de06e97 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Mon, 2 Mar 2026 21:33:58 +0100 Subject: [PATCH 28/32] feat: checkpoint --- .../mir/src/pass/execution/cost/analysis.rs | 8 +- .../pass/execution/placement/solve/tests.rs | 172 +++++++++++++++++- .../statement_placement/interpret/tests.rs | 20 +- .../execution/terminator_placement/tests.rs | 108 +---------- .../non_traversal_unaffected_by_costs.snap | 2 +- ...non_traversal_unaffected_by_costs.snap.new | 18 -- .../traversal_multiple_paths_cost.snap | 4 +- .../traversal_multiple_paths_cost.snap.new | 16 -- .../interpret/traversal_single_path_cost.snap | 2 +- .../traversal_single_path_cost.snap.new | 16 -- .../traversal_swallowing_reduces_cost.snap | 2 +- ...traversal_swallowing_reduces_cost.snap.new | 14 -- 12 files changed, 190 insertions(+), 192 deletions(-) delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap.new delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap.new delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_path_cost.snap.new delete mode 100644 libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap.new diff --git a/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs b/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs index 3d7412ca700..5725f6b4c6b 100644 --- a/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs +++ b/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs @@ -141,6 +141,7 @@ impl 
BasicBlockCostAnalysis<'_, A> { #[cfg(test)] mod tests { + #![expect(clippy::min_ident_chars)] use alloc::alloc::Global; use hashql_core::{heap::Heap, symbol::sym, r#type::environment::Environment}; @@ -151,7 +152,8 @@ mod tests { builder::body, intern::Interner, pass::{ - analysis::size_estimation::InformationRange, execution::traversal::TransferCostConfig, + analysis::size_estimation::{InformationRange, InformationUnit}, + execution::traversal::TransferCostConfig, }, }; @@ -353,7 +355,9 @@ mod tests { costs: &costs, }; - let result = analysis.analyze_in(&default_config(), &body.basic_blocks, Global); + // Use a bounded properties size so both premiums are finite and comparable. + let config = TransferCostConfig::new(InformationRange::value(InformationUnit::new(100))); + let result = analysis.analyze_in(&config, &body.basic_blocks, Global); let bb0 = BasicBlockId::new(0); let interpreter_cost = result.cost(bb0, TargetId::Interpreter); diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs index 6248f2ba701..592ac04d892 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs @@ -25,7 +25,7 @@ use crate::{ error::MirDiagnosticCategory, intern::Interner, pass::{ - analysis::size_estimation::InformationRange, + analysis::size_estimation::{InformationRange, InformationUnit}, execution::{ ApproxCost, Cost, VertexType, cost::{BasicBlockCostAnalysis, BasicBlockCostVec, StatementCostVec}, @@ -123,6 +123,22 @@ pub(crate) fn make_block_costs<'heap>( domains: &[TargetBitSet], statements: &TargetArray>, alloc: &'heap Heap, +) -> BasicBlockCostVec<&'heap Heap> { + make_block_costs_with_config( + body, + domains, + statements, + &TransferCostConfig::new(InformationRange::full()), + alloc, + ) +} + +pub(crate) fn make_block_costs_with_config<'heap>( + body: &Body<'_>, + domains: 
&[TargetBitSet], + statements: &TargetArray>, + config: &TransferCostConfig, + alloc: &'heap Heap, ) -> BasicBlockCostVec<&'heap Heap> { let assignments = BasicBlockSlice::from_raw(domains); BasicBlockCostAnalysis { @@ -130,15 +146,12 @@ pub(crate) fn make_block_costs<'heap>( assignments, costs: statements, } - .analyze_in( - &TransferCostConfig::new(InformationRange::full()), - &body.basic_blocks, - alloc, - ) + .analyze_in(config, &body.basic_blocks, alloc) } const I: TargetId = TargetId::Interpreter; const P: TargetId = TargetId::Postgres; +const E: TargetId = TargetId::Embedding; pub(crate) fn run_solver<'heap>( body: &Body<'heap>, @@ -1227,3 +1240,150 @@ fn cyclic_failure_emits_diagnostic() { MirDiagnosticCategory::Placement(PlacementDiagnosticCategory::UnsatisfiablePlacement), ); } + +/// Path premiums steer the solver toward origin backends. +/// +/// bb0 accesses `vertex.encodings.vectors` (Embedding-origin) and `vertex.properties` +/// (Postgres-origin). With equal base statement costs and permissive transitions, the solver +/// picks the backend that minimizes the combined path premium. Embedding avoids the Vectors +/// premium (3072) but pays the Properties premium. Postgres avoids the Properties premium +/// but pays the Vectors premium. Interpreter pays both. +/// +/// The solver should not pick Interpreter for bb0 since both specialized backends have lower +/// total cost. +#[test] +fn path_premiums_influence_placement() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> (?, ?) 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], result: (?, ?); + @proj properties = vertex.properties: ?, + encodings = vertex.encodings: ?, + vectors = encodings.vectors: ?; + + bb0() { + result = tuple properties, vectors; + return result; + } + }); + + let all = target_set(&[I, P, E]); + let domains = [all]; + + let mut statements: TargetArray> = + IdArray::from_fn(|_: TargetId| StatementCostVec::new_in(&body.basic_blocks, &heap)); + + // Equal base costs so the path premium is the deciding factor. + stmt_costs! { statements; bb(0): I = 1, P = 1, E = 1 } + + let terminators = TerminatorCostVec::new(&body.basic_blocks, &heap); + + let config = TransferCostConfig::new(InformationRange::value(InformationUnit::new(100))); + let block_costs = make_block_costs_with_config(&body, &domains, &statements, &config, &heap); + + // Verify the premiums are as expected: Interpreter pays both, others pay one each. + let interp_cost = block_costs.cost(bb(0), I); + let pg_cost = block_costs.cost(bb(0), P); + let emb_cost = block_costs.cost(bb(0), E); + + assert!( + interp_cost > pg_cost, + "Interpreter ({interp_cost}) should be more expensive than Postgres ({pg_cost})" + ); + assert!( + interp_cost > emb_cost, + "Interpreter ({interp_cost}) should be more expensive than Embedding ({emb_cost})" + ); + + // Run the solver end-to-end. + let data = PlacementSolverContext { + blocks: &block_costs, + terminators: &terminators, + }; + let mut context = MirContext::new(&env, &interner); + let mut solver = data.build_in(&body, &heap); + let result = solver.run(&mut context, &body); + + assert_ne!( + result[bb(0)], + I, + "solver should prefer a specialized backend over Interpreter when path premiums dominate" + ); +} + +/// Provenance variants produce different path premiums due to different size estimates. +/// +/// `ProvenanceEdition` has size `3..=20` (midpoint 11) while `ProvenanceInferred` has size +/// `3..=5` (midpoint 4). 
A block accessing edition provenance should have a higher load cost +/// than one accessing inferred provenance, and this difference should be visible in the +/// solver's block cost inputs. +#[test] +fn provenance_variants_produce_different_premiums() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body_edition = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj metadata = vertex.metadata: ?, + prov = metadata.provenance: ?, + edition = prov.edition: ?; + + bb0() { + val = load edition; + return val; + } + }); + + let body_inferred = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj metadata = vertex.metadata: ?, + prov = metadata.provenance: ?, + inferred = prov.inferred: ?; + + bb0() { + val = load inferred; + return val; + } + }); + + let ip = target_set(&[I, P]); + let domains = [ip]; + + let config = TransferCostConfig::new(InformationRange::zero()); + + let statements_edition: TargetArray> = + IdArray::from_fn(|_: TargetId| StatementCostVec::new_in(&body_edition.basic_blocks, &heap)); + let edition_costs = + make_block_costs_with_config(&body_edition, &domains, &statements_edition, &config, &heap); + + let statements_inferred: TargetArray> = + IdArray::from_fn(|_: TargetId| { + StatementCostVec::new_in(&body_inferred.basic_blocks, &heap) + }); + let inferred_costs = make_block_costs_with_config( + &body_inferred, + &domains, + &statements_inferred, + &config, + &heap, + ); + + // Both are Postgres-origin, so Interpreter pays the premium, Postgres doesn't. + let edition_interp = edition_costs.cost(bb(0), I); + let edition_pg = edition_costs.cost(bb(0), P); + let inferred_interp = inferred_costs.cost(bb(0), I); + let inferred_pg = inferred_costs.cost(bb(0), P); + + // Postgres pays no premium for either (it's the origin). 
+ assert_eq!(edition_pg, inferred_pg, "Postgres is origin for both"); + + // Edition premium (midpoint 11) > Inferred premium (midpoint 4) on Interpreter. + assert!( + edition_interp > inferred_interp, + "Edition ({edition_interp}) should cost more than Inferred ({inferred_interp}) on \ + Interpreter" + ); +} diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs index 21bf9f0f0c9..4ac8917a460 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs @@ -70,10 +70,9 @@ fn all_statements_supported() { ); } -/// A single vertex projection yields cost 12 (base 8 + overhead 4 × 1 path). +/// All assignments get uniform cost 8 regardless of vertex projections. /// -/// Tests that `path_count` from `TraversalAnalysis` feeds into the interpreter cost -/// formula. A constant load at the same location has cost 8 (zero paths). +/// Path costs are charged at the block level via `BasicBlockCostVec`, not per statement. #[test] fn traversal_single_path_cost() { let heap = Heap::new(); @@ -110,9 +109,9 @@ fn traversal_single_path_cost() { ); } -/// Two vertex projections in a single statement yield cost 16 (base 8 + overhead 4 × 2 paths). +/// Multiple vertex projections in a statement still get uniform cost 8. /// -/// A tuple referencing both `_1.properties` and `_1.metadata.archived` has `path_count = 2`. +/// Path costs from `_1.properties` and `_1.metadata.archived` are charged at the block level. #[test] fn traversal_multiple_paths_cost() { let heap = Heap::new(); @@ -152,11 +151,9 @@ fn traversal_multiple_paths_cost() { ); } -/// Composite swallowing reduces `path_count` and therefore interpreter cost. +/// Composite swallowing is handled at the block level by `BasicBlockCostVec`. 
/// -/// A tuple referencing `_1.metadata.record_id.entity_id.web_id` and -/// `_1.metadata.record_id`: `RecordId` swallows `WebId`, so `path_count = 1` -/// and cost = 12, not 16. +/// The interpreter assigns uniform cost 8 to all assignments. #[test] fn traversal_swallowing_reduces_cost() { let heap = Heap::new(); @@ -195,10 +192,9 @@ fn traversal_swallowing_reduces_cost() { ); } -/// Statements without vertex access are unaffected by traversal costing. +/// All assignments get uniform cost 8 whether or not they access vertex projections. /// -/// A body with vertex projections in one statement and pure constants in another. -/// The constant-only statement still gets base cost 8 (`path_count = 0`). +/// Path-based cost differentiation is handled at the block level. #[test] fn non_traversal_unaffected_by_costs() { let heap = Heap::new(); diff --git a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs index fc46f9163f4..0b18f2d16d7 100644 --- a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs @@ -602,108 +602,12 @@ fn transfer_cost_is_max_for_unbounded() { ); } -/// Edition provenance live across a goto edge produces path-based transfer cost. +/// Edge transfer cost only accounts for live locals; path costs are charged at block level. /// -/// `edition_provenance_size` defaults to `3..=20`, midpoint 11. With no other live locals, -/// the Postgres→Interpreter transition cost is purely the path cost. +/// A scalar local (`live`) costs 1. Entity paths (`ProvenanceEdition`, `Properties`) are live +/// in bb1 but do not contribute to edge transfer cost (path costs moved to `BasicBlockCostVec`). 
#[test] -fn path_cost_from_edition_provenance() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { - decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; - @proj metadata = vertex.metadata: ?, - prov = metadata.provenance: ?, - edition = prov.edition: ?; - - bb0() { - goto bb1(); - }, - bb1() { - val = load edition; - return val; - } - }); - - let targets = [ - target_set(&[TargetId::Interpreter, TargetId::Postgres]), - target_set(&[TargetId::Interpreter, TargetId::Postgres]), - ]; - - let footprint = make_scalar_footprint(&body, &heap); - let placement = - TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); - let costs = placement.terminator_placement( - &body, - VertexType::Entity, - &footprint, - build_targets(&body, &targets), - ); - - // edition_provenance_size = 3..=20, midpoint(3, 20) = 11 - let matrix = costs.of(BasicBlockId::new(0))[0]; - assert_eq!( - matrix.get(TargetId::Postgres, TargetId::Interpreter), - Some(cost!(11)) - ); -} - -/// Inferred provenance produces a different (lower) cost than edition provenance. -/// -/// `ProvenanceInferred` has a static size `3..=5` (fixed structure, no config), midpoint 4. -/// This verifies the split: without per-variant sizing, both would produce the same cost. -#[test] -fn path_cost_from_inferred_provenance() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ - decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; - @proj metadata = vertex.metadata: ?, - prov = metadata.provenance: ?, - inferred = prov.inferred: ?; - - bb0() { - goto bb1(); - }, - bb1() { - val = load inferred; - return val; - } - }); - - let targets = [ - target_set(&[TargetId::Interpreter, TargetId::Postgres]), - target_set(&[TargetId::Interpreter, TargetId::Postgres]), - ]; - - let footprint = make_scalar_footprint(&body, &heap); - let placement = - TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); - let costs = placement.terminator_placement( - &body, - VertexType::Entity, - &footprint, - build_targets(&body, &targets), - ); - - // ProvenanceInferred is static 3..=5, midpoint(3, 5) = 4 - let matrix = costs.of(BasicBlockId::new(0))[0]; - assert_eq!( - matrix.get(TargetId::Postgres, TargetId::Interpreter), - Some(cost!(4)) - ); -} - -/// Transfer cost sums both live locals and live entity paths. -/// -/// A scalar local (`live`) costs 1. Two entity paths (`ProvenanceEdition` at 3..=20 -/// and `Properties` at 10..=10) sum to 13..=30, midpoint 21. Total = 1 + 21 = 22. 
-#[test] -fn transfer_cost_combines_locals_and_paths() { +fn transfer_cost_from_live_locals() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); @@ -744,12 +648,10 @@ fn transfer_cost_combines_locals_and_paths() { ); // local_cost: `live` scalar = 1 - // path_cost: Properties(10..=10) + ProvenanceEdition(3..=20) = 13..=30, midpoint(13, 30) = 21 - // total = 1 + 21 = 22 let matrix = costs.of(BasicBlockId::new(0))[0]; assert_eq!( matrix.get(TargetId::Postgres, TargetId::Interpreter), - Some(cost!(22)) + Some(cost!(1)) ); } diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap index 80b872f6109..7fd988b687d 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap @@ -8,7 +8,7 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { let %4: Boolean bb0(): { - %2 = %1.properties // cost: 12 + %2 = %1.properties // cost: 8 %3 = 42 // cost: 8 %4 = %3 > 10 // cost: 8 diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap.new b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap.new deleted file mode 100644 index 6ae9ce9bf03..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap.new +++ /dev/null @@ -1,18 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs -assertion_line: 92 -expression: output ---- -fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { - let %2: 
? - let %3: Integer - let %4: Boolean - - bb0(): { - %2 = %1.properties // cost: 8 - %3 = 42 // cost: 8 - %4 = %3 > 10 // cost: 8 - - return %4 - } -} diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap index 94b42590076..664620f8ba2 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap @@ -7,8 +7,8 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> (?, Boolean) { let %3: (?, Boolean) bb0(): { - %2 = %1.properties // cost: 12 - %3 = (%1.properties, %1.metadata.archived) // cost: 16 + %2 = %1.properties // cost: 8 + %3 = (%1.properties, %1.metadata.archived) // cost: 8 return %3 } diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap.new b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap.new deleted file mode 100644 index e647a1c125e..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap.new +++ /dev/null @@ -1,16 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs -assertion_line: 92 -expression: output ---- -fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> (?, Boolean) { - let %2: ? 
- let %3: (?, Boolean) - - bb0(): { - %2 = %1.properties // cost: 8 - %3 = (%1.properties, %1.metadata.archived) // cost: 8 - - return %3 - } -} diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_path_cost.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_path_cost.snap index e3ba22c2fd3..d51e6ceae90 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_path_cost.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_path_cost.snap @@ -7,7 +7,7 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { let %3: Boolean bb0(): { - %2 = %1.metadata.archived // cost: 12 + %2 = %1.metadata.archived // cost: 8 %3 = !%2 // cost: 8 return %3 diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_path_cost.snap.new b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_path_cost.snap.new deleted file mode 100644 index 1d09d7ea0ce..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_path_cost.snap.new +++ /dev/null @@ -1,16 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs -assertion_line: 92 -expression: output ---- -fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { - let %2: Boolean - let %3: Boolean - - bb0(): { - %2 = %1.metadata.archived // cost: 8 - %3 = !%2 // cost: 8 - - return %3 - } -} diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap index 78a7ed23795..c43e55a04bd 100644 --- 
a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap @@ -6,7 +6,7 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> (?, ?) { let %2: (?, ?) bb0(): { - %2 = (%1.metadata.record_id.entity_id.web_id, %1.metadata.record_id) // cost: 12 + %2 = (%1.metadata.record_id.entity_id.web_id, %1.metadata.record_id) // cost: 8 return %2 } diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap.new b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap.new deleted file mode 100644 index cccbfb950df..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap.new +++ /dev/null @@ -1,14 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs -assertion_line: 92 -expression: output ---- -fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> (?, ?) { - let %2: (?, ?) 
- - bb0(): { - %2 = (%1.metadata.record_id.entity_id.web_id, %1.metadata.record_id) // cost: 8 - - return %2 - } -} From de3ba9b44e81d1f953fb51c0a28dd322ca352ab6 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Mon, 2 Mar 2026 21:44:20 +0100 Subject: [PATCH 29/32] fix: doc cleanup --- .../mir/src/pass/execution/cost/analysis.rs | 35 +++++++++++++++++++ .../hashql/mir/src/pass/execution/cost/mod.rs | 9 +++-- .../pass/execution/placement/solve/csp/mod.rs | 18 +++++----- .../execution/placement/solve/csp/tests.rs | 10 +++--- .../execution/placement/solve/estimate/mod.rs | 25 ++++++------- .../placement/solve/estimate/tests.rs | 2 +- .../src/pass/execution/placement/solve/mod.rs | 4 +-- .../pass/execution/placement/solve/tests.rs | 8 ++--- .../mir/src/pass/execution/traversal/mod.rs | 12 +++++-- 9 files changed, 86 insertions(+), 37 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs b/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs index 5725f6b4c6b..44b9dff1cec 100644 --- a/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs +++ b/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs @@ -16,9 +16,17 @@ use crate::{ visit::Visitor as _, }; +/// Cost of running a single basic block on one target. +/// +/// Separates the statement cost sum (`base`) from the path transfer premium (`load`) so that +/// callers can inspect or log each component independently, even though the solver only sees +/// the combined [`total`](Self::total). #[derive(Debug, Copy, Clone)] struct BasicBlockTargetCost { + /// Sum of per-statement costs for this target (from [`StatementCostVec::sum_approx`]). base: ApproxCost, + /// Transfer premium for vertex paths accessed in this block whose origin is a different + /// backend. Zero when the target is the natural origin for every accessed path. load: ApproxCost, } @@ -33,22 +41,38 @@ impl BasicBlockTargetCost { } } +/// Precomputed cost for one basic block across all candidate targets. 
#[derive(Debug, Copy, Clone)] struct BasicBlockCost { + /// Which targets can execute this block (copied from the domain after AC-3). targets: TargetBitSet, + /// Per-target cost (only entries where `targets` is set are meaningful). costs: TargetArray, } +/// Per-block cost map for the entire body. +/// +/// Indexed by [`BasicBlockId`]. Each entry stores the set of candidate targets and the +/// combined (statement + path transfer) cost for each candidate. +/// +/// Produced by [`BasicBlockCostAnalysis::analyze_in`] and consumed by the placement solver. #[derive(Debug)] pub(crate) struct BasicBlockCostVec { inner: BasicBlockVec, } impl BasicBlockCostVec { + /// Returns the set of candidate targets for `block`. pub(crate) fn assignments(&self, block: BasicBlockId) -> TargetBitSet { self.inner[block].targets } + /// Returns the total cost (statement base + path transfer load) of placing `block` on + /// `target`. + /// + /// # Panics + /// + /// Debug-asserts that `target` is in the block's candidate domain. pub(crate) fn cost(&self, block: BasicBlockId, target: TargetId) -> ApproxCost { let entry = &self.inner[block]; @@ -61,6 +85,16 @@ impl BasicBlockCostVec { } } +/// Computes per-block costs by combining statement costs with path transfer premiums. +/// +/// For each block, walks the MIR statements to discover which vertex paths are accessed, +/// then charges a transfer premium on every target that is not the natural origin for those +/// paths. The premium is the estimated transfer size multiplied by the target's cost +/// multiplier. +/// +/// Path premiums are charged once per block (intra-block dedup), not once per statement. +/// Composite paths are kept as-is rather than expanded to leaves, under the assumption that +/// a composite fetch is cheaper than fetching each leaf independently. 
pub(crate) struct BasicBlockCostAnalysis<'ctx, A: Allocator> { pub vertex: VertexType, pub assignments: &'ctx BasicBlockSlice, @@ -123,6 +157,7 @@ impl BasicBlockCostAnalysis<'_, A> { BasicBlockCost { targets, costs } } + /// Computes per-block costs for every block in `blocks`. pub(crate) fn analyze_in( &self, config: &TransferCostConfig, diff --git a/libs/@local/hashql/mir/src/pass/execution/cost/mod.rs b/libs/@local/hashql/mir/src/pass/execution/cost/mod.rs index 62b3b200a40..7c971af0fa7 100644 --- a/libs/@local/hashql/mir/src/pass/execution/cost/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/cost/mod.rs @@ -1,7 +1,12 @@ //! Cost tracking for execution planning. //! -//! Provides data structures for recording the execution cost of statements on different targets. -//! The execution planner uses these costs to select optimal targets for each statement. +//! Two levels of cost representation: +//! +//! - **Per-statement**: [`StatementCostVec`] records the [`Cost`] of each statement on a given +//! target. Produced by the statement placement pass and consumed by [`BasicBlockCostAnalysis`]. +//! +//! - **Per-block**: [`BasicBlockCostVec`] aggregates statement costs and adds a path transfer +//! premium for non-origin backends. This is what the placement solver operates on. use alloc::alloc::Global; use core::{ diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/mod.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/mod.rs index 620d27debf4..ec9ea086bcf 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/mod.rs @@ -395,17 +395,17 @@ impl<'ctx, 'parent, 'alloc, A: Allocator, S: BumpAllocator> /// Computes a lower bound on the cost of completing the current partial assignment. /// - /// Sums `min(statement_cost)` and `min(transition_cost)` independently over unfixed blocks. 
+ /// Sums `min(block_cost)` and `min(transition_cost)` independently over unfixed blocks. /// Used for `BnB` pruning: a branch is skipped when `cost_so_far + lower_bound ≥ /// worst_retained`. /// /// This is *not* redundant with [`CostEstimation`] despite operating on the same data. /// [`CostEstimation::estimate`] computes a per-block heuristic that jointly optimizes - /// `statement + transition` costs and double-counts edges (both predecessor and successor + /// `block + transition` costs and double-counts edges (both predecessor and successor /// sides) for join-point influence. This method instead: /// - /// - **Independently minimizes** statement and transition costs (`min(stmt) + min(trans) ≤ - /// min(stmt + trans)`), producing a weaker but valid lower bound. + /// - **Independently minimizes** block and transition costs (`min(block) + min(trans) ≤ + /// min(block + trans)`), producing a weaker but valid lower bound. /// - **Single-counts edges** — only outgoing edges from each unfixed block — to avoid inflating /// the bound when both endpoints are unfixed. /// - **Omits boundary dampening** — the bound should be optimistic, not weighted. 
@@ -413,16 +413,16 @@ impl<'ctx, 'parent, 'alloc, A: Allocator, S: BumpAllocator> let unfixed = &self.region.blocks[self.depth..]; let mut bound = ApproxCost::ZERO; - // Per-unassigned-block: minimum statement cost over remaining domain + // Per-unassigned-block: minimum block cost over remaining domain for block in unfixed { - let mut min_stmt = ApproxCost::INF; + let mut min_block = ApproxCost::INF; for target in &block.possible { - min_stmt = cmp::min(min_stmt, self.solver.data.blocks.cost(block.id, target)); + min_block = cmp::min(min_block, self.solver.data.blocks.cost(block.id, target)); } - if min_stmt < ApproxCost::INF { - bound += min_stmt; + if min_block < ApproxCost::INF { + bound += min_block; } } diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/tests.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/tests.rs index 074cc7b9a48..57b38f9ae90 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/tests.rs @@ -327,12 +327,12 @@ fn replay_narrowing_resets_then_repropagates() { // --- Group 4: Lower Bound --- -/// Lower bound sums the minimum statement cost across each unfixed block's domain. +/// Lower bound sums the minimum block cost across each unfixed block's domain. /// /// With zero transition costs, the bound reduces to the sum of per-block minimum -/// statement costs: min(10, 20) + min(5, 15) = 15. +/// block costs: min(10, 20) + min(5, 15) = 15. #[test] -fn lower_bound_min_statement_cost_per_block() { +fn lower_bound_min_block_cost_per_block() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); @@ -390,7 +390,7 @@ fn lower_bound_min_statement_cost_per_block() { /// Lower bound includes the minimum valid transition cost for each inter-block edge. 
/// -/// With zero statement costs, the bound is determined by the cheapest compatible +/// With zero block costs, the bound is determined by the cheapest compatible /// transition across each edge between unfixed blocks. #[test] fn lower_bound_min_transition_cost_per_edge() { @@ -768,7 +768,7 @@ fn mrv_skips_fixed_blocks() { /// Greedy solver assigns both blocks in a 2-block SCC to the cheapest same-target. /// -/// Both blocks prefer P (statement cost 3 vs 8). Same-target transitions cost 0, +/// Both blocks prefer P (block cost 3 vs 8). Same-target transitions cost 0, /// so greedy converges on all-P without rollback. #[test] fn greedy_solves_two_block_loop() { diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/mod.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/mod.rs index 6abdf91474f..5805ddf67ae 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/mod.rs @@ -1,8 +1,8 @@ //! Cost estimation for placement target selection. //! //! The estimator computes an approximate cost for assigning a basic block to a given execution -//! target. Cost includes statement execution cost plus transition costs to and from predecessor and -//! successor blocks. +//! target. Cost includes the block's own cost (statement base + path transfer premium) plus +//! transition costs to and from predecessor and successor blocks. //! //! Cross-region transitions are weighted by a configurable [`CostEstimationConfig`] to //! de-emphasize boundary costs relative to intra-region costs. Self-loop edges are skipped because @@ -12,10 +12,10 @@ //! optimal option. Transition costs are counted from both predecessor and successor sides — //! intentional double-counting that gives each edge proportional influence at join points. //! -//! The double-counting inflates transition costs relative to statement costs. This is acceptable -//! 
(and possibly desirable) as long as transitions dominate. If statement costs ever become +//! The double-counting inflates transition costs relative to block costs. This is acceptable +//! (and possibly desirable) as long as transitions dominate. If block costs ever become //! comparable and the greedy value ordering consistently disagrees with BnB-optimal solutions, -//! consider halving the transition weight here rather than single-counting — single-counting +//! consider halving the transition weight here rather than single-counting; single-counting //! would make source-side blocks blind to downstream target demand. use core::{alloc::Allocator, cmp}; @@ -186,7 +186,7 @@ where ) -> Option { match (source, target) { (Some(source), None) => { - // Minimize over the target block's domain, weighted by statement + transition cost + // Minimize over the target block's domain, weighted by block + transition cost let mut current_minimum = ApproxCost::INF; let mut minimum_transition_cost = None; @@ -207,7 +207,7 @@ where minimum_transition_cost } (None, Some(target)) => { - // Minimize over the source block's domain, weighted by statement + transition cost + // Minimize over the source block's domain, weighted by block + transition cost let mut current_minimum = ApproxCost::INF; let mut minimum_transition_cost = None; @@ -245,11 +245,12 @@ where block: BasicBlockId, target: TargetId, ) -> Option { - // Start with the block's own statement cost, then add transition costs from each - // predecessor and to each successor. Transitions are counted on both sides (double-counted) - // so that join edges get proportional influence without frequency data. - // If a neighbor has no assignment yet, we optimistically assume its best local option. - // Returns `None` if any assigned neighbor lacks a valid transition to this target. + // Start with the block's own cost (statement base + path transfer premium), then add + // transition costs from each predecessor and to each successor. 
Transitions are counted on + // both sides (double-counted) so that join edges get proportional influence without + // frequency data. If a neighbor has no assignment yet, we optimistically assume its best + // local option. Returns `None` if any assigned neighbor lacks a valid transition to this + // target. let mut cost = self.solver.data.blocks.cost(block, target); for pred in body.basic_blocks.predecessors(block) { diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/tests.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/tests.rs index dde4014dd4b..f773a30a004 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/tests.rs @@ -326,7 +326,7 @@ fn infeasible_transition_returns_none() { /// Verifies that unassigned neighbors use the heuristic minimum over their domain. /// /// When a neighbor has no committed target, the estimator picks the cheapest -/// `(statement_cost + transition_cost)` combination across the neighbor's +/// `(block_cost + transition_cost)` combination across the neighbor's /// domain to produce an optimistic lower bound. #[test] fn unassigned_neighbor_uses_heuristic_minimum() { diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/mod.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/mod.rs index 18eba8c9439..d5355ea276d 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/mod.rs @@ -82,8 +82,8 @@ fn back_edge_span(body: &Body<'_>, members: &[BasicBlockId]) -> SpanId { /// Input data for placement solving. /// -/// Bundles the per-block target domains (`assignment`), per-target statement costs -/// (`statements`), and terminator transition costs (`terminators`). +/// Bundles the precomputed per-block costs (`blocks`) and terminator transition costs +/// (`terminators`). 
#[derive(Debug, Copy, Clone)] pub(crate) struct PlacementSolverContext<'ctx, A: Allocator> { pub blocks: &'ctx BasicBlockCostVec, diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs index 592ac04d892..527c4c6dd9f 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs @@ -501,10 +501,10 @@ fn rewind_skips_exhausted_region() { assert_eq!(result[bb(3)], I); } -/// Verifies the trivial region fast path picks the cheapest target by statement cost. +/// Verifies the trivial region fast path picks the cheapest target by block cost. /// /// Single block with a return terminator and no edges. The solver should select -/// the target with the lowest per-statement cost without consulting any neighbors. +/// the target with the lowest block cost without consulting any neighbors. #[test] fn single_block_trivial_region() { let heap = Heap::new(); @@ -606,7 +606,7 @@ fn cyclic_region_in_forward_backward() { /// Verifies that rewind walks back into a cyclic region and uses `retry()` to find an alternative. /// /// The SCC exit edge is diagonal, so the SCC solver sees both all-I and all-P -/// as feasible (each can reach some target in bb3's domain). Statement costs +/// as feasible (each can reach some target in bb3's domain). Block costs /// bias the SCC toward all-I. With SCC=all-I, the diagonal exit forces bb3 /// to match bb2=I, but bb3→bb4 only allows P→I, making bb3 infeasible for /// both I (outgoing fails) and P (incoming fails). Rewind reaches the SCC, @@ -1244,7 +1244,7 @@ fn cyclic_failure_emits_diagnostic() { /// Path premiums steer the solver toward origin backends. /// /// bb0 accesses `vertex.encodings.vectors` (Embedding-origin) and `vertex.properties` -/// (Postgres-origin). With equal base statement costs and permissive transitions, the solver +/// (Postgres-origin). 
With equal base block costs and permissive transitions, the solver /// picks the backend that minimizes the combined path premium. Embedding avoids the Vectors /// premium (3072) but pays the Properties premium. Postgres avoids the Properties premium /// but pays the Vectors premium. Interpreter pays both. diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs index d0999c80df8..978d1b80deb 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs @@ -1,11 +1,17 @@ -//! Traversal path resolution and storage mapping. +//! Traversal path resolution, storage mapping, and transfer cost estimation. //! //! Maps property access projections on graph vertices to their backend storage locations. //! Each vertex type has its own path enum ([`EntityPath`] for entities) that resolves //! dot-notation field accesses to specific columns, JSONB paths, or embedding stores. //! +//! Each path knows its origin backend ([`EntityPath::origin`]) and estimated transfer size +//! ([`EntityPath::estimate_size`]). The cost analysis uses these to charge a transfer premium +//! on targets that are not the natural origin for a path. +//! //! [`TraversalPathBitSet`] and [`TraversalPath`] wrap the per-vertex-type path types so that -//! the execution pipeline can handle different vertex types uniformly. +//! the execution pipeline can handle different vertex types uniformly. [`TransferCostConfig`] +//! carries the variable-size parameters (properties, embeddings, provenance) needed for cost +//! estimation. mod access; mod entity; @@ -205,6 +211,7 @@ pub enum TraversalPath { } impl TraversalPath { + /// Returns the set of execution targets that natively serve this path. #[inline] #[must_use] pub const fn origin(self) -> TargetBitSet { @@ -213,6 +220,7 @@ impl TraversalPath { } } + /// Returns the estimated transfer size for this path. 
#[inline] pub(crate) fn estimate_size(self, config: &TransferCostConfig) -> InformationRange { match self { From f67d62457d9356ce43029a7c6af85f4633886e68 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Mon, 2 Mar 2026 21:53:02 +0100 Subject: [PATCH 30/32] fix: suggestions from code review --- libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs | 4 ++-- .../hashql/mir/src/pass/execution/traversal/analysis/mod.rs | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs b/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs index 44b9dff1cec..3850c247a85 100644 --- a/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs +++ b/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs @@ -138,8 +138,8 @@ impl BasicBlockCostAnalysis<'_, A> { let targets = self.assignments[id]; let mut costs = TargetArray::from_raw([BasicBlockTargetCost::ZERO; _]); - // We do not expand to the leave nodes on purpose, we work under the assumption that any - // composite path that is given is more efficient than it's individual components and will + // We do not expand to the leaf nodes on purpose, we work under the assumption that any + // composite path that is given is more efficient than its individual components and will // always be fetched together, therefore the cost of the parent must be used to accurately // describe the cost. If a node can be used in multiple places at the same time, then fetch // from the composite will always be preferred. 
diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs index 27dce8e9ef3..fbfb0a1b0b6 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs @@ -70,8 +70,9 @@ where TraversalResult::Path(TraversalPath::Entity(path)), ); } else { - // The path leads to "nothing", indicating that we must hydrate the entire - // entity. + // The path doesn't map to any known storage location (e.g. + // `link_data.*.draft_id` is synthesized, not stored). To use the value at + // runtime we must fully hydrate the entity so the runtime can construct it. (self.on_traversal)(location, TraversalResult::Complete); } } From f85ce1d940455952e90f60b65afd832234af0bd3 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Mon, 2 Mar 2026 22:15:11 +0100 Subject: [PATCH 31/32] fix: suggestions from code review --- .../mir/src/pass/analysis/dataflow/liveness/mod.rs | 2 +- libs/@local/hashql/mir/src/pass/execution/mod.rs | 5 +---- .../hashql/mir/src/pass/execution/splitting/mod.rs | 3 +-- .../hashql/mir/src/pass/execution/traversal/entity.rs | 2 +- .../hashql/mir/src/pass/execution/traversal/mod.rs | 10 ++++------ .../hashql/mir/src/pass/transform/post_inline.rs | 5 +---- 6 files changed, 9 insertions(+), 18 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs b/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs index 99fb16a2a3a..9bc0fe7b74e 100644 --- a/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs +++ b/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs @@ -86,7 +86,7 @@ use crate::{ /// When the vertex is accessed through a resolvable projection (e.g., `_1.metadata.archived`), /// the corresponding [`EntityPath`] is gen'd in the path bitset. 
When the projection cannot be /// resolved (bare `_1` or unknown path), all paths are marked live via -/// [`insert_all`](EntityPathBitSet::insert_all). +/// [`TraversalPathBitSet::insert_all`]. pub struct TraversalLivenessAnalysis { pub vertex: VertexType, } diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index 22647ab3cee..d1d713b09a5 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -72,10 +72,7 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { let mut statement_costs: TargetArray<_> = TargetArray::from_fn(|_| None); - let mut targets = TargetId::all(); - targets.reverse(); // We reverse the order, so that earlier targets (aka the interpreter) can have access to traversal costs - - for target in targets { + for target in TargetId::all() { let mut statement = TargetPlacementStatement::new_in(target, &self.scratch); let statement_cost = statement.statement_placement_in(context, body, vertex, &self.scratch); diff --git a/libs/@local/hashql/mir/src/pass/execution/splitting/mod.rs b/libs/@local/hashql/mir/src/pass/execution/splitting/mod.rs index 6ae7387422c..7dacfa90feb 100644 --- a/libs/@local/hashql/mir/src/pass/execution/splitting/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/splitting/mod.rs @@ -311,8 +311,7 @@ impl BasicBlockSplitting { /// /// The first element is indexed by the new [`BasicBlockId`]s. The second element maps /// each original block to the number of blocks it was split into, which callers can use - /// to redistribute parallel data structures via - /// [`split_remap`](super::traversal::Traversals::split_remap). + /// to redistribute parallel data structures. 
pub(crate) fn split_in<'heap, A: Allocator>( &self, context: &MirContext<'_, 'heap>, diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs index 666aa022682..f184abf0f0b 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs @@ -113,7 +113,7 @@ pub enum EntityPath { /// /// Separates the variable-size components (properties, embeddings, provenance) from the /// fixed-size schema fields. The fixed costs (UUIDs, timestamps, scalars) are constants on -/// [`EntityPath::transfer_size`]; this config provides the values that vary per entity type +/// [`EntityPath::estimate_size`]; this config provides the values that vary per entity type /// or deployment. #[derive(Debug, Copy, Clone)] pub(crate) struct TransferCostConfig { diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs index 978d1b80deb..809f1b57d53 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs @@ -4,14 +4,12 @@ //! Each vertex type has its own path enum ([`EntityPath`] for entities) that resolves //! dot-notation field accesses to specific columns, JSONB paths, or embedding stores. //! -//! Each path knows its origin backend ([`EntityPath::origin`]) and estimated transfer size -//! ([`EntityPath::estimate_size`]). The cost analysis uses these to charge a transfer premium -//! on targets that are not the natural origin for a path. +//! Each path carries its origin backend (which execution targets serve it natively) and an +//! estimated transfer size used by the cost analysis to charge a transfer premium on targets +//! that are not the natural origin for a path. //! //! [`TraversalPathBitSet`] and [`TraversalPath`] wrap the per-vertex-type path types so that -//! 
the execution pipeline can handle different vertex types uniformly. [`TransferCostConfig`] -//! carries the variable-size parameters (properties, embeddings, provenance) needed for cost -//! estimation. +//! the execution pipeline can handle different vertex types uniformly. mod access; mod entity; diff --git a/libs/@local/hashql/mir/src/pass/transform/post_inline.rs b/libs/@local/hashql/mir/src/pass/transform/post_inline.rs index 2c0a274c374..5eb36ebb3c2 100644 --- a/libs/@local/hashql/mir/src/pass/transform/post_inline.rs +++ b/libs/@local/hashql/mir/src/pass/transform/post_inline.rs @@ -1,9 +1,6 @@ //! Post-inlining optimization pass. //! -//! Runs [`Canonicalization`] to clean up redundancy from inlining, then [`TraversalExtraction`] -//! to materialize vertex projections in graph read filter bodies. -//! -//! After running, call [`PostInline::finish`] to retrieve the [`Traversals`] maps. +//! Runs [`Canonicalization`] to clean up redundancy from inlining. use core::alloc::Allocator; From e6439e0f23680cb5b39fd91c1d816fb9820d7fd9 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Tue, 3 Mar 2026 10:54:40 +0100 Subject: [PATCH 32/32] fix: ordering --- .../hashql/mir/src/pass/execution/mod.rs | 22 +++++++++---------- .../hashql/mir/src/pass/execution/tests.rs | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index d1d713b09a5..f1920d860c8 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -90,17 +90,6 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { &self.scratch, ); - let block_costs = BasicBlockCostAnalysis { - vertex, - assignments: &assignments, - costs: &statement_costs, - } - .analyze_in( - &TransferCostConfig::new(InformationRange::full()), - &body.basic_blocks, - &self.scratch, - ); - let terminators = TerminatorPlacement::new_in( 
TransferCostConfig::new(InformationRange::full()), &self.scratch, @@ -119,6 +108,17 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { } .run_in(body, &self.scratch); + let block_costs = BasicBlockCostAnalysis { + vertex, + assignments: &assignments, + costs: &statement_costs, + } + .analyze_in( + &TransferCostConfig::new(InformationRange::full()), + &body.basic_blocks, + &self.scratch, + ); + let mut solver = PlacementSolverContext { blocks: &block_costs, terminators: &terminator_costs, diff --git a/libs/@local/hashql/mir/src/pass/execution/tests.rs b/libs/@local/hashql/mir/src/pass/execution/tests.rs index 05f191d07a8..998891df660 100644 --- a/libs/@local/hashql/mir/src/pass/execution/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/tests.rs @@ -65,7 +65,7 @@ fn assert_execution<'heap>( assert_snapshot!(name, output); } -/// Runs `TraversalExtraction` and `SizeEstimationAnalysis`, then `ExecutionAnalysis`. +/// Runs `SizeEstimationAnalysis`, then `ExecutionAnalysis`. #[track_caller] fn run_execution<'heap>( context: &mut MirContext<'_, 'heap>,