diff --git a/libs/@local/hashql/core/src/id/bit_vec/finite.rs b/libs/@local/hashql/core/src/id/bit_vec/finite.rs index 48472ffc9bc..9ccfddd22ad 100644 --- a/libs/@local/hashql/core/src/id/bit_vec/finite.rs +++ b/libs/@local/hashql/core/src/id/bit_vec/finite.rs @@ -315,6 +315,30 @@ impl FiniteBitSet { self.store = !self.store & mask; } + /// Returns the first set bit, or [`None`] if the set is empty. + /// + /// # Examples + /// + /// ```ignore + /// use hashql_core::id::bit_vec::FiniteBitSet; + /// + /// let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); + /// assert_eq!(set.first_set(), None); + /// + /// set.insert(MyId::from_usize(3)); + /// set.insert(MyId::from_usize(5)); + /// assert_eq!(set.first_set(), Some(MyId::from_usize(3))); + /// ``` + #[inline] + #[must_use] + pub fn first_set(&self) -> Option { + if self.is_empty() { + return None; + } + + Some(I::from_u32(self.store.trailing_zeros())) + } + /// Returns an iterator over the indices of set bits. #[inline] pub fn iter(&self) -> FiniteBitIter { @@ -728,6 +752,51 @@ mod tests { assert!(a.is_empty()); } + #[test] + fn first_set_empty() { + let set: FiniteBitSet = FiniteBitSet::new_empty(8); + assert_eq!(set.first_set(), None); + } + + #[test] + fn first_set_single() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); + set.insert(TestId::from_usize(5)); + assert_eq!(set.first_set(), Some(TestId::from_usize(5))); + } + + #[test] + fn first_set_multiple() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); + set.insert(TestId::from_usize(3)); + set.insert(TestId::from_usize(5)); + set.insert(TestId::from_usize(7)); + assert_eq!(set.first_set(), Some(TestId::from_usize(3))); + } + + #[test] + fn first_set_bit_zero() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); + set.insert(TestId::from_usize(0)); + set.insert(TestId::from_usize(7)); + assert_eq!(set.first_set(), Some(TestId::from_usize(0))); + } + + #[test] + fn first_set_last_bit() { + let mut set: FiniteBitSet = 
FiniteBitSet::new_empty(8); + set.insert(TestId::from_usize(7)); + assert_eq!(set.first_set(), Some(TestId::from_usize(7))); + } + + #[test] + fn first_set_wide_integral() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(128); + set.insert(TestId::from_usize(100)); + set.insert(TestId::from_usize(120)); + assert_eq!(set.first_set(), Some(TestId::from_usize(100))); + } + #[test] fn negate_empty_set() { let mut set: FiniteBitSet = FiniteBitSet::new_empty(4); diff --git a/libs/@local/hashql/core/src/id/vec.rs b/libs/@local/hashql/core/src/id/vec.rs index 5bed2f54ad0..c7954435eb6 100644 --- a/libs/@local/hashql/core/src/id/vec.rs +++ b/libs/@local/hashql/core/src/id/vec.rs @@ -6,7 +6,7 @@ use core::{ fmt::{self, Debug}, hash::{Hash, Hasher}, marker::PhantomData, - ops::{Deref, DerefMut}, + ops::{Deref, DerefMut, RangeBounds}, slice, }; @@ -39,6 +39,7 @@ pub struct IdVec { } impl IdVec { + /// Returns a reference to the underlying allocator. #[inline] pub fn allocator(&self) -> &A { self.raw.allocator() @@ -125,7 +126,23 @@ where I: Id, A: Allocator, { - /// Creates an `IdVec` from a raw `Vec`. + /// Creates an `IdVec` from a raw [`Vec`]. + /// + /// No validation is performed on the contents. The caller is responsible for ensuring the + /// vector's length stays within the valid range for `I`. + /// + /// # Examples + /// + /// ``` + /// # use hashql_core::id::{IdVec, Id as _, newtype}; + /// # newtype!(struct NodeId(u32 is 0..=100)); + /// let raw = vec!["a", "b", "c"]; + /// let vec = IdVec::::from_raw(raw); + /// + /// assert_eq!(vec.len(), 3); + /// assert_eq!(vec[NodeId::new(0)], "a"); + /// assert_eq!(vec[NodeId::new(2)], "c"); + /// ``` #[inline] pub const fn from_raw(raw: Vec) -> Self { Self { @@ -203,6 +220,25 @@ where Self::from_domain_in(elem, domain, domain.raw.allocator().clone()) } + /// Creates an `IdVec` with the same length as `domain`, deriving each element from the + /// corresponding domain entry. 
+ /// + /// The closure receives each ID and a reference to the domain element at that ID. + /// The allocator is cloned from the domain vector. + /// + /// # Examples + /// + /// ``` + /// # use hashql_core::id::{IdVec, Id as _, newtype}; + /// # newtype!(struct NodeId(u32 is 0..=100)); + /// let names: IdVec = IdVec::from_raw(vec!["alice", "bob", "charlie"]); + /// let lengths = IdVec::::from_domain_derive(|_id, name| name.len(), &names); + /// + /// assert_eq!(lengths.len(), names.len()); + /// assert_eq!(lengths[NodeId::new(0)], 5); // "alice" + /// assert_eq!(lengths[NodeId::new(1)], 3); // "bob" + /// assert_eq!(lengths[NodeId::new(2)], 7); // "charlie" + /// ``` #[inline] pub fn from_domain_derive(func: impl FnMut(I, &U) -> T, domain: &IdVec) -> Self where @@ -225,6 +261,12 @@ where Self::from_raw(alloc::vec::from_elem_in(elem, domain.len(), alloc)) } + /// Creates an `IdVec` with the same length as `domain`, deriving each element from the + /// corresponding domain entry, using a custom allocator. + /// + /// This is the allocator-aware version of [`from_domain_derive`]. + /// + /// [`from_domain_derive`]: IdVec::from_domain_derive #[inline] pub fn from_domain_derive_in( mut func: impl FnMut(I, &U) -> T, @@ -403,6 +445,9 @@ where self.raw.truncate(index.as_usize()); } + /// Clones and appends all elements in `other` to this vector. + /// + /// See [`Vec::extend_from_slice`] for details. #[inline] pub fn extend_from_slice(&mut self, other: &IdSlice) where @@ -411,11 +456,40 @@ where self.raw.extend_from_slice(other.as_raw()); } + /// Moves all elements from `other` into this vector, leaving `other` empty. + /// + /// See [`Vec::append`] for details. #[inline] pub fn append(&mut self, other: &mut Self) { self.raw.append(&mut other.raw); } + /// Returns an iterator over `(I, T)` pairs, consuming the vector. + /// + /// Each element is paired with its corresponding [`Id`]. 
+ /// + /// # Examples + /// + /// ``` + /// # use hashql_core::id::{IdVec, Id as _, newtype}; + /// # newtype!(struct NodeId(u32 is 0..=100)); + /// let vec: IdVec = IdVec::from_raw(vec!["x", "y", "z"]); + /// let pairs: Vec<_> = vec.into_iter_enumerated().collect(); + /// + /// assert_eq!(pairs[0], (NodeId::new(0), "x")); + /// assert_eq!(pairs[1], (NodeId::new(1), "y")); + /// assert_eq!(pairs[2], (NodeId::new(2), "z")); + /// ``` + /// + /// The iterator can be reversed: + /// + /// ``` + /// # use hashql_core::id::{IdVec, Id as _, newtype}; + /// # newtype!(struct NodeId(u32 is 0..=100)); + /// let vec: IdVec = IdVec::from_raw(vec!["a", "b"]); + /// let last = vec.into_iter_enumerated().next_back().unwrap(); + /// assert_eq!(last, (NodeId::new(1), "b")); + /// ``` pub fn into_iter_enumerated( self, ) -> impl DoubleEndedIterator + ExactSizeIterator { @@ -427,6 +501,20 @@ where .enumerate() .map(|(index, value)| (I::from_usize(index), value)) } + + /// Copies elements from the `src` range to a position starting at `dst` within the vector. + /// + /// See [`slice::copy_within`](prim@slice#method.copy_within) for details. 
+ #[inline] + pub fn copy_within(&mut self, src: impl RangeBounds, dst: I) + where + T: Copy, + { + let start = src.start_bound().copied().map(Id::as_usize); + let end = src.end_bound().copied().map(Id::as_usize); + + self.raw.copy_within((start, end), dst.as_usize()); + } } // Map-like APIs for IdVec> diff --git a/libs/@local/hashql/mir/src/lib.rs b/libs/@local/hashql/mir/src/lib.rs index f3b4006cd96..b09a6979faa 100644 --- a/libs/@local/hashql/mir/src/lib.rs +++ b/libs/@local/hashql/mir/src/lib.rs @@ -18,6 +18,7 @@ assert_matches, binary_heap_drain_sorted, clone_from_ref, + const_convert, const_type_name, get_mut_unchecked, iter_array_chunks, @@ -25,13 +26,13 @@ iter_intersperse, iterator_try_collect, likely_unlikely, + option_into_flat_iter, maybe_uninit_array_assume_init, maybe_uninit_fill, step_trait, string_from_utf8_lossy_owned, - try_trait_v2, temporary_niche_types, - const_convert, + try_trait_v2, variant_count, )] #![expect(clippy::indexing_slicing)] diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs new file mode 100644 index 00000000000..7b0dd9d82f8 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs @@ -0,0 +1,509 @@ +//! Island dependency graph with data requirement resolution. +//! +//! After [`IslandPlacement`] groups basic blocks into [`Island`]s by target, this module +//! builds a directed graph over those islands, resolves which traversal paths each island +//! needs, and inserts synthetic data islands for paths that cannot be satisfied by an +//! upstream provider. +//! +//! Three edge kinds connect islands: +//! +//! - [`ControlFlow`]: the source island must complete before the target island begins. +//! - [`DataFlow`]: the target island consumes data produced by the source island. +//! - [`Inherits`]: the target island inherits provided paths from the source island. +//! +//! [`IslandPlacement`]: super::IslandPlacement +//! 
[`Island`]: super::Island +//! [`ControlFlow`]: IslandEdge::ControlFlow +//! [`DataFlow`]: IslandEdge::DataFlow +//! [`Inherits`]: IslandEdge::Inherits + +#[cfg(test)] +pub(crate) mod tests; + +use alloc::alloc::Global; +use core::{ + alloc::Allocator, + ops::{Index, IndexMut}, +}; + +use hashql_core::{ + debug_panic, + graph::{ + DirectedGraph, EdgeId, LinkedGraph, NodeId, Predecessors, Successors, Traverse as _, + algorithms::{Dominators, dominators}, + linked::Edge, + }, + heap::CollectIn as _, + id::{ + HasId as _, Id as _, + bit_vec::{BitMatrix, DenseBitSet}, + }, +}; + +use super::{Island, IslandId, IslandVec}; +use crate::{ + body::{ + Body, + basic_block::{BasicBlockId, BasicBlockVec}, + }, + pass::execution::{ + TargetId, VertexType, + target::{TargetArray, TargetBitSet}, + traversal::{TraversalPath, TraversalPathBitSet}, + }, +}; + +/// The kind of dependency between two islands. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum IslandEdge { + /// The source island must complete before the target island begins. + ControlFlow, + /// The target island consumes data produced by the source island. + DataFlow, + /// The target island inherits provided paths from the source island. + Inherits, +} + +/// A computation island backed by a set of basic blocks from the placement solver. +#[derive(Debug)] +pub struct ExecIsland { + members: DenseBitSet, +} + +impl ExecIsland { + /// Returns `true` if `block` belongs to this island. + #[inline] + #[must_use] + pub fn contains(&self, block: BasicBlockId) -> bool { + self.members.contains(block) + } + + /// Iterates over the [`BasicBlockId`]s in this island in ascending order. + #[inline] + pub fn iter(&self) -> impl Iterator + '_ { + self.members.iter() + } +} + +/// Whether an island node represents computation or a data fetch. +#[derive(Debug)] +pub enum IslandKind { + /// An island containing basic blocks that execute on its assigned target. 
+ Exec(ExecIsland), + /// A synthetic island that fetches data from its assigned target. + Data, +} + +/// A node in the island dependency graph. +/// +/// Each node tracks which traversal paths it requires and which it provides. +#[derive(Debug)] +pub struct IslandNode { + kind: IslandKind, + target: TargetId, + requires: TraversalPathBitSet, + provides: TraversalPathBitSet, +} + +impl IslandNode { + /// Returns the kind of this island node. + #[inline] + #[must_use] + pub const fn kind(&self) -> &IslandKind { + &self.kind + } + + /// Returns the execution target this island runs on. + #[inline] + #[must_use] + pub const fn target(&self) -> TargetId { + self.target + } + + /// Returns the set of traversal paths this island requires. + #[inline] + #[must_use] + pub const fn requires(&self) -> TraversalPathBitSet { + self.requires + } + + /// Returns the set of traversal paths this island provides. + #[inline] + #[must_use] + pub const fn provides(&self) -> TraversalPathBitSet { + self.provides + } +} + +/// Directed graph over [`IslandNode`]s connected by [`IslandEdge`]s. +/// +/// Supports indexing by [`IslandId`] and implements [`DirectedGraph`], [`Successors`], +/// and [`Predecessors`]. 
+pub struct IslandGraph { + vertex: VertexType, + inner: LinkedGraph, + lookup: BasicBlockVec, +} + +impl IslandGraph { + pub fn new( + body: &Body<'_>, + vertex: VertexType, + islands: IslandVec, + ) -> Self { + Self::new_in(body, vertex, islands, Global, Global) + } +} + +impl IslandGraph { + pub fn new_in( + body: &Body<'_>, + vertex: VertexType, + islands: IslandVec, + scratch: S, + alloc: A, + ) -> Self + where + S: Allocator + Clone, + A: Clone, + { + let mut this = Self::build_in(body, vertex, islands, scratch.clone(), alloc); + this.resolve(scratch); + + this + } + + fn build_in( + body: &Body<'_>, + vertex: VertexType, + islands: IslandVec, + scratch: S, + alloc: A, + ) -> Self + where + S: Allocator, + A: Clone, + { + let mut lookup = + BasicBlockVec::from_domain_in(IslandId::MAX, &body.basic_blocks, alloc.clone()); + let mut graph = + LinkedGraph::with_capacity_in(islands.len(), body.basic_blocks.edge_count(), alloc); + let mut matrix = BitMatrix::new_in(islands.len(), islands.len(), scratch); + + for ( + island_id, + Island { + target, + members, + traversals, + }, + ) in islands.into_iter_enumerated() + { + for block_id in &members { + lookup[block_id] = island_id; + } + + let node_id = graph.add_node(IslandNode { + kind: IslandKind::Exec(ExecIsland { members }), + target, + requires: traversals, + provides: TraversalPathBitSet::empty(vertex), + }); + debug_assert_eq!(node_id.as_u32(), island_id.as_u32()); + } + + for block_id in body.basic_blocks.ids() { + let source = lookup[block_id]; + + for successor in body.basic_blocks.successors(block_id) { + let target = lookup[successor]; + + if source == target || matrix.contains(source, target) { + continue; + } + + matrix.insert(source, target); + graph.add_edge( + NodeId::new(source.as_u32()), + NodeId::new(target.as_u32()), + IslandEdge::ControlFlow, + ); + } + } + + Self { + vertex, + inner: graph, + lookup, + } + } + + /// Resolves all island requirements and inserts data islands where needed. 
+ fn resolve(&mut self, scratch: S) + where + S: Allocator + Clone, + { + let mut topo: Vec = self + .inner + .depth_first_forest_post_order() + .map(|node| IslandId::new(node.as_u32())) + .collect_in(scratch.clone()); + topo.reverse(); + + let start = self.lookup[BasicBlockId::START]; + + RequirementResolver::new(self, start, scratch).resolve(&topo); + } +} + +impl DirectedGraph for IslandGraph { + type Edge<'this> + = &'this Edge + where + Self: 'this; + type EdgeId = EdgeId; + type Node<'this> + = (IslandId, &'this IslandNode) + where + Self: 'this; + type NodeId = IslandId; + + fn node_count(&self) -> usize { + self.inner.node_count() + } + + fn edge_count(&self) -> usize { + self.inner.edge_count() + } + + fn iter_nodes(&self) -> impl ExactSizeIterator> + DoubleEndedIterator { + self.inner + .iter_nodes() + .map(|node| (IslandId::from_u32(node.id().as_u32()), &node.data)) + } + + fn iter_edges(&self) -> impl ExactSizeIterator> + DoubleEndedIterator { + self.inner.iter_edges() + } +} + +impl Successors for IslandGraph { + type SuccIter<'this> + = impl Iterator + 'this + where + Self: 'this; + + fn successors(&self, node: Self::NodeId) -> Self::SuccIter<'_> { + self.inner + .successors(NodeId::new(node.as_u32())) + .map(|node| IslandId::new(node.as_u32())) + } +} + +impl Predecessors for IslandGraph { + type PredIter<'this> + = impl Iterator + 'this + where + Self: 'this; + + fn predecessors(&self, node: Self::NodeId) -> Self::PredIter<'_> { + self.inner + .predecessors(NodeId::new(node.as_u32())) + .map(|node| IslandId::new(node.as_u32())) + } +} + +impl IndexMut for IslandGraph { + fn index_mut(&mut self, index: IslandId) -> &mut Self::Output { + &mut self.inner[NodeId::new(index.as_u32())].data + } +} + +impl Index for IslandGraph { + type Output = IslandNode; + + fn index(&self, index: IslandId) -> &Self::Output { + &self.inner[NodeId::new(index.as_u32())].data + } +} + +/// Returns the nearest strict dominator of `node` assigned to `target`, along with its 
+/// depth (0 = immediate dominator). +fn find_dominator_by_target( + dominators: &Dominators, + graph: &IslandGraph, + node: IslandId, + target: TargetId, +) -> Option<(IslandId, usize)> { + let mut current = node; + let mut depth = 0; + + loop { + let parent = dominators.immediate_dominator(current)?; + if parent == current { + return None; + } + + if graph[parent].target == target { + return Some((parent, depth)); + } + + current = parent; + depth += 1; + } +} + +/// Resolves data requirements for all islands, inserting data islands where needed. +struct RequirementResolver<'graph, A: Allocator, S: Allocator> { + graph: &'graph mut IslandGraph, + dominators: Dominators, + merged_provides: IslandVec, + data_providers: TargetArray>, +} + +impl<'graph, A: Allocator, S: Allocator + Clone> RequirementResolver<'graph, A, S> { + fn new(graph: &'graph mut IslandGraph, start: IslandId, scratch: S) -> Self { + let dominators = dominators(&*graph, start); + let merged_provides = IslandVec::from_elem_in( + TraversalPathBitSet::empty(graph.vertex), + graph.node_count(), + scratch, + ); + + Self { + graph, + dominators, + merged_provides, + data_providers: TargetArray::from_elem(None), + } + } + + fn resolve(mut self, topo: &[IslandId]) { + // Iterate in reverse for topological order + for &island_id in topo { + let island = &self.graph[island_id]; + let IslandKind::Exec(_) = &island.kind else { + debug_panic!("data islands should not be present during requirement resolution"); + continue; + }; + + self.inherit_provides(island_id); + self.resolve_island(island_id); + } + } + + /// If a same-target dominator exists, inherits its provided paths via an `Inherits` edge. 
+ fn inherit_provides(&mut self, island_id: IslandId) { + let island_target = self.graph[island_id].target; + + if let Some((parent, _)) = + find_dominator_by_target(&self.dominators, self.graph, island_id, island_target) + { + self.merged_provides.copy_within(parent..=parent, island_id); + self.graph.inner.add_edge( + NodeId::from_u32(parent.as_u32()), + NodeId::from_u32(island_id.as_u32()), + IslandEdge::Inherits, + ); + } + } + + /// Resolves requirements for a single island. + /// + /// Paths whose origin includes this island's own target are self-provided and need no + /// external provider. All other paths are resolved via dominator walk or data island. + fn resolve_island(&mut self, island_id: IslandId) { + let requires = self.graph[island_id].requires; + if requires.is_empty() { + return; + } + + let island_target = self.graph[island_id].target; + + // Cache dominator lookups per target to avoid repeated walks. + let mut cached = TargetArray::from_elem(None); + + for requirement in &requires { + let origin = requirement.origin(); + debug_assert!(!origin.is_empty()); + + // If this island runs on an origin backend for the path, the data is + // locally available and doesn't need to be provided to downstream consumers. + if origin.contains(island_target) { + continue; + } + + let provider = self.find_best_provider(&mut cached, island_id, origin); + let provider = provider.unwrap_or_else(|| self.get_or_create_data_island(origin)); + + self.register_path(provider, island_id, requirement); + } + } + + /// Finds the nearest dominating provider among the potential origin targets. 
+ #[expect(clippy::option_option)] + fn find_best_provider( + &self, + cached: &mut TargetArray>>, + island_id: IslandId, + origin: TargetBitSet, + ) -> Option { + origin + .iter() + .filter_map(|target| { + *cached[target].get_or_insert_with(|| { + find_dominator_by_target(&self.dominators, self.graph, island_id, target) + }) + }) + .min_by_key(|&(_, depth)| depth) + .map(|(provider, _)| provider) + } + + /// Registers a path as provided by `provider` for consumption by `consumer`. + fn register_path(&mut self, provider: IslandId, consumer: IslandId, path: TraversalPath) { + if !self.merged_provides[provider].contains(path) { + self.merged_provides[provider].insert(path); + self.graph[provider].provides.insert(path); + } + + let provider_node = NodeId::from_u32(provider.as_u32()); + let consumer_node = NodeId::from_u32(consumer.as_u32()); + + let exists = self + .graph + .inner + .outgoing_edges(provider_node) + .any(|edge| edge.target() == consumer_node && edge.data == IslandEdge::DataFlow); + + if !exists { + self.graph + .inner + .add_edge(provider_node, consumer_node, IslandEdge::DataFlow); + } + } + + /// Returns an existing data island for the given origin backend, or creates one. + fn get_or_create_data_island(&mut self, origin: TargetBitSet) -> IslandId { + // Reuse an existing data island if any origin target already has one. + if let Some(provider) = origin.iter().find_map(|target| self.data_providers[target]) { + return provider; + } + + // `TargetId` is ordered by backend priority, so the first set bit gives us the best target + // (note that interpreter is technically first, but never a target for data). 
+ let target = origin.first_set().unwrap_or_else(|| unreachable!()); + + let node = self.graph.inner.add_node(IslandNode { + kind: IslandKind::Data, + target, + requires: TraversalPathBitSet::empty(self.graph.vertex), + provides: TraversalPathBitSet::empty(self.graph.vertex), + }); + let provider = IslandId::from_u32(node.as_u32()); + self.data_providers[target] = Some(provider); + self.merged_provides + .push(TraversalPathBitSet::empty(self.graph.vertex)); + + provider + } +} diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs new file mode 100644 index 00000000000..800a9ff063c --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs @@ -0,0 +1,579 @@ +//! Tests for island dependency graph construction and requirement resolution. +#![expect(clippy::min_ident_chars)] + +use alloc::alloc::Global; +use core::assert_matches; + +use hashql_core::{ + graph::DirectedGraph as _, heap::Heap, id::Id as _, symbol::sym, + r#type::environment::Environment, +}; + +use crate::{ + body::{Body, basic_block::BasicBlockVec}, + builder::body, + intern::Interner, + pass::execution::{ + VertexType, + island::{ + IslandId, IslandPlacement, + graph::{IslandEdge, IslandGraph, IslandKind}, + }, + target::TargetId, + traversal::EntityPath, + }, +}; + +pub(crate) fn make_targets(assignments: &[TargetId]) -> BasicBlockVec { + let mut targets = BasicBlockVec::with_capacity_in(assignments.len(), Global); + for &target in assignments { + targets.push(target); + } + targets +} + +pub(crate) fn build_graph(body: &Body<'_>, targets: &[TargetId]) -> IslandGraph { + let target_vec = make_targets(targets); + let islands = IslandPlacement::new().run(body, VertexType::Entity, &target_vec, Global); + IslandGraph::new_in(body, VertexType::Entity, islands, Global, Global) +} + +fn find_island(graph: &IslandGraph, target: TargetId) -> IslandId { + (0..graph.node_count()) + 
.map(IslandId::from_usize) + .find(|&island_id| graph[island_id].target() == target) + .unwrap_or_else(|| panic!("no island with target {target:?}")) +} + +fn find_data_island(graph: &IslandGraph, target: TargetId) -> IslandId { + (0..graph.node_count()) + .map(IslandId::from_usize) + .find(|&island_id| { + matches!(graph[island_id].kind(), IslandKind::Data) + && graph[island_id].target() == target + }) + .unwrap_or_else(|| panic!("no data island with target {target:?}")) +} + +fn has_edge( + graph: &IslandGraph, + source: IslandId, + target: IslandId, + kind: IslandEdge, +) -> bool { + graph.iter_edges().any(|edge| { + edge.source().as_u32() == source.as_u32() + && edge.target().as_u32() == target.as_u32() + && edge.data == kind + }) +} + +/// Single Postgres island accessing properties: no fetch island needed because the island +/// itself is on the origin backend for that path. +#[test] +fn single_island_no_fetch() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj props = vertex.properties: ?; + + bb0() { + val = load props; + return val; + } + }); + + let graph = build_graph(&body, &[TargetId::Postgres]); + let island = &graph[IslandId::new(0)]; + + assert_eq!(graph.node_count(), 1); + assert_eq!(graph.edge_count(), 0); + assert_matches!(island.kind(), IslandKind::Exec(_)); + assert_eq!(island.target(), TargetId::Postgres); + + // Properties are locally available on Postgres (origin), so provides is empty. + assert!( + island + .provides() + .as_entity() + .expect("entity vertex") + .is_empty() + ); +} + +/// Postgres island followed by Interpreter island that needs properties. +/// Properties originate from Postgres, so the Interpreter island gets a `DataFlow` edge +/// from the Postgres island: no fetch island needed. 
+#[test] +fn data_edge_from_predecessor() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val1: ?, val2: ?; + @proj props = vertex.properties: ?; + + bb0() { + val1 = load props; + goto bb1(); + }, + bb1() { + val2 = load props; + return val2; + } + }); + + let graph = build_graph(&body, &[TargetId::Postgres, TargetId::Interpreter]); + + assert_eq!(graph.node_count(), 2); + + let postgres = find_island(&graph, TargetId::Postgres); + let interpreter = find_island(&graph, TargetId::Interpreter); + + assert_matches!(graph[postgres].kind(), IslandKind::Exec(_)); + assert_matches!(graph[interpreter].kind(), IslandKind::Exec(_)); + + // Postgres self-provides exactly Properties (it's the origin). + let provides = graph[postgres].provides(); + let provides = provides.as_entity().expect("entity vertex"); + assert_eq!(provides.len(), 1); + assert!(provides.contains(EntityPath::Properties)); + + // ControlFlow edge: Postgres → Interpreter (CFG successor). + assert!(has_edge( + &graph, + postgres, + interpreter, + IslandEdge::ControlFlow + )); + + // DataFlow edge: Postgres → Interpreter (Postgres provides Properties to Interpreter). + assert!(has_edge( + &graph, + postgres, + interpreter, + IslandEdge::DataFlow + )); + + assert_eq!(graph.edge_count(), 2); +} + +/// Interpreter island needs embedding data but has no Embedding predecessor. +/// A data island for Embedding should be inserted. +#[test] +fn fetch_island_for_unsatisfied_requirement() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj enc = vertex.encodings: ?, + vecs = enc.vectors: ?; + + bb0() { + val = load vecs; + return val; + } + }); + + let graph = build_graph(&body, &[TargetId::Interpreter]); + + assert_eq!(graph.node_count(), 2); + + let exec = find_island(&graph, TargetId::Interpreter); + let data = find_data_island(&graph, TargetId::Embedding); + + assert_matches!(graph[exec].kind(), IslandKind::Exec(_)); + assert_matches!(graph[data].kind(), IslandKind::Data); + + // DataFlow edge: Embedding data island → Interpreter exec island. + assert!(has_edge(&graph, data, exec, IslandEdge::DataFlow)); + + // Data island provides exactly Vectors. + let provides = graph[data].provides(); + let provides = provides.as_entity().expect("entity vertex"); + assert_eq!(provides.len(), 1); + assert!(provides.contains(EntityPath::Vectors)); +} + +/// Diamond CFG: Postgres branches to Interpreter and Embedding, both merge into a +/// final Postgres island. The Embedding path is only available on one branch, so the +/// final Postgres island needs a data island for embedding data. +#[test] +fn diamond_branch_needs_fetch() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], + val: ?, cond: Bool; + @proj props = vertex.properties: ?, + enc = vertex.encodings: ?, + vecs = enc.vectors: ?; + + bb0() { + cond = load true; + if cond then bb1() else bb2(); + }, + bb1() { + val = load props; + goto bb3(); + }, + bb2() { + val = load vecs; + goto bb3(); + }, + bb3() { + val = load vecs; + return val; + } + }); + + // bb0=Postgres, bb1=Interpreter, bb2=Embedding, bb3=Postgres + let graph = build_graph( + &body, + &[ + TargetId::Postgres, + TargetId::Interpreter, + TargetId::Embedding, + TargetId::Postgres, + ], + ); + + // 4 exec islands + 1 data island for Embedding. 
+ assert_eq!(graph.node_count(), 5); + + let data = find_data_island(&graph, TargetId::Embedding); + assert_matches!(graph[data].kind(), IslandKind::Data); + + // The data island provides exactly Vectors. + let provides = graph[data].provides(); + let provides = provides.as_entity().expect("entity vertex"); + assert_eq!(provides.len(), 1); + assert!(provides.contains(EntityPath::Vectors)); + + // bb3 (second Postgres island) consumes Vectors from the data island. + // Find both Postgres exec islands: bb0 is the entry, bb3 is the merge point. + let postgres_islands: Vec<_> = (0..graph.node_count()) + .map(IslandId::from_usize) + .filter(|&island_id| { + graph[island_id].target() == TargetId::Postgres + && matches!(graph[island_id].kind(), IslandKind::Exec(_)) + }) + .collect(); + assert_eq!(postgres_islands.len(), 2); + + // The merge-point Postgres island (bb3) should have a DataFlow edge from the data island. + let merge_postgres = postgres_islands + .iter() + .find(|&&island_id| has_edge(&graph, data, island_id, IslandEdge::DataFlow)) + .expect("data island should have DataFlow edge to a Postgres island"); + + // bb3 also inherits from bb0 (both Postgres, bb0 dominates bb3). + let entry_postgres = postgres_islands + .iter() + .find(|&&island_id| island_id != *merge_postgres) + .expect("two distinct Postgres islands"); + assert!(has_edge( + &graph, + *entry_postgres, + *merge_postgres, + IslandEdge::Inherits, + )); +} + +/// Inherits edge: when two same-target islands are in a dominator relationship, +/// the child inherits provided paths from the parent. +#[test] +fn inherits_edge_same_target_dominator() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val1: ?, val2: ?; + @proj props = vertex.properties: ?; + + bb0() { + val1 = load props; + goto bb1(); + }, + bb1() { + goto bb2(); + }, + bb2() { + val2 = load props; + return val2; + } + }); + + // bb0=Postgres, bb1=Interpreter, bb2=Postgres + // bb0 dominates bb2 (through bb1). Both are Postgres, so bb2 should inherit from bb0. + let graph = build_graph( + &body, + &[ + TargetId::Postgres, + TargetId::Interpreter, + TargetId::Postgres, + ], + ); + + assert_eq!(graph.node_count(), 3); + + // Find the two Postgres exec islands. + let postgres_islands: Vec<_> = (0..graph.node_count()) + .map(IslandId::from_usize) + .filter(|&island_id| graph[island_id].target() == TargetId::Postgres) + .collect(); + assert_eq!(postgres_islands.len(), 2); + + // Determine which is the dominator (bb0) and which is the child (bb2). + // The Inherits edge points dominator → child. + let (dominator, child) = if has_edge( + &graph, + postgres_islands[0], + postgres_islands[1], + IslandEdge::Inherits, + ) { + (postgres_islands[0], postgres_islands[1]) + } else { + assert!(has_edge( + &graph, + postgres_islands[1], + postgres_islands[0], + IslandEdge::Inherits, + )); + (postgres_islands[1], postgres_islands[0]) + }; + + // Both islands access Properties locally (origin backend), so neither needs to + // provide it to anyone. provides is empty on both. + assert!( + graph[dominator] + .provides() + .as_entity() + .expect("entity vertex") + .is_empty() + ); + assert!( + graph[child] + .provides() + .as_entity() + .expect("entity vertex") + .is_empty() + ); +} + +/// Two Interpreter islands both need Vectors (origin: Embedding). Only one data island +/// should be created, and both consumers get a `DataFlow` edge from it. 
+#[test] +fn data_island_reused_across_consumers() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj enc = vertex.encodings: ?, + vecs = enc.vectors: ?; + + bb0() { + val = load vecs; + goto bb1(); + }, + bb1() { + goto bb2(); + }, + bb2() { + val = load vecs; + return val; + } + }); + + // bb0=Interpreter, bb1=Postgres (separates the two Interpreter islands), bb2=Interpreter. + let graph = build_graph( + &body, + &[ + TargetId::Interpreter, + TargetId::Postgres, + TargetId::Interpreter, + ], + ); + + // Two Interpreter exec islands + one Postgres exec island + exactly one data island. + let data_islands: Vec<_> = (0..graph.node_count()) + .map(IslandId::from_usize) + .filter(|&island_id| matches!(graph[island_id].kind(), IslandKind::Data)) + .collect(); + assert_eq!(data_islands.len(), 1); + + let data = data_islands[0]; + assert_eq!(graph[data].target(), TargetId::Embedding); + + // Both Interpreter exec islands get a DataFlow edge from the single data island. + let interpreter_islands: Vec<_> = (0..graph.node_count()) + .map(IslandId::from_usize) + .filter(|&island_id| { + matches!(graph[island_id].kind(), IslandKind::Exec(_)) + && graph[island_id].target() == TargetId::Interpreter + }) + .collect(); + assert_eq!(interpreter_islands.len(), 2); + + for &exec in &interpreter_islands { + assert!( + has_edge(&graph, data, exec, IslandEdge::DataFlow), + "expected DataFlow edge from data island to exec island {exec:?}", + ); + } +} + +/// Entry island on a non-origin backend: the dominator walk reaches the root without +/// finding an origin, so a data island is created. +#[test] +fn entry_island_needs_fetch() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj props = vertex.properties: ?; + + bb0() { + val = load props; + return val; + } + }); + + // Entry island is Interpreter, but Properties origin is Postgres. + // Dominator walk from the entry hits the root (itself), so no dominator found. + let graph = build_graph(&body, &[TargetId::Interpreter]); + + assert_eq!(graph.node_count(), 2); + + let exec = find_island(&graph, TargetId::Interpreter); + let data = find_data_island(&graph, TargetId::Postgres); + + assert!(has_edge(&graph, data, exec, IslandEdge::DataFlow)); + + let provides = graph[data].provides(); + let provides = provides.as_entity().expect("entity vertex"); + assert_eq!(provides.len(), 1); + assert!(provides.contains(EntityPath::Properties)); +} + +/// `DataFlow` edge dedup: an Interpreter island accesses two paths that both originate +/// from Postgres (`Properties` and `EntityUuid`). Both resolve to the same Postgres +/// dominator, but only one `DataFlow` edge should exist between them. +#[test] +fn data_flow_edge_dedup() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val1: ?, val2: ?; + @proj props = vertex.properties: ?, + meta = vertex.metadata: ?, + rec = meta.record_id: ?, + eid = rec.entity_id: ?, + uuid = eid.entity_uuid: ?; + + bb0() { + val1 = load props; + goto bb1(); + }, + bb1() { + val1 = load props; + val2 = load uuid; + return val1; + } + }); + + let graph = build_graph(&body, &[TargetId::Postgres, TargetId::Interpreter]); + assert_eq!(graph.node_count(), 2); + + let postgres = find_island(&graph, TargetId::Postgres); + let interpreter = find_island(&graph, TargetId::Interpreter); + + // Postgres provides both paths to the Interpreter island. 
+ let provides = graph[postgres].provides(); + let provides = provides.as_entity().expect("entity vertex"); + assert!(provides.contains(EntityPath::Properties)); + assert!(provides.contains(EntityPath::EntityUuid)); + assert_eq!(provides.len(), 2); + + // Exactly two edges: one ControlFlow and one DataFlow, both Postgres -> Interpreter. + // Without dedup, the two paths would produce two DataFlow edges to the same consumer. + assert_eq!(graph.edge_count(), 2); + assert!(has_edge( + &graph, + postgres, + interpreter, + IslandEdge::ControlFlow + )); + assert!(has_edge( + &graph, + postgres, + interpreter, + IslandEdge::DataFlow + )); +} + +/// Control flow edge dedup: two blocks in the same island both have a successor in +/// another island, but only one `ControlFlow` edge should be created. +#[test] +fn control_flow_edge_dedup() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], + val: ?, cond: Bool; + + bb0() { + cond = load true; + goto bb1(); + }, + bb1() { + if cond then bb2() else bb2(); + }, + bb2() { + val = load true; + return val; + } + }); + + // bb0 and bb1 are both Interpreter (same island). bb2 is Postgres. + // bb1→bb2 appears twice (both arms of the branch), but the BitMatrix dedup + // ensures only one ControlFlow edge from the Interpreter island to Postgres. 
+ let graph = build_graph( + &body, + &[ + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Postgres, + ], + ); + + assert_eq!(graph.node_count(), 2); + assert_eq!(graph.edge_count(), 1); + + let interpreter = find_island(&graph, TargetId::Interpreter); + let postgres = find_island(&graph, TargetId::Postgres); + + assert!(has_edge( + &graph, + interpreter, + postgres, + IslandEdge::ControlFlow + )); +} diff --git a/libs/@local/hashql/mir/src/pass/execution/island/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/mod.rs index be767150b88..661ba8f5302 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/mod.rs @@ -29,6 +29,8 @@ use crate::{ visit::Visitor as _, }; +pub(crate) mod graph; +pub(crate) mod schedule; #[cfg(test)] mod tests; diff --git a/libs/@local/hashql/mir/src/pass/execution/island/schedule/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/schedule/mod.rs new file mode 100644 index 00000000000..26099f9396d --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/island/schedule/mod.rs @@ -0,0 +1,128 @@ +#[cfg(test)] +mod tests; + +use alloc::{alloc::Global, collections::VecDeque}; +use core::{alloc::Allocator, cmp}; + +use hashql_core::graph::{DirectedGraph as _, Predecessors as _, Successors as _}; + +use super::{IslandId, IslandVec, graph::IslandGraph}; + +/// An island with its assigned parallelism level. +/// +/// Islands at the same level have no dependencies between them and can execute concurrently. +/// Level 0 contains islands with no predecessors. +#[derive(Debug, Copy, Clone)] +pub struct ScheduledIsland { + /// The island this entry refers to. + pub island: IslandId, + /// The parallelism level. All islands at the same level are independent. + pub level: u32, +} + +/// Topological ordering of islands with parallelism levels. +/// +/// Produced by [`IslandGraph::schedule`]. 
Each island appears exactly once, +/// ordered so that all predecessors of an island appear before it. +#[derive(Debug)] +pub struct IslandSchedule<A: Allocator = Global> { + entries: Vec<ScheduledIsland, A>, +} + +impl<A: Allocator> IslandSchedule<A> { + /// Returns the scheduled entries in topological order. + #[inline] + #[must_use] + pub fn entries(&self) -> &[ScheduledIsland] { + &self.entries + } + + /// Returns the number of scheduled islands. + #[inline] + #[must_use] + pub const fn len(&self) -> usize { + self.entries.len() + } + + /// Returns `true` if the schedule contains no islands. + #[inline] + #[must_use] + pub const fn is_empty(&self) -> bool { + self.entries.is_empty() + } + + /// Iterates over the scheduled entries in topological order. + #[inline] + pub fn iter(&self) -> impl ExactSizeIterator<Item = &ScheduledIsland> { + self.entries.iter() + } + + #[inline] + pub fn level_count(&self) -> usize { + self.entries + .last() + .map_or(0, |entry| entry.level as usize + 1) + } + + #[inline] + pub fn levels(&self) -> impl Iterator<Item = &[ScheduledIsland]> { + self.entries.chunk_by(|lhs, rhs| lhs.level == rhs.level) + } +} + +impl IslandGraph { + #[must_use] + pub fn schedule(&self) -> IslandSchedule { + self.schedule_in(Global, Global) + } +} + +impl IslandGraph { + /// Computes a topological schedule with level assignment for parallelism. + /// + /// Each island is assigned the lowest level such that all its predecessors are at + /// strictly lower levels. Islands at the same level have no direct dependencies and + /// can execute concurrently.
+ #[expect(clippy::cast_possible_truncation)] + pub fn schedule_in<S, A: Allocator>(&self, scratch: S, alloc: A) -> IslandSchedule<A> + where + S: Allocator + Clone, + { + let node_count = self.node_count(); + + let mut in_degree = IslandVec::from_elem_in(0_u32, node_count, scratch.clone()); + let mut levels = IslandVec::from_elem_in(0_u32, node_count, scratch.clone()); + + for (island_id, _) in self.iter_nodes() { + in_degree[island_id] = self.predecessors(island_id).count() as u32; + } + + let mut queue: VecDeque<IslandId, S> = VecDeque::new_in(scratch); + for (island_id, _) in self.iter_nodes() { + if in_degree[island_id] == 0 { + queue.push_back(island_id); + } + } + + let mut entries = Vec::with_capacity_in(node_count, alloc); + + while let Some(island_id) = queue.pop_front() { + entries.push(ScheduledIsland { + island: island_id, + level: levels[island_id], + }); + + for successor in self.successors(island_id) { + levels[successor] = cmp::max(levels[successor], levels[island_id] + 1); + in_degree[successor] -= 1; + + if in_degree[successor] == 0 { + queue.push_back(successor); + } + } + } + + entries.sort_by_key(|entry| entry.level); + IslandSchedule { entries } + } +} diff --git a/libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs new file mode 100644 index 00000000000..5439d7da89a --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs @@ -0,0 +1,232 @@ +//! Tests for island schedule computation. +#![expect(clippy::min_ident_chars)] + +use hashql_core::{ + graph::DirectedGraph as _, heap::Heap, symbol::sym, r#type::environment::Environment, +}; + +use crate::{ + builder::body, + intern::Interner, + pass::execution::{ + island::graph::{IslandKind, tests::build_graph}, + target::TargetId, + }, +}; + +/// Data islands should be at a lower level than their consumers.
+#[test] +fn data_island_before_consumer() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj enc = vertex.encodings: ?, + vecs = enc.vectors: ?; + + bb0() { + val = load vecs; + return val; + } + }); + + let graph = build_graph(&body, &[TargetId::Interpreter]); + let schedule = graph.schedule(); + let entries = schedule.entries(); + + assert_eq!(entries.len(), 2); + + let exec_entry = entries + .iter() + .find(|entry| matches!(graph[entry.island].kind(), IslandKind::Exec(_))) + .expect("should have an exec island"); + let data_entry = entries + .iter() + .find(|entry| matches!(graph[entry.island].kind(), IslandKind::Data)) + .expect("should have a data island"); + + assert_eq!(data_entry.level, 0); + assert_eq!(exec_entry.level, 1); +} + +/// Every island in the graph appears exactly once in the schedule. +#[test] +fn covers_all_nodes() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], + val: ?, cond: Bool; + @proj props = vertex.properties: ?, + enc = vertex.encodings: ?, + vecs = enc.vectors: ?; + + bb0() { + cond = load true; + if cond then bb1() else bb2(); + }, + bb1() { + val = load props; + goto bb3(); + }, + bb2() { + val = load vecs; + goto bb3(); + }, + bb3() { + val = load vecs; + return val; + } + }); + + let graph = build_graph( + &body, + &[ + TargetId::Postgres, + TargetId::Interpreter, + TargetId::Embedding, + TargetId::Postgres, + ], + ); + + let schedule = graph.schedule(); + assert_eq!(schedule.entries().len(), graph.node_count()); +} + +/// Two levels: data island at level 0, exec island at level 1. 
+#[test] +fn level_count_with_data_dependency() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj enc = vertex.encodings: ?, + vecs = enc.vectors: ?; + + bb0() { + val = load vecs; + return val; + } + }); + + let graph = build_graph(&body, &[TargetId::Interpreter]); + let schedule = graph.schedule(); + + assert_eq!(schedule.level_count(), 2); + assert_eq!(schedule.levels().count(), 2); +} + +/// Each level slice contains islands with the same level, and levels are ascending. +#[test] +fn levels_are_contiguous_and_ascending() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], + val: ?, cond: Bool; + @proj props = vertex.properties: ?, + enc = vertex.encodings: ?, + vecs = enc.vectors: ?; + + bb0() { + cond = load true; + if cond then bb1() else bb2(); + }, + bb1() { + val = load props; + goto bb3(); + }, + bb2() { + val = load vecs; + goto bb3(); + }, + bb3() { + val = load vecs; + return val; + } + }); + + let graph = build_graph( + &body, + &[ + TargetId::Postgres, + TargetId::Interpreter, + TargetId::Embedding, + TargetId::Postgres, + ], + ); + + let schedule = graph.schedule(); + let mut prev_level = None; + let mut total_entries = 0; + + for level_slice in schedule.levels() { + assert!(!level_slice.is_empty()); + + let expected_level = level_slice[0].level; + for entry in level_slice { + assert_eq!(entry.level, expected_level, "mixed levels within a slice"); + } + + if let Some(prev) = prev_level { + assert!(expected_level > prev, "levels not strictly ascending"); + } + prev_level = Some(expected_level); + total_entries += level_slice.len(); + } + + assert_eq!( + total_entries, + schedule.len(), + 
"levels must cover all entries" + ); +} + +/// Data islands appear in an earlier level slice than their exec consumers. +#[test] +fn levels_order_data_before_exec() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj enc = vertex.encodings: ?, + vecs = enc.vectors: ?; + + bb0() { + val = load vecs; + return val; + } + }); + + let graph = build_graph(&body, &[TargetId::Interpreter]); + let schedule = graph.schedule(); + + let level_slices: Vec<_> = schedule.levels().collect(); + assert_eq!(level_slices.len(), 2); + + for entry in level_slices[0] { + assert_eq!(entry.level, 0); + assert!( + matches!(graph[entry.island].kind(), IslandKind::Data), + "level 0 should contain the data island" + ); + } + + for entry in level_slices[1] { + assert_eq!(entry.level, 1); + assert!( + matches!(graph[entry.island].kind(), IslandKind::Exec(_)), + "level 1 should contain the exec island" + ); + } +} diff --git a/libs/@local/hashql/mir/src/pass/execution/island/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/tests.rs index d24ef1b7f8e..542013c8cd6 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/tests.rs @@ -343,6 +343,7 @@ fn island_joins_traversal_paths() { let island = &islands[IslandId::new(0)]; let traversal_paths = island.traversals(); let joined = traversal_paths.as_entity().expect("entity vertex"); + assert_eq!(joined.len(), 2); assert!(joined.contains(EntityPath::Properties)); assert!(joined.contains(EntityPath::ProvenanceEdition)); } diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index f1920d860c8..2c8a56fbb1a 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ 
-36,7 +36,11 @@ use self::{ }; pub use self::{ cost::{ApproxCost, Cost}, - island::{Island, IslandId, IslandVec}, + island::{ + Island, IslandId, IslandVec, + graph::{ExecIsland, IslandEdge, IslandGraph, IslandKind, IslandNode}, + schedule::{IslandSchedule, ScheduledIsland}, + }, placement::error::PlacementDiagnosticCategory, target::TargetId, vertex::VertexType, diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/access.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/access.rs index 581408bf766..5c703ef7aa5 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/access.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/access.rs @@ -1,11 +1,17 @@ +/// How a path maps to its backend storage. #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub(crate) enum AccessMode { + /// The path corresponds to a single column or embedding slot. Direct, + /// The path is a composite whose children are the actual storage locations. Composite, } +/// Backend and access mode for a resolved entity field path. #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub(crate) enum Access { + /// Served by the Postgres graph store. Postgres(AccessMode), + /// Served by the embedding backend. Embedding(AccessMode), } diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs index fbfb0a1b0b6..d84d03d0994 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs @@ -12,12 +12,19 @@ use crate::{ visit::{self, Visitor}, }; +/// Outcome of resolving a vertex access to a storage path. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub(crate) enum TraversalResult { + /// The access resolved to a specific storage location. Path(TraversalPath), + /// The access could not be resolved; full entity hydration is required. 
Complete, } +/// MIR visitor that resolves vertex field accesses to [`TraversalResult`]s. +/// +/// Walks a body's places, finds uses of [`Local::VERTEX`], resolves the projection chain +/// via [`EntityPath::resolve`], and calls `on_traversal` with the [`Location`] and result. // TODO: Each consumer (statement placement per target, island placement) resolves traversal paths // independently. Consider caching resolved paths per body to avoid redundant work. // See: https://linear.app/hash/issue/BE-435 @@ -27,6 +34,8 @@ pub(crate) struct TraversalAnalysisVisitor { } impl TraversalAnalysisVisitor { + /// Creates a visitor for the given vertex type, calling `on_traversal` for each resolved + /// vertex access. pub(crate) const fn new(vertex: VertexType, on_traversal: F) -> Self where F: FnMut(Location, TraversalResult), diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs index f184abf0f0b..575f9c91c96 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs @@ -23,6 +23,7 @@ use crate::{ }, }; +/// Shorthand for accessing a compile-time [`ConstantSymbol`] from the [`sym`] module. macro_rules! sym { ($($sym:tt)::*) => { sym::$($sym)::*::CONST @@ -172,6 +173,9 @@ impl TransferCostConfig { } } +/// Integer type backing the [`FiniteBitSet`] for [`EntityPath`]. +/// +/// Must have at least as many bits as there are [`EntityPath`] variants (asserted below). type FiniteBitSetWidth = u32; const _: () = { assert!( @@ -181,6 +185,10 @@ const _: () = { }; impl EntityPath { + /// Resolves a sequence of field projections to an [`EntityPath`]. + /// + /// Returns the resolved path together with the number of projections consumed, or `None` + /// if the projections do not map to any known storage location. 
#[must_use] pub fn resolve(projections: &[Projection<'_>]) -> Option<(Self, usize)> { resolve(projections) @@ -370,6 +378,7 @@ impl EntityPath { } } + /// Returns `true` if this path targets a JSONB column that allows arbitrary sub-paths. const fn is_jsonb(self) -> bool { matches!( self, @@ -383,6 +392,10 @@ impl EntityPath { } } +/// Paths that have at least one ancestor composite, collected at compile time. +/// +/// Used to compute [`EntityPathBitSet::TOP`] by removing children that are subsumed by +/// their ancestor composites. const HAS_ANCESTORS: [EntityPath; HAS_ANCESTOR_COUNT] = { let mut out = [EntityPath::Archived; HAS_ANCESTOR_COUNT]; @@ -401,6 +414,7 @@ const HAS_ANCESTORS: [EntityPath; HAS_ANCESTOR_COUNT] = { out }; +/// Number of [`EntityPath`] variants that have at least one ancestor composite. const HAS_ANCESTOR_COUNT: usize = { let mut count = 0; let mut index = 0; @@ -417,6 +431,11 @@ const HAS_ANCESTOR_COUNT: usize = { count }; +/// Bitset of [`EntityPath`] values with composite swallowing. +/// +/// Insertions respect the composite hierarchy: inserting a composite removes its children, +/// and inserting a child when its ancestor is already present is a no-op. The lattice top +/// contains exactly the root-level and childless paths (18 of 25 variants). #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub struct EntityPathBitSet(FiniteBitSet); @@ -437,6 +456,7 @@ impl EntityPathBitSet { Self(set) }; + /// Creates an empty bitset with no paths set. #[expect(clippy::cast_possible_truncation)] #[must_use] pub const fn new_empty() -> Self { @@ -464,6 +484,15 @@ impl EntityPathBitSet { } } + /// Returns `true` if `path` is present in the bitset. + #[must_use] + pub(crate) const fn contains(self, path: EntityPath) -> bool { + self.0.contains(path) + } + + /// Re-applies composite swallowing after a raw union. + /// + /// Removes any path whose ancestor composite is also present in the set. 
fn normalize(&mut self) { for path in &self.0 { for &ancestor in path.ancestors() { @@ -540,6 +569,10 @@ impl const core::ops::Deref for EntityPathBitSet { } } +/// Extracts the field name from the projection at `*index`, advancing the index on success. +/// +/// Returns `None` if the projection is not a [`FieldByName`](ProjectionKind::FieldByName) +/// or if `*index` is out of bounds. #[inline] fn project(projections: &[Projection<'_>], index: &mut usize) -> Option { let projection = projections.get(*index).and_then(|projection| { diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs index 809f1b57d53..4b89cccb71e 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs @@ -38,11 +38,13 @@ pub struct TraversalLattice { } impl TraversalLattice { + /// Creates a lattice for the given vertex type. #[must_use] pub const fn new(vertex: VertexType) -> Self { Self { vertex } } + /// Returns the vertex type this lattice operates over. #[must_use] pub const fn vertex(self) -> VertexType { self.vertex @@ -124,6 +126,14 @@ impl TraversalPathBitSet { } } + /// Returns `true` if `path` is present in the bitset. + #[must_use] + pub const fn contains(self, path: TraversalPath) -> bool { + match (self, path) { + (Self::Entity(bitset), TraversalPath::Entity(path)) => bitset.contains(path), + } + } + /// Inserts all possible paths into the set. #[inline] pub const fn insert_all(&mut self) { @@ -132,6 +142,7 @@ impl TraversalPathBitSet { } } + /// Iterates over the paths in this bitset. #[must_use] #[inline] pub fn iter(&self) -> impl ExactSizeIterator {