From a6cc82c11c8dd46b88c2311c543c805961e67021 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Tue, 3 Mar 2026 19:09:34 +0100 Subject: [PATCH 1/9] feat: core logic --- .../hashql/core/src/id/bit_vec/finite.rs | 69 +++ libs/@local/hashql/core/src/id/vec.rs | 92 ++- libs/@local/hashql/mir/src/lib.rs | 5 +- .../src/pass/execution/island/graph/mod.rs | 580 ++++++++++++++++++ .../src/pass/execution/island/graph/tests.rs | 257 ++++++++ .../mir/src/pass/execution/island/graph2.rs | 371 +++++++++++ .../mir/src/pass/execution/island/mod.rs | 2 + .../hashql/mir/src/pass/execution/mod.rs | 8 +- .../src/pass/execution/traversal/entity.rs | 4 + .../mir/src/pass/execution/traversal/mod.rs | 6 + 10 files changed, 1389 insertions(+), 5 deletions(-) create mode 100644 libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs create mode 100644 libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs create mode 100644 libs/@local/hashql/mir/src/pass/execution/island/graph2.rs diff --git a/libs/@local/hashql/core/src/id/bit_vec/finite.rs b/libs/@local/hashql/core/src/id/bit_vec/finite.rs index 48472ffc9bc..9ccfddd22ad 100644 --- a/libs/@local/hashql/core/src/id/bit_vec/finite.rs +++ b/libs/@local/hashql/core/src/id/bit_vec/finite.rs @@ -315,6 +315,30 @@ impl FiniteBitSet { self.store = !self.store & mask; } + /// Returns the first set bit, or [`None`] if the set is empty. + /// + /// # Examples + /// + /// ```ignore + /// use hashql_core::id::bit_vec::FiniteBitSet; + /// + /// let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); + /// assert_eq!(set.first_set(), None); + /// + /// set.insert(MyId::from_usize(3)); + /// set.insert(MyId::from_usize(5)); + /// assert_eq!(set.first_set(), Some(MyId::from_usize(3))); + /// ``` + #[inline] + #[must_use] + pub fn first_set(&self) -> Option { + if self.is_empty() { + return None; + } + + Some(I::from_u32(self.store.trailing_zeros())) + } + /// Returns an iterator over the indices of set bits. 
#[inline] pub fn iter(&self) -> FiniteBitIter { @@ -728,6 +752,51 @@ mod tests { assert!(a.is_empty()); } + #[test] + fn first_set_empty() { + let set: FiniteBitSet = FiniteBitSet::new_empty(8); + assert_eq!(set.first_set(), None); + } + + #[test] + fn first_set_single() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); + set.insert(TestId::from_usize(5)); + assert_eq!(set.first_set(), Some(TestId::from_usize(5))); + } + + #[test] + fn first_set_multiple() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); + set.insert(TestId::from_usize(3)); + set.insert(TestId::from_usize(5)); + set.insert(TestId::from_usize(7)); + assert_eq!(set.first_set(), Some(TestId::from_usize(3))); + } + + #[test] + fn first_set_bit_zero() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); + set.insert(TestId::from_usize(0)); + set.insert(TestId::from_usize(7)); + assert_eq!(set.first_set(), Some(TestId::from_usize(0))); + } + + #[test] + fn first_set_last_bit() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); + set.insert(TestId::from_usize(7)); + assert_eq!(set.first_set(), Some(TestId::from_usize(7))); + } + + #[test] + fn first_set_wide_integral() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(128); + set.insert(TestId::from_usize(100)); + set.insert(TestId::from_usize(120)); + assert_eq!(set.first_set(), Some(TestId::from_usize(100))); + } + #[test] fn negate_empty_set() { let mut set: FiniteBitSet = FiniteBitSet::new_empty(4); diff --git a/libs/@local/hashql/core/src/id/vec.rs b/libs/@local/hashql/core/src/id/vec.rs index 5bed2f54ad0..c7954435eb6 100644 --- a/libs/@local/hashql/core/src/id/vec.rs +++ b/libs/@local/hashql/core/src/id/vec.rs @@ -6,7 +6,7 @@ use core::{ fmt::{self, Debug}, hash::{Hash, Hasher}, marker::PhantomData, - ops::{Deref, DerefMut}, + ops::{Deref, DerefMut, RangeBounds}, slice, }; @@ -39,6 +39,7 @@ pub struct IdVec { } impl IdVec { + /// Returns a reference to the underlying allocator. 
#[inline] pub fn allocator(&self) -> &A { self.raw.allocator() @@ -125,7 +126,23 @@ where I: Id, A: Allocator, { - /// Creates an `IdVec` from a raw `Vec`. + /// Creates an `IdVec` from a raw [`Vec`]. + /// + /// No validation is performed on the contents. The caller is responsible for ensuring the + /// vector's length stays within the valid range for `I`. + /// + /// # Examples + /// + /// ``` + /// # use hashql_core::id::{IdVec, Id as _, newtype}; + /// # newtype!(struct NodeId(u32 is 0..=100)); + /// let raw = vec!["a", "b", "c"]; + /// let vec = IdVec::::from_raw(raw); + /// + /// assert_eq!(vec.len(), 3); + /// assert_eq!(vec[NodeId::new(0)], "a"); + /// assert_eq!(vec[NodeId::new(2)], "c"); + /// ``` #[inline] pub const fn from_raw(raw: Vec) -> Self { Self { @@ -203,6 +220,25 @@ where Self::from_domain_in(elem, domain, domain.raw.allocator().clone()) } + /// Creates an `IdVec` with the same length as `domain`, deriving each element from the + /// corresponding domain entry. + /// + /// The closure receives each ID and a reference to the domain element at that ID. + /// The allocator is cloned from the domain vector. 
+ /// + /// # Examples + /// + /// ``` + /// # use hashql_core::id::{IdVec, Id as _, newtype}; + /// # newtype!(struct NodeId(u32 is 0..=100)); + /// let names: IdVec = IdVec::from_raw(vec!["alice", "bob", "charlie"]); + /// let lengths = IdVec::::from_domain_derive(|_id, name| name.len(), &names); + /// + /// assert_eq!(lengths.len(), names.len()); + /// assert_eq!(lengths[NodeId::new(0)], 5); // "alice" + /// assert_eq!(lengths[NodeId::new(1)], 3); // "bob" + /// assert_eq!(lengths[NodeId::new(2)], 7); // "charlie" + /// ``` #[inline] pub fn from_domain_derive(func: impl FnMut(I, &U) -> T, domain: &IdVec) -> Self where @@ -225,6 +261,12 @@ where Self::from_raw(alloc::vec::from_elem_in(elem, domain.len(), alloc)) } + /// Creates an `IdVec` with the same length as `domain`, deriving each element from the + /// corresponding domain entry, using a custom allocator. + /// + /// This is the allocator-aware version of [`from_domain_derive`]. + /// + /// [`from_domain_derive`]: IdVec::from_domain_derive #[inline] pub fn from_domain_derive_in( mut func: impl FnMut(I, &U) -> T, @@ -403,6 +445,9 @@ where self.raw.truncate(index.as_usize()); } + /// Clones and appends all elements in `other` to this vector. + /// + /// See [`Vec::extend_from_slice`] for details. #[inline] pub fn extend_from_slice(&mut self, other: &IdSlice) where @@ -411,11 +456,40 @@ where self.raw.extend_from_slice(other.as_raw()); } + /// Moves all elements from `other` into this vector, leaving `other` empty. + /// + /// See [`Vec::append`] for details. #[inline] pub fn append(&mut self, other: &mut Self) { self.raw.append(&mut other.raw); } + /// Returns an iterator over `(I, T)` pairs, consuming the vector. + /// + /// Each element is paired with its corresponding [`Id`]. 
+ /// + /// # Examples + /// + /// ``` + /// # use hashql_core::id::{IdVec, Id as _, newtype}; + /// # newtype!(struct NodeId(u32 is 0..=100)); + /// let vec: IdVec = IdVec::from_raw(vec!["x", "y", "z"]); + /// let pairs: Vec<_> = vec.into_iter_enumerated().collect(); + /// + /// assert_eq!(pairs[0], (NodeId::new(0), "x")); + /// assert_eq!(pairs[1], (NodeId::new(1), "y")); + /// assert_eq!(pairs[2], (NodeId::new(2), "z")); + /// ``` + /// + /// The iterator can be reversed: + /// + /// ``` + /// # use hashql_core::id::{IdVec, Id as _, newtype}; + /// # newtype!(struct NodeId(u32 is 0..=100)); + /// let vec: IdVec = IdVec::from_raw(vec!["a", "b"]); + /// let last = vec.into_iter_enumerated().next_back().unwrap(); + /// assert_eq!(last, (NodeId::new(1), "b")); + /// ``` pub fn into_iter_enumerated( self, ) -> impl DoubleEndedIterator + ExactSizeIterator { @@ -427,6 +501,20 @@ where .enumerate() .map(|(index, value)| (I::from_usize(index), value)) } + + /// Copies elements from the `src` range to a position starting at `dst` within the vector. + /// + /// See [`slice::copy_within`](prim@slice#method.copy_within) for details. 
+ #[inline] + pub fn copy_within(&mut self, src: impl RangeBounds, dst: I) + where + T: Copy, + { + let start = src.start_bound().copied().map(Id::as_usize); + let end = src.end_bound().copied().map(Id::as_usize); + + self.raw.copy_within((start, end), dst.as_usize()); + } } // Map-like APIs for IdVec> diff --git a/libs/@local/hashql/mir/src/lib.rs b/libs/@local/hashql/mir/src/lib.rs index f3b4006cd96..b09a6979faa 100644 --- a/libs/@local/hashql/mir/src/lib.rs +++ b/libs/@local/hashql/mir/src/lib.rs @@ -18,6 +18,7 @@ assert_matches, binary_heap_drain_sorted, clone_from_ref, + const_convert, const_type_name, get_mut_unchecked, iter_array_chunks, @@ -25,13 +26,13 @@ iter_intersperse, iterator_try_collect, likely_unlikely, + option_into_flat_iter, maybe_uninit_array_assume_init, maybe_uninit_fill, step_trait, string_from_utf8_lossy_owned, - try_trait_v2, temporary_niche_types, - const_convert, + try_trait_v2, variant_count, )] #![expect(clippy::indexing_slicing)] diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs new file mode 100644 index 00000000000..91bfa88be85 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs @@ -0,0 +1,580 @@ +//! Island dependency graph with requirement-based edges and fetch island insertion. +//! +//! Builds a directed graph over [`Island`]s where edges carry the [`EntityPathBitSet`]s +//! that flow between islands. Two edge kinds exist: +//! +//! - **CFG edges**: derived from block-level control flow crossing island boundaries. The successor +//! island consumes data the predecessor island produces. +//! - **Data edges**: an island needs entity paths from a non-adjacent producer. The data is fetched +//! directly from the producer's backend, not routed through intermediaries. +//! +//! When an island requires paths that no dominating predecessor can provide, a +//! 
[`FetchIsland`] is inserted as a synthetic parallel predecessor dedicated to fetching +//! that data from the origin backend. +//! +//! The output includes a topological schedule with level assignment for parallelism: +//! islands at the same level with no edges between them can execute concurrently. + +use alloc::alloc::Global; +use core::alloc::Allocator; + +use hashql_core::{ + graph::{DirectedGraph, Predecessors, Successors, algorithms::dominators}, + id::{self, Id, IdVec, bit_vec::DenseBitSet}, +}; + +use super::{Island, IslandId, IslandSlice, IslandVec}; +use crate::pass::execution::{ + VertexType, + target::TargetId, + traversal::{EntityPathBitSet, TraversalPath, TraversalPathBitSet}, +}; + +#[cfg(test)] +mod tests; + +id::newtype!( + /// Identifies a node in the [`IslandGraph`], which may be either a real [`Island`] + /// or a synthetic [`FetchIsland`]. + pub struct IslandNodeId(u32 is 0..=0xFFFF_FF00) +); +id::newtype_collections!(pub type IslandNode* from IslandNodeId); + +/// The kind of edge in the island dependency graph. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum IslandEdgeKind { + /// Direct control flow between islands (block-level CFG edge crossing an island boundary). + Cfg, + /// Data dependency where the consumer fetches directly from the producer's backend. + Data, +} + +/// A directed edge in the island dependency graph. +/// +/// Carries the set of entity paths that flow from the source island to the target island, +/// along with the edge kind. +#[derive(Debug, Clone)] +pub struct IslandEdge { + pub source: IslandNodeId, + pub target: IslandNodeId, + pub kind: IslandEdgeKind, + pub paths: EntityPathBitSet, +} + +/// A synthetic island that exists solely to fetch entity data from a specific backend. +/// +/// Inserted when a real island requires entity paths that no dominating predecessor +/// can provide. Groups all unsatisfied paths for a single origin backend into one fetch +/// operation. 
+#[derive(Debug, Clone)] +pub struct FetchIsland { + pub target: TargetId, + pub paths: EntityPathBitSet, +} + +/// A node in the island dependency graph: either a real computation island or a +/// synthetic fetch island. +#[derive(Debug, Clone)] +pub enum IslandNode { + /// A real island from the placement solver. + Real(IslandId), + /// A synthetic fetch island inserted to satisfy data requirements. + Fetch(FetchIsland), +} + +impl IslandNode { + /// Returns the execution target for this node. + #[must_use] + pub fn target(&self, islands: &IslandSlice) -> TargetId { + match self { + Self::Real(island_id) => islands[*island_id].target(), + Self::Fetch(fetch) => fetch.target, + } + } +} + +/// A scheduled island with its parallelism level. +/// +/// Islands at the same level have no dependencies between them and can execute concurrently. +/// Level 0 contains islands with no predecessors (entry points and independent fetch islands). +#[derive(Debug, Copy, Clone)] +pub struct ScheduledIsland { + pub node: IslandNodeId, + pub level: u32, +} + +/// The island dependency graph. +/// +/// Contains the set of island nodes (real + fetch), directed edges with path requirements, +/// and a topological schedule with level assignment for parallelism. +#[derive(Debug)] +pub struct IslandGraph { + pub nodes: IslandNodeVec, + pub edges: Vec, + pub schedule: Vec, +} + +/// Adapter that provides [`DirectedGraph`], [`Successors`], and [`Predecessors`] over +/// island nodes, enabling dominator computation on the island-level CFG. 
+struct IslandCfg { + node_count: usize, + successors: IslandNodeVec>, + predecessors: IslandNodeVec>, +} + +impl IslandCfg { + fn new(node_count: usize) -> Self { + Self { + node_count, + successors: IslandNodeVec::from_fn_in(node_count, |_| Vec::new(), Global), + predecessors: IslandNodeVec::from_fn_in(node_count, |_| Vec::new(), Global), + } + } + + fn add_edge(&mut self, source: IslandNodeId, target: IslandNodeId) { + if !self.successors[source].contains(&target) { + self.successors[source].push(target); + self.predecessors[target].push(source); + } + } +} + +impl DirectedGraph for IslandCfg { + type Edge<'this> = (IslandNodeId, IslandNodeId); + type EdgeId = (IslandNodeId, IslandNodeId); + type Node<'this> = (IslandNodeId, &'this [IslandNodeId]); + type NodeId = IslandNodeId; + + fn node_count(&self) -> usize { + self.node_count + } + + fn edge_count(&self) -> usize { + self.successors.iter().map(|succs| succs.len()).sum() + } + + fn iter_nodes(&self) -> impl ExactSizeIterator> + DoubleEndedIterator { + self.successors + .iter_enumerated() + .map(|(id, succs)| (id, succs.as_slice())) + } + + fn iter_edges(&self) -> impl ExactSizeIterator> + DoubleEndedIterator { + // Not needed for dominator computation, provide a dummy implementation. + [].into_iter() + } +} + +impl Successors for IslandCfg { + type SuccIter<'this> = impl Iterator + 'this; + + fn successors(&self, node: Self::NodeId) -> Self::SuccIter<'_> { + self.successors[node].iter().copied() + } +} + +impl Predecessors for IslandCfg { + type PredIter<'this> = impl Iterator + 'this; + + fn predecessors(&self, node: Self::NodeId) -> Self::PredIter<'_> { + self.predecessors[node].iter().copied() + } +} + +/// Maps a block-level CFG into island-level CFG edges. +/// +/// For each block-level CFG edge where the source and target belong to different islands, +/// adds an edge between the corresponding island nodes. 
+fn build_island_cfg( + body: &crate::body::Body<'_>, + islands: &IslandSlice, + block_to_island: &crate::body::basic_block::BasicBlockSlice, + cfg: &mut IslandCfg, + edges: &mut Vec, + alloc: &A, +) { + use hashql_core::graph::Successors as _; + + for block in body.basic_blocks.ids() { + let source_island = block_to_island[block]; + + for successor in body.basic_blocks.successors(block) { + let target_island = block_to_island[successor]; + + if source_island != target_island { + cfg.add_edge(source_island, target_island); + + // Check if this CFG edge already exists in our edge list. + let existing = edges.iter_mut().find(|edge| { + edge.source == source_island + && edge.target == target_island + && edge.kind == IslandEdgeKind::Cfg + }); + + if existing.is_none() { + edges.push(IslandEdge { + source: source_island, + target: target_island, + kind: IslandEdgeKind::Cfg, + paths: EntityPathBitSet::new_empty(), + }); + } + } + } + } +} + +/// Resolves data requirements for each island using dominance-aware provider search. +/// +/// Walks islands in topological order. For each required path in an island's traversal set, +/// finds the nearest dominating predecessor whose target matches the path's origin backend. +/// If found, the path is registered on that predecessor (growing its fetch set). If not, +/// a [`FetchIsland`] is created. +fn resolve_requirements( + islands: &IslandSlice, + nodes: &mut IslandNodeVec, + cfg: &mut IslandCfg, + edges: &mut Vec, + topo_order: &[IslandNodeId], + vertex: VertexType, +) { + let doms = dominators(&*cfg, IslandNodeId::new(0)); + + // For each island, track which paths are "available" from dominating predecessors, + // grouped by the origin backend that provides them. + // We walk in topological order so predecessors are always processed before successors. + + // Per-node: which paths are available at this node, keyed by origin target. 
+ let mut available: IslandNodeVec<[EntityPathBitSet; TargetId::VARIANT_COUNT]> = + IslandNodeVec::from_fn_in( + nodes.len(), + |_| [EntityPathBitSet::new_empty(); TargetId::VARIANT_COUNT], + Global, + ); + + // For real islands, the island's own target makes all its fetched/produced paths available. + for node_id in topo_order { + let node = &nodes[*node_id]; + + if let IslandNode::Real(island_id) = node { + let island = &islands[*island_id]; + let target = island.target(); + + // Paths this island accesses are available from its target backend going forward. + if let Some(entity_paths) = island.traversals().as_entity() { + let avail = &mut available[*node_id][target.as_usize()]; + for path in entity_paths { + avail.insert(path); + } + } + } + + // Propagate availability to successors: a successor inherits availability from + // all dominating predecessors. + let current_available = available[*node_id]; + for succ in cfg.successors[*node_id].clone() { + if doms.dominates(*node_id, succ) { + for (target_idx, paths) in current_available.iter().enumerate() { + for path in paths { + available[succ][target_idx].insert(path); + } + } + } + } + } + + // Now resolve requirements: for each real island, check which of its required paths + // are NOT available from any dominating predecessor on the correct origin backend. + // Those need FetchIslands. + for &node_id in topo_order { + let node = &nodes[node_id]; + + let island_id = match node { + IslandNode::Real(island_id) => *island_id, + IslandNode::Fetch(_) => continue, + }; + + let island = &islands[island_id]; + let required = island.traversals(); + + let Some(entity_paths) = required.as_entity() else { + continue; + }; + + if entity_paths.is_empty() { + continue; + } + + // Group unsatisfied paths by origin backend. 
+ let mut unsatisfied: [EntityPathBitSet; TargetId::VARIANT_COUNT] = + [EntityPathBitSet::new_empty(); TargetId::VARIANT_COUNT]; + + for path in entity_paths { + let traversal_path = TraversalPath::Entity(path); + let origin = traversal_path.origin(); + + // Check if any origin backend has this path available from a dominating predecessor. + let is_satisfied = origin + .iter() + .any(|origin_target| available[node_id][origin_target.as_usize()].contains(path)); + + if is_satisfied { + // Find the nearest dominating predecessor that provides this path and + // add a data edge if one doesn't already exist. + for origin_target in origin.iter() { + if available[node_id][origin_target.as_usize()].contains(path) { + // Find the nearest predecessor on this target by walking up the + // dominator tree. + if let Some(provider) = find_nearest_provider( + node_id, + origin_target, + &doms, + nodes, + islands, + cfg, + ) { + add_data_edge(edges, provider, node_id, path); + } + break; + } + } + } else { + // No provider: needs a FetchIsland. Group by origin backend. + // Use the first origin target (for EntityPath, there's always exactly one). + if let Some(origin_target) = origin.iter().next() { + unsatisfied[origin_target.as_usize()].insert(path); + } + } + } + + // Create FetchIslands for unsatisfied paths, one per backend. + for target in TargetId::all() { + let paths = &unsatisfied[target.as_usize()]; + if paths.is_empty() { + continue; + } + + let fetch_node_id = IslandNodeId::from_usize(nodes.len()); + + // Extend the CFG adapter. + cfg.successors.push(Vec::new()); + cfg.predecessors.push(Vec::new()); + cfg.node_count += 1; + + // Extend available. + // (We won't re-process this node in the topo walk, but the structure must be + // consistent.) 
+ + nodes.push(IslandNode::Fetch(FetchIsland { + target, + paths: *paths, + })); + + cfg.add_edge(fetch_node_id, node_id); + + edges.push(IslandEdge { + source: fetch_node_id, + target: node_id, + kind: IslandEdgeKind::Data, + paths: *paths, + }); + } + } +} + +/// Finds the nearest dominating predecessor of `node` whose target matches `origin_target`. +/// +/// Walks up the dominator tree from `node`, checking each ancestor. Returns the first +/// (nearest) node that runs on the requested backend. +fn find_nearest_provider( + node: IslandNodeId, + origin_target: TargetId, + doms: &hashql_core::graph::algorithms::Dominators, + nodes: &IslandNodeSlice, + islands: &IslandSlice, + cfg: &IslandCfg, +) -> Option { + let mut current = node; + + loop { + let parent = doms.immediate_dominator(current)?; + + if parent == current { + return None; + } + + let parent_target = nodes[parent].target(islands); + if parent_target == origin_target { + return Some(parent); + } + + current = parent; + } +} + +/// Adds a data edge carrying `path` from `source` to `target`, merging into existing edges. +fn add_data_edge( + edges: &mut Vec, + source: IslandNodeId, + target: IslandNodeId, + path: EntityPath, +) { + use crate::pass::execution::traversal::EntityPath; + + let existing = edges.iter_mut().find(|edge| { + edge.source == source && edge.target == target && edge.kind == IslandEdgeKind::Data + }); + + if let Some(edge) = existing { + edge.paths.insert(path); + } else { + let mut paths = EntityPathBitSet::new_empty(); + paths.insert(path); + edges.push(IslandEdge { + source, + target, + kind: IslandEdgeKind::Data, + paths, + }); + } +} + +/// Computes a topological schedule with level assignment for parallelism. +/// +/// Each node is assigned the lowest level such that all its predecessors are at lower levels. +/// Nodes at the same level have no direct dependencies and can execute concurrently. 
+fn compute_schedule( + nodes: &IslandNodeSlice, + edges: &[IslandEdge], +) -> Vec { + let node_count = nodes.len(); + + // Compute in-degree for each node. + let mut in_degree = IslandNodeVec::from_fn_in(node_count, |_| 0u32, Global); + let mut successors: IslandNodeVec> = + IslandNodeVec::from_fn_in(node_count, |_| Vec::new(), Global); + + for edge in edges { + // Only count edges within the known node range (FetchIslands added later may + // exceed the initial allocation). + if edge.source.as_usize() < node_count && edge.target.as_usize() < node_count { + in_degree[edge.target] += 1; + if !successors[edge.source].contains(&edge.target) { + successors[edge.source].push(edge.target); + } + } + } + + let mut levels = IslandNodeVec::from_fn_in(node_count, |_| 0u32, Global); + let mut queue: Vec = Vec::new(); + + // Seed the queue with nodes that have no predecessors. + for node_id in (0..node_count).map(IslandNodeId::from_usize) { + if in_degree[node_id] == 0 { + queue.push(node_id); + } + } + + let mut schedule = Vec::with_capacity(node_count); + let mut head = 0; + + while head < queue.len() { + let node_id = queue[head]; + head += 1; + + schedule.push(ScheduledIsland { + node: node_id, + level: levels[node_id], + }); + + for &succ in &successors[node_id] { + levels[succ] = levels[succ].max(levels[node_id] + 1); + in_degree[succ] -= 1; + if in_degree[succ] == 0 { + queue.push(succ); + } + } + } + + schedule +} + +/// Builds the island dependency graph from placement results. 
+/// +/// Takes the body CFG, the discovered islands, and produces a graph with: +/// - Real island nodes mapped 1:1 from the input islands +/// - FetchIsland nodes for unsatisfied data requirements +/// - CFG and data edges between nodes +/// - A topological schedule with parallelism levels +pub(crate) fn build_island_graph( + body: &crate::body::Body<'_>, + islands: &IslandVec, + vertex: VertexType, +) -> IslandGraph { + use crate::body::basic_block::BasicBlockVec; + + let island_count = islands.len(); + + // Map each basic block to its island's node ID. + let mut block_to_island = + BasicBlockVec::from_domain_in(IslandNodeId::new(0), &body.basic_blocks, Global); + + for island_id in islands.ids() { + let node_id = IslandNodeId::from_usize(island_id.as_usize()); + for block in islands[island_id].iter() { + block_to_island[block] = node_id; + } + } + + // Initialize nodes: one per real island. + let mut nodes: IslandNodeVec = IslandNodeVec::from_fn_in( + island_count, + |id| IslandNode::Real(IslandId::from_usize(id.as_usize())), + Global, + ); + + let mut cfg = IslandCfg::new(island_count); + let mut edges = Vec::new(); + + // Phase 1: Build island-level CFG edges from block-level CFG. + build_island_cfg( + body, + islands, + &block_to_island, + &mut cfg, + &mut edges, + &Global, + ); + + // Phase 2: Compute topological order for the forward walk. + // Use reverse postorder (which is a valid topological order for DAGs). + let topo_order = { + use hashql_core::graph::Traverse as _; + let rpo: Vec = cfg + .depth_first_traversal_post_order([IslandNodeId::new(0)]) + .collect::>() + .into_iter() + .rev() + .collect(); + rpo + }; + + // Phase 3: Resolve data requirements with dominance-aware provider search. + resolve_requirements( + islands, + &mut nodes, + &mut cfg, + &mut edges, + &topo_order, + vertex, + ); + + // Phase 4: Compute the schedule with parallelism levels. 
+ let schedule = compute_schedule(&nodes, &edges); + + IslandGraph { + nodes, + edges, + schedule, + } +} diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs new file mode 100644 index 00000000000..1f1ab5e44e7 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs @@ -0,0 +1,257 @@ +//! Tests for island dependency graph construction and fetch island insertion. + +use alloc::alloc::Global; + +use hashql_core::{heap::Heap, symbol::sym, r#type::environment::Environment}; + +use crate::{ + body::basic_block::{BasicBlockId, BasicBlockVec}, + builder::body, + intern::Interner, + pass::execution::{ + VertexType, + island::{ + IslandId, IslandPlacement, + graph::{IslandEdgeKind, IslandNode, IslandNodeId, build_island_graph}, + }, + target::TargetId, + traversal::EntityPath, + }, +}; + +fn make_targets(assignments: &[TargetId]) -> BasicBlockVec { + let mut targets = BasicBlockVec::with_capacity_in(assignments.len(), Global); + for &target in assignments { + targets.push(target); + } + targets +} + +/// Single Postgres island accessing properties — no fetch island needed because the island +/// itself is on the origin backend for that path. +#[test] +fn single_island_no_fetch() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj props = vertex.properties: ?; + + bb0() { + val = load props; + return val; + } + }); + + let targets = make_targets(&[TargetId::Postgres]); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); + let graph = build_island_graph(&body, &islands, VertexType::Entity); + + // One real node, no fetch islands. 
+ assert_eq!(graph.nodes.len(), 1); + assert!( + matches!(graph.nodes[IslandNodeId::new(0)], IslandNode::Real(id) if id == IslandId::new(0)) + ); + assert!(graph.edges.is_empty()); +} + +/// Postgres island followed by Interpreter island that needs properties. +/// Properties originate from Postgres, so the Interpreter island gets a data edge +/// from the Postgres island — no fetch island needed. +#[test] +fn data_edge_from_predecessor() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val1: ?, val2: ?; + @proj props = vertex.properties: ?; + + bb0() { + val1 = load props; + goto bb1(); + }, + bb1() { + val2 = load props; + return val2; + } + }); + + let targets = make_targets(&[TargetId::Postgres, TargetId::Interpreter]); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); + + assert_eq!(islands.len(), 2); + + let graph = build_island_graph(&body, &islands, VertexType::Entity); + + // Two real nodes, no fetch islands. + assert_eq!(graph.nodes.len(), 2); + + // Should have a CFG edge from Postgres to Interpreter. + let cfg_edges: Vec<_> = graph + .edges + .iter() + .filter(|edge| edge.kind == IslandEdgeKind::Control) + .collect(); + assert_eq!(cfg_edges.len(), 1); + + // Should have a data edge carrying Properties from Postgres to Interpreter. + let data_edges: Vec<_> = graph + .edges + .iter() + .filter(|edge| edge.kind == IslandEdgeKind::Data) + .collect(); + assert_eq!(data_edges.len(), 1); + assert!(data_edges[0].paths.contains(EntityPath::Properties)); +} + +/// Interpreter island needs embedding data but has no Embedding predecessor. +/// A FetchIsland(Embedding) should be inserted. 
+#[test] +fn fetch_island_for_unsatisfied_requirement() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj enc = vertex.encodings: ?, + vecs = enc.vectors: ?; + + bb0() { + val = load vecs; + return val; + } + }); + + let targets = make_targets(&[TargetId::Interpreter]); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); + let graph = build_island_graph(&body, &islands, VertexType::Entity); + + // One real node plus one FetchIsland(Embedding). + assert_eq!(graph.nodes.len(), 2); + assert!(matches!( + graph.nodes[IslandNodeId::new(0)], + IslandNode::Real(_) + )); + + let fetch_node = &graph.nodes[IslandNodeId::new(1)]; + match fetch_node { + IslandNode::Fetch(fetch) => { + assert_eq!(fetch.target, TargetId::Embedding); + assert!(fetch.paths.contains(EntityPath::Vectors)); + } + IslandNode::Real(_) => panic!("expected FetchIsland, got Real"), + } +} + +/// Diamond CFG: Postgres branches to Interpreter and Embedding, both merge into a +/// final Postgres island. The Embedding path is only available on one branch, so the +/// final Postgres island needs a FetchIsland for embedding data. +#[test] +fn diamond_branch_needs_fetch() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], + val: ?, cond: Bool; + @proj props = vertex.properties: ?, + enc = vertex.encodings: ?, + vecs = enc.vectors: ?; + + bb0() { + cond = load true; + if cond then bb1() else bb2(); + }, + bb1() { + val = load props; + goto bb3(); + }, + bb2() { + val = load vecs; + goto bb3(); + }, + bb3() { + val = load vecs; + return val; + } + }); + + // bb0=Postgres, bb1=Interpreter, bb2=Embedding, bb3=Postgres + let targets = make_targets(&[ + TargetId::Postgres, + TargetId::Interpreter, + TargetId::Embedding, + TargetId::Postgres, + ]); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); + let graph = build_island_graph(&body, &islands, VertexType::Entity); + + // The final Postgres island needs Vectors. The Embedding island (bb2) only runs on + // one branch and doesn't dominate bb3, so a FetchIsland(Embedding) must be inserted. + let fetch_nodes: Vec<_> = graph + .nodes + .iter() + .filter(|node| matches!(node, IslandNode::Fetch(_))) + .collect(); + + assert!( + !fetch_nodes.is_empty(), + "expected at least one FetchIsland for embedding data" + ); + + let has_embedding_fetch = fetch_nodes.iter().any( + |node| matches!(node, IslandNode::Fetch(fetch) if fetch.target == TargetId::Embedding), + ); + assert!(has_embedding_fetch); +} + +/// Schedule levels: independent fetch islands and entry islands should be at level 0. +/// Their consumers should be at level 1 or higher. +#[test] +fn schedule_levels() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj enc = vertex.encodings: ?, + vecs = enc.vectors: ?; + + bb0() { + val = load vecs; + return val; + } + }); + + let targets = make_targets(&[TargetId::Interpreter]); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); + let graph = build_island_graph(&body, &islands, VertexType::Entity); + + // The real island depends on the FetchIsland, so it should be at a higher level. + assert!(graph.schedule.len() >= 2); + + let real_level = graph + .schedule + .iter() + .find(|sched| matches!(graph.nodes[sched.node], IslandNode::Real(_))) + .map(|sched| sched.level); + let fetch_level = graph + .schedule + .iter() + .find(|sched| matches!(graph.nodes[sched.node], IslandNode::Fetch(_))) + .map(|sched| sched.level); + + if let (Some(real_level), Some(fetch_level)) = (real_level, fetch_level) { + assert!( + real_level > fetch_level, + "real island (level {real_level}) should be after fetch island (level {fetch_level})" + ); + } +} diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph2.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph2.rs new file mode 100644 index 00000000000..5fe388b03d2 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/island/graph2.rs @@ -0,0 +1,371 @@ +use core::ops::{Index, IndexMut}; +use std::alloc::Allocator; + +use hashql_core::{ + debug_panic, + graph::{ + DirectedGraph, EdgeId, LinkedGraph, NodeId, Predecessors, Successors, Traverse, + algorithms::{Dominators, dominators}, + linked::Edge, + }, + heap::CollectIn, + id::{ + HasId, Id, + bit_vec::{BitMatrix, DenseBitSet}, + }, +}; + +use super::{Island, IslandId, IslandVec}; +use crate::{ + body::{ + Body, + basic_block::{BasicBlockId, BasicBlockVec}, + }, + pass::execution::{TargetId, VertexType, target::TargetArray, traversal::TraversalPathBitSet}, +}; + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum IslandEdge { + ControlFlow, + DataFlow, + Inherits, 
+} + +#[derive(Debug)] +pub struct ExecIsland { + members: DenseBitSet, +} + +#[derive(Debug)] +pub enum IslandKind { + Exec(ExecIsland), + Data, +} + +#[derive(Debug)] +pub struct IslandNode { + kind: IslandKind, + + target: TargetId, + + requires: TraversalPathBitSet, + provides: TraversalPathBitSet, +} + +pub struct IslandGraph { + vertex: VertexType, + + inner: LinkedGraph, + lookup: BasicBlockVec, +} + +impl IslandGraph { + fn build_in( + body: &Body<'_>, + vertex: VertexType, + islands: IslandVec, + scratch: S, + alloc: A, + ) -> Self + where + S: Allocator, + A: Clone, + { + let mut lookup = + BasicBlockVec::from_domain_in(IslandId::MAX, &body.basic_blocks, alloc.clone()); + let mut graph = + LinkedGraph::with_capacity_in(islands.len(), body.basic_blocks.edge_count(), alloc); + let mut matrix = BitMatrix::new_in(islands.len(), islands.len(), scratch); + + for ( + island_id, + Island { + target, + members, + traversals, + }, + ) in islands.into_iter_enumerated() + { + for block_id in &members { + lookup[block_id] = island_id + } + + let node_id = graph.add_node(IslandNode { + kind: IslandKind::Exec(ExecIsland { members }), + + target, + requires: traversals, + provides: TraversalPathBitSet::empty(vertex), + }); + debug_assert_eq!(node_id.as_u32(), island_id.as_u32()); + } + + for block_id in body.basic_blocks.ids() { + let source = lookup[block_id]; + + for successor in body.basic_blocks.successors(block_id) { + let target = lookup[successor]; + + // We *ignore* anything that points locally within the same island, to deduplicate + // work we also do not duplicate any edges + if source == target || matrix.contains(source, target) { + continue; + } + + matrix.insert(source, target); + graph.add_edge( + NodeId::new(source.as_u32()), + NodeId::new(target.as_u32()), + IslandEdge::ControlFlow, + ); + } + } + + Self { + vertex, + inner: graph, + lookup, + } + } + + fn resolve_requirements(&mut self, topo: &[IslandId], scratch: S) + where + S: Allocator + Clone, + { 
+ if topo.is_empty() { + return; + } + + let start = self.lookup[BasicBlockId::START]; + let dominators = dominators(self, start); + + let mut merged_provides = IslandVec::from_elem_in( + TraversalPathBitSet::empty(self.vertex), + self.node_count(), + scratch, + ); + let mut data_providers = TargetArray::from_elem(None); + + for &island_id in topo { + let island = &self.inner[NodeId::new(island_id.as_u32())].data; + let IslandKind::Exec(exec) = &island.kind else { + debug_panic!("data islands should not be available yet"); + + continue; + }; + + // Get the immediate dominator of this island, and copy the what we provide, this allows + // us to minify the amount of data required across multiple islands. + if let Some((parent, _)) = + find_dominator_by_target(&dominators, self, island_id, island.target) + { + merged_provides.copy_within(parent..=parent, island_id); + self.inner.add_edge( + NodeId::from_u32(island_id.as_u32()), + NodeId::from_u32(parent.as_u32()), + IslandEdge::Inherits, + ); + } + + // Now for the data that we require, find the backend that satisfies it, because there + // *may* be cases, in which the data is satisfied from multiple parties we pick the one + // that is "closest" to us in terms of the dominator tree. We save the closest provider + // lazily, and only initialize them where necessary. + let mut providers = TargetArray::from_elem(None); + + for requirement in &island.requires { + let potential_targets = requirement.origin(); + debug_assert!(!potential_targets.is_empty()); + + let mut current_candidate = None; + for target in &potential_targets { + debug_assert_ne!( + target, island.target, + "island should never require its own target" + ); + + // Find if we need to re-compute the dominator for this target and node, + // otherwise we can just use the result of the previous iteration + if providers[target].is_none() { + // We "double-some" to ensure that we don't recompute every-time if there's + // no dominator. 
+ providers[target] = Some(find_dominator_by_target( + &dominators, + self, + island_id, + target, + )); + } + + if let Some((provider, depth)) = providers[target].flatten() { + // There *does* exist a dominator, check if we already have a candidate + if let Some((_, existing_depth)) = current_candidate { + if depth < existing_depth { + current_candidate = Some((provider, depth)); + } + } else { + current_candidate = Some((provider, depth)); + } + } + } + + if let Some((provider, _)) = current_candidate { + if !merged_provides[provider].contains(requirement) { + merged_provides[provider].insert(requirement); + self[provider].provides.insert(requirement); + } + + self.inner.add_edge( + NodeId::from_u32(island_id.as_u32()), + NodeId::from_u32(provider.as_u32()), + IslandEdge::DataFlow, + ); + } else { + // Find the first that fits, the order of TargetId guarantees that the most + // ideal (except for interpreter) is first. We need *some* way to determine + // preference + // TODO: check if we already have a backend provider, then do that + + let first = potential_targets + .first_set() + .unwrap_or_else(|| unreachable!()); + + let provider = if let Some(provider) = data_providers[first] { + provider + } else { + let node = self.inner.add_node(IslandNode { + kind: IslandKind::Data, + target: first, + requires: TraversalPathBitSet::empty(self.vertex), + provides: TraversalPathBitSet::empty(self.vertex), + }); + let node = IslandId::from_u32(node.as_u32()); + data_providers[first] = Some(node); + + node + }; + + if !merged_provides[provider].contains(requirement) { + merged_provides[provider].insert(requirement); + self[provider].provides.insert(requirement); + } + + self.inner.add_edge( + NodeId::from_u32(island_id.as_u32()), + NodeId::from_u32(provider.as_u32()), + IslandEdge::DataFlow, + ); + } + } + } + todo!() + } + + fn resolve(&mut self, scratch: S) + where + S: Allocator + Clone, + { + // RPO is a valid topological ordering of the islands, where each island is 
visited after + // all of its predecessors. + let topo: Vec<_, _> = self + .inner + .depth_first_forest_post_order() + .map(|node| IslandId::new(node.as_u32())) + .collect_in(scratch.clone()); + } +} + +impl DirectedGraph for IslandGraph { + type Edge<'this> + = &'this Edge + where + Self: 'this; + type EdgeId = EdgeId; + type Node<'this> + = (IslandId, &'this IslandNode) + where + Self: 'this; + type NodeId = IslandId; + + fn node_count(&self) -> usize { + self.inner.node_count() + } + + fn edge_count(&self) -> usize { + self.inner.edge_count() + } + + fn iter_nodes(&self) -> impl ExactSizeIterator> + DoubleEndedIterator { + self.inner + .iter_nodes() + .map(|node| (IslandId::from_u32(node.id().as_u32()), &node.data)) + } + + fn iter_edges(&self) -> impl ExactSizeIterator> + DoubleEndedIterator { + self.inner.iter_edges() + } +} + +impl Successors for IslandGraph { + type SuccIter<'this> + = impl Iterator + 'this + where + Self: 'this; + + fn successors(&self, node: Self::NodeId) -> Self::SuccIter<'_> { + self.inner + .successors(NodeId::new(node.as_u32())) + .map(|node| IslandId::new(node.as_u32())) + } +} + +impl Predecessors for IslandGraph { + type PredIter<'this> + = impl Iterator + 'this + where + Self: 'this; + + fn predecessors(&self, node: Self::NodeId) -> Self::PredIter<'_> { + self.inner + .predecessors(NodeId::new(node.as_u32())) + .map(|node| IslandId::new(node.as_u32())) + } +} + +impl IndexMut for IslandGraph { + fn index_mut(&mut self, index: IslandId) -> &mut Self::Output { + &mut self.inner[NodeId::new(index.as_u32())].data + } +} + +impl Index for IslandGraph { + type Output = IslandNode; + + fn index(&self, index: IslandId) -> &Self::Output { + &self.inner[NodeId::new(index.as_u32())].data + } +} + +fn find_dominator_by_target( + dominators: &Dominators, + graph: &IslandGraph, + node: IslandId, + requirement: TargetId, +) -> Option<(IslandId, usize)> { + let mut current = node; + let mut depth = 0; + + loop { + let parent = 
dominators.immediate_dominator(current)?; + if parent == node { + return None; // is that even possible? + } + + if graph[parent].target == requirement { + return Some((parent, depth)); + } + + current = parent; + depth += 1; + } +} diff --git a/libs/@local/hashql/mir/src/pass/execution/island/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/mod.rs index be767150b88..64c6bf4b868 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/mod.rs @@ -29,6 +29,8 @@ use crate::{ visit::Visitor as _, }; +// pub(crate) mod graph; +mod graph2; #[cfg(test)] mod tests; diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index f1920d860c8..5e667b8ff92 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -36,7 +36,13 @@ use self::{ }; pub use self::{ cost::{ApproxCost, Cost}, - island::{Island, IslandId, IslandVec}, + island::{ + Island, IslandId, IslandVec, + graph::{ + FetchIsland, IslandEdge, IslandEdgeKind, IslandGraph, IslandNode, IslandNodeId, + ScheduledIsland, + }, + }, placement::error::PlacementDiagnosticCategory, target::TargetId, vertex::VertexType, diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs index f184abf0f0b..2d55da0271c 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs @@ -464,6 +464,10 @@ impl EntityPathBitSet { } } + pub(crate) fn contains(&self, path: EntityPath) -> bool { + self.0.contains(path) + } + fn normalize(&mut self) { for path in &self.0 { for &ancestor in path.ancestors() { diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs index 809f1b57d53..daf97acb051 100644 --- 
a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs @@ -124,6 +124,12 @@ impl TraversalPathBitSet { } } + pub fn contains(&self, path: TraversalPath) -> bool { + match (self, path) { + (Self::Entity(bitset), TraversalPath::Entity(path)) => bitset.contains(path), + } + } + /// Inserts all possible paths into the set. #[inline] pub const fn insert_all(&mut self) { From 92a81ac002c1c68c80e40d887f8252c7f5f173a5 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Tue, 3 Mar 2026 19:36:44 +0100 Subject: [PATCH 2/9] feat: checkpoint --- .../mir/src/pass/execution/island/graph2.rs | 344 ++++++++++-------- .../src/pass/execution/traversal/access.rs | 6 + .../pass/execution/traversal/analysis/mod.rs | 9 + .../src/pass/execution/traversal/entity.rs | 28 ++ .../mir/src/pass/execution/traversal/mod.rs | 4 + 5 files changed, 242 insertions(+), 149 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph2.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph2.rs index 5fe388b03d2..65dae2d7e31 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/graph2.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/graph2.rs @@ -21,13 +21,21 @@ use crate::{ Body, basic_block::{BasicBlockId, BasicBlockVec}, }, - pass::execution::{TargetId, VertexType, target::TargetArray, traversal::TraversalPathBitSet}, + pass::execution::{ + TargetId, VertexType, + target::{TargetArray, TargetBitSet}, + traversal::{TraversalPath, TraversalPathBitSet}, + }, }; #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum IslandEdge { + /// Direct control flow between islands (block-level CFG edge crossing an island boundary). ControlFlow, + /// Data dependency: the consumer fetches directly from the producer's backend. DataFlow, + /// Same-target inheritance: the child island inherits provided paths from a dominating + /// ancestor on the same backend. 
Inherits, } @@ -38,23 +46,48 @@ pub struct ExecIsland { #[derive(Debug)] pub enum IslandKind { + /// A real island from the placement solver. Exec(ExecIsland), + /// A synthetic island dedicated to fetching data from a specific backend. Data, } #[derive(Debug)] pub struct IslandNode { kind: IslandKind, - target: TargetId, - requires: TraversalPathBitSet, provides: TraversalPathBitSet, } +impl IslandNode { + #[inline] + #[must_use] + pub const fn kind(&self) -> &IslandKind { + &self.kind + } + + #[inline] + #[must_use] + pub const fn target(&self) -> TargetId { + self.target + } + + #[inline] + #[must_use] + pub const fn requires(&self) -> TraversalPathBitSet { + self.requires + } + + #[inline] + #[must_use] + pub const fn provides(&self) -> TraversalPathBitSet { + self.provides + } +} + pub struct IslandGraph { vertex: VertexType, - inner: LinkedGraph, lookup: BasicBlockVec, } @@ -87,12 +120,11 @@ impl IslandGraph { ) in islands.into_iter_enumerated() { for block_id in &members { - lookup[block_id] = island_id + lookup[block_id] = island_id; } let node_id = graph.add_node(IslandNode { kind: IslandKind::Exec(ExecIsland { members }), - target, requires: traversals, provides: TraversalPathBitSet::empty(vertex), @@ -106,8 +138,6 @@ impl IslandGraph { for successor in body.basic_blocks.successors(block_id) { let target = lookup[successor]; - // We *ignore* anything that points locally within the same island, to deduplicate - // work we also do not duplicate any edges if source == target || matrix.contains(source, target) { continue; } @@ -128,150 +158,22 @@ impl IslandGraph { } } - fn resolve_requirements(&mut self, topo: &[IslandId], scratch: S) + /// Resolves all island requirements and inserts data islands where needed. 
+ pub(crate) fn resolve(&mut self, scratch: S) where S: Allocator + Clone, { - if topo.is_empty() { - return; - } - - let start = self.lookup[BasicBlockId::START]; - let dominators = dominators(self, start); - - let mut merged_provides = IslandVec::from_elem_in( - TraversalPathBitSet::empty(self.vertex), - self.node_count(), - scratch, - ); - let mut data_providers = TargetArray::from_elem(None); - - for &island_id in topo { - let island = &self.inner[NodeId::new(island_id.as_u32())].data; - let IslandKind::Exec(exec) = &island.kind else { - debug_panic!("data islands should not be available yet"); - - continue; - }; - - // Get the immediate dominator of this island, and copy the what we provide, this allows - // us to minify the amount of data required across multiple islands. - if let Some((parent, _)) = - find_dominator_by_target(&dominators, self, island_id, island.target) - { - merged_provides.copy_within(parent..=parent, island_id); - self.inner.add_edge( - NodeId::from_u32(island_id.as_u32()), - NodeId::from_u32(parent.as_u32()), - IslandEdge::Inherits, - ); - } - - // Now for the data that we require, find the backend that satisfies it, because there - // *may* be cases, in which the data is satisfied from multiple parties we pick the one - // that is "closest" to us in terms of the dominator tree. We save the closest provider - // lazily, and only initialize them where necessary. 
- let mut providers = TargetArray::from_elem(None); - - for requirement in &island.requires { - let potential_targets = requirement.origin(); - debug_assert!(!potential_targets.is_empty()); - - let mut current_candidate = None; - for target in &potential_targets { - debug_assert_ne!( - target, island.target, - "island should never require its own target" - ); - - // Find if we need to re-compute the dominator for this target and node, - // otherwise we can just use the result of the previous iteration - if providers[target].is_none() { - // We "double-some" to ensure that we don't recompute every-time if there's - // no dominator. - providers[target] = Some(find_dominator_by_target( - &dominators, - self, - island_id, - target, - )); - } - - if let Some((provider, depth)) = providers[target].flatten() { - // There *does* exist a dominator, check if we already have a candidate - if let Some((_, existing_depth)) = current_candidate { - if depth < existing_depth { - current_candidate = Some((provider, depth)); - } - } else { - current_candidate = Some((provider, depth)); - } - } - } - - if let Some((provider, _)) = current_candidate { - if !merged_provides[provider].contains(requirement) { - merged_provides[provider].insert(requirement); - self[provider].provides.insert(requirement); - } - - self.inner.add_edge( - NodeId::from_u32(island_id.as_u32()), - NodeId::from_u32(provider.as_u32()), - IslandEdge::DataFlow, - ); - } else { - // Find the first that fits, the order of TargetId guarantees that the most - // ideal (except for interpreter) is first. 
We need *some* way to determine - // preference - // TODO: check if we already have a backend provider, then do that - - let first = potential_targets - .first_set() - .unwrap_or_else(|| unreachable!()); - - let provider = if let Some(provider) = data_providers[first] { - provider - } else { - let node = self.inner.add_node(IslandNode { - kind: IslandKind::Data, - target: first, - requires: TraversalPathBitSet::empty(self.vertex), - provides: TraversalPathBitSet::empty(self.vertex), - }); - let node = IslandId::from_u32(node.as_u32()); - data_providers[first] = Some(node); - - node - }; - - if !merged_provides[provider].contains(requirement) { - merged_provides[provider].insert(requirement); - self[provider].provides.insert(requirement); - } - - self.inner.add_edge( - NodeId::from_u32(island_id.as_u32()), - NodeId::from_u32(provider.as_u32()), - IslandEdge::DataFlow, - ); - } - } - } - todo!() - } - - fn resolve(&mut self, scratch: S) - where - S: Allocator + Clone, - { - // RPO is a valid topological ordering of the islands, where each island is visited after - // all of its predecessors. - let topo: Vec<_, _> = self + let mut topo: Vec = self .inner .depth_first_forest_post_order() .map(|node| IslandId::new(node.as_u32())) .collect_in(scratch.clone()); + topo.reverse(); + + // Postorder collected into a vec; iterate in reverse for topological order. + let start = self.lookup[BasicBlockId::START]; + + RequirementResolver::new(self, start, scratch).resolve(&topo); } } @@ -346,22 +248,25 @@ impl Index for IslandGraph { } } +/// Walks up the dominator tree from `node` to find the nearest ancestor whose target matches. +/// +/// Returns the ancestor and its depth in the dominator tree (0 = immediate dominator). 
 fn find_dominator_by_target(
     dominators: &Dominators,
     graph: &IslandGraph,
     node: IslandId,
-    requirement: TargetId,
+    target: TargetId,
 ) -> Option<(IslandId, usize)> {
     let mut current = node;
     let mut depth = 0;

     loop {
         let parent = dominators.immediate_dominator(current)?;
-        if parent == node {
-            return None; // is that even possible?
+        if parent == current {
+            return None;
         }

-        if graph[parent].target == requirement {
+        if graph[parent].target == target {
             return Some((parent, depth));
         }

@@ -369,3 +274,144 @@ fn find_dominator_by_target(
         depth += 1;
     }
 }
+
+/// Resolves data requirements for all islands, inserting data islands where needed.
+///
+/// Walks islands in reverse postorder (topological order). For each required path, finds
+/// the nearest dominating predecessor on the matching backend. If none exists, creates a
+/// synthetic data island. Carries all shared state so individual methods stay clean.
+struct RequirementResolver<'graph, A: Allocator, S: Allocator> {
+    graph: &'graph mut IslandGraph,
+    dominators: Dominators,
+    merged_provides: IslandVec,
+    data_providers: TargetArray>,
+}
+
+impl<'graph, A: Allocator, S: Allocator + Clone> RequirementResolver<'graph, A, S> {
+    fn new(graph: &'graph mut IslandGraph, start: IslandId, scratch: S) -> Self {
+        let dominators = dominators(&*graph, start);
+        let merged_provides = IslandVec::from_elem_in(
+            TraversalPathBitSet::empty(graph.vertex),
+            graph.node_count(),
+            scratch,
+        );
+
+        Self {
+            graph,
+            dominators,
+            merged_provides,
+            data_providers: TargetArray::from_elem(None),
+        }
+    }
+
+    fn resolve(mut self, topo: &[IslandId]) {
+        // `topo` is already in reverse postorder (topological order), so iterate forward.
+        for &island_id in topo {
+            let island = &self.graph[island_id];
+            let IslandKind::Exec(_) = &island.kind else {
+                debug_panic!("data islands should not be present during requirement resolution");
+                continue;
+            };
+
+            self.inherit_provides(island_id);
+            self.resolve_island(island_id);
+        }
+    }
+
+    /// If a same-target dominator exists, 
inherits its provided paths via an `Inherits` edge. + fn inherit_provides(&mut self, island_id: IslandId) { + let island_target = self.graph[island_id].target; + + if let Some((parent, _)) = + find_dominator_by_target(&self.dominators, self.graph, island_id, island_target) + { + self.merged_provides.copy_within(parent..=parent, island_id); + self.graph.inner.add_edge( + NodeId::from_u32(island_id.as_u32()), + NodeId::from_u32(parent.as_u32()), + IslandEdge::Inherits, + ); + } + } + + /// Resolves requirements for a single island. + fn resolve_island(&mut self, island_id: IslandId) { + let requires = self.graph[island_id].requires; + if requires.is_empty() { + return; + } + + // Cache dominator lookups per target to avoid repeated walks. + let mut cached = TargetArray::from_elem(None); + + for requirement in &requires { + let origin = requirement.origin(); + debug_assert!(!origin.is_empty()); + + let provider = self.find_best_provider(&mut cached, island_id, &origin); + let provider = provider.unwrap_or_else(|| self.get_or_create_data_island(&origin)); + + self.register_path(provider, island_id, requirement); + } + } + + /// Finds the nearest dominating provider among the potential origin targets. + fn find_best_provider( + &self, + cached: &mut TargetArray>>, + island_id: IslandId, + origin: &TargetBitSet, + ) -> Option { + origin + .iter() + .filter_map(|target| { + *cached[target].get_or_insert_with(|| { + find_dominator_by_target(&self.dominators, self.graph, island_id, target) + }) + }) + .min_by_key(|&(_, depth)| depth) + .map(|(provider, _)| provider) + } + + /// Registers a path as provided by `provider` for consumption by `consumer`. 
+    fn register_path(&mut self, provider: IslandId, consumer: IslandId, path: TraversalPath) {
+        if !self.merged_provides[provider].contains(path) {
+            self.merged_provides[provider].insert(path);
+            self.graph[provider].provides.insert(path);
+        }
+
+        self.graph.inner.add_edge(
+            NodeId::from_u32(consumer.as_u32()),
+            NodeId::from_u32(provider.as_u32()),
+            IslandEdge::DataFlow,
+        );
+    }
+
+    /// Returns an existing data island for the given origin backend, or creates one.
+    fn get_or_create_data_island(&mut self, origin: &TargetBitSet) -> IslandId {
+        // Check if *any* of the origin targets already has an initialised data island; if so,
+        // reuse it instead of creating a new one.
+        if let Some(provider) = origin.iter().find_map(|target| self.data_providers[target]) {
+            return provider;
+        }
+
+        // `TargetId` is ordered by backend priority, so the first set bit gives us the best target
+        // (note that interpreter is technically first, but never a target for data).
+        let target = origin.first_set().unwrap_or_else(|| unreachable!());
+
+        if let Some(provider) = self.data_providers[target] {
+            return provider;
+        }
+
+        let node = self.graph.inner.add_node(IslandNode {
+            kind: IslandKind::Data,
+            target,
+            requires: TraversalPathBitSet::empty(self.graph.vertex),
+            provides: TraversalPathBitSet::empty(self.graph.vertex),
+        });
+        let provider = IslandId::from_u32(node.as_u32());
+        self.data_providers[target] = Some(provider);
+
+        provider
+    }
+}
diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/access.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/access.rs
index 581408bf766..5c703ef7aa5 100644
--- a/libs/@local/hashql/mir/src/pass/execution/traversal/access.rs
+++ b/libs/@local/hashql/mir/src/pass/execution/traversal/access.rs
@@ -1,11 +1,17 @@
+/// How a path maps to its backend storage.
 #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
 pub(crate) enum AccessMode {
+    /// The path corresponds to a single column or embedding slot.
Direct, + /// The path is a composite whose children are the actual storage locations. Composite, } +/// Backend and access mode for a resolved entity field path. #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub(crate) enum Access { + /// Served by the Postgres graph store. Postgres(AccessMode), + /// Served by the embedding backend. Embedding(AccessMode), } diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs index fbfb0a1b0b6..d84d03d0994 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs @@ -12,12 +12,19 @@ use crate::{ visit::{self, Visitor}, }; +/// Outcome of resolving a vertex access to a storage path. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub(crate) enum TraversalResult { + /// The access resolved to a specific storage location. Path(TraversalPath), + /// The access could not be resolved; full entity hydration is required. Complete, } +/// MIR visitor that resolves vertex field accesses to [`TraversalResult`]s. +/// +/// Walks a body's places, finds uses of [`Local::VERTEX`], resolves the projection chain +/// via [`EntityPath::resolve`], and calls `on_traversal` with the [`Location`] and result. // TODO: Each consumer (statement placement per target, island placement) resolves traversal paths // independently. Consider caching resolved paths per body to avoid redundant work. // See: https://linear.app/hash/issue/BE-435 @@ -27,6 +34,8 @@ pub(crate) struct TraversalAnalysisVisitor { } impl TraversalAnalysisVisitor { + /// Creates a visitor for the given vertex type, calling `on_traversal` for each resolved + /// vertex access. 
pub(crate) const fn new(vertex: VertexType, on_traversal: F) -> Self where F: FnMut(Location, TraversalResult), diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs index 2d55da0271c..3ba9e974a76 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs @@ -23,6 +23,7 @@ use crate::{ }, }; +/// Shorthand for accessing a compile-time [`ConstantSymbol`] from the [`sym`] module. macro_rules! sym { ($($sym:tt)::*) => { sym::$($sym)::*::CONST @@ -172,6 +173,9 @@ impl TransferCostConfig { } } +/// Integer type backing the [`FiniteBitSet`] for [`EntityPath`]. +/// +/// Must have at least as many bits as there are [`EntityPath`] variants (asserted below). type FiniteBitSetWidth = u32; const _: () = { assert!( @@ -181,6 +185,10 @@ const _: () = { }; impl EntityPath { + /// Resolves a sequence of field projections to an [`EntityPath`]. + /// + /// Returns the resolved path together with the number of projections consumed, or `None` + /// if the projections do not map to any known storage location. #[must_use] pub fn resolve(projections: &[Projection<'_>]) -> Option<(Self, usize)> { resolve(projections) @@ -370,6 +378,7 @@ impl EntityPath { } } + /// Returns `true` if this path targets a JSONB column that allows arbitrary sub-paths. const fn is_jsonb(self) -> bool { matches!( self, @@ -383,6 +392,10 @@ impl EntityPath { } } +/// Paths that have at least one ancestor composite, collected at compile time. +/// +/// Used to compute [`EntityPathBitSet::TOP`] by removing children that are subsumed by +/// their ancestor composites. const HAS_ANCESTORS: [EntityPath; HAS_ANCESTOR_COUNT] = { let mut out = [EntityPath::Archived; HAS_ANCESTOR_COUNT]; @@ -401,6 +414,7 @@ const HAS_ANCESTORS: [EntityPath; HAS_ANCESTOR_COUNT] = { out }; +/// Number of [`EntityPath`] variants that have at least one ancestor composite. 
const HAS_ANCESTOR_COUNT: usize = { let mut count = 0; let mut index = 0; @@ -417,6 +431,11 @@ const HAS_ANCESTOR_COUNT: usize = { count }; +/// Bitset of [`EntityPath`] values with composite swallowing. +/// +/// Insertions respect the composite hierarchy: inserting a composite removes its children, +/// and inserting a child when its ancestor is already present is a no-op. The lattice top +/// contains exactly the root-level and childless paths (18 of 25 variants). #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub struct EntityPathBitSet(FiniteBitSet); @@ -437,6 +456,7 @@ impl EntityPathBitSet { Self(set) }; + /// Creates an empty bitset with no paths set. #[expect(clippy::cast_possible_truncation)] #[must_use] pub const fn new_empty() -> Self { @@ -464,10 +484,14 @@ impl EntityPathBitSet { } } + /// Returns `true` if `path` is present in the bitset. pub(crate) fn contains(&self, path: EntityPath) -> bool { self.0.contains(path) } + /// Re-applies composite swallowing after a raw union. + /// + /// Removes any path whose ancestor composite is also present in the set. fn normalize(&mut self) { for path in &self.0 { for &ancestor in path.ancestors() { @@ -544,6 +568,10 @@ impl const core::ops::Deref for EntityPathBitSet { } } +/// Extracts the field name from the projection at `*index`, advancing the index on success. +/// +/// Returns `None` if the projection is not a [`FieldByName`](ProjectionKind::FieldByName) +/// or if `*index` is out of bounds. 
#[inline] fn project(projections: &[Projection<'_>], index: &mut usize) -> Option { let projection = projections.get(*index).and_then(|projection| { diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs index daf97acb051..64c59a1b072 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs @@ -38,11 +38,13 @@ pub struct TraversalLattice { } impl TraversalLattice { + /// Creates a lattice for the given vertex type. #[must_use] pub const fn new(vertex: VertexType) -> Self { Self { vertex } } + /// Returns the vertex type this lattice operates over. #[must_use] pub const fn vertex(self) -> VertexType { self.vertex @@ -124,6 +126,7 @@ impl TraversalPathBitSet { } } + /// Returns `true` if `path` is present in the bitset. pub fn contains(&self, path: TraversalPath) -> bool { match (self, path) { (Self::Entity(bitset), TraversalPath::Entity(path)) => bitset.contains(path), @@ -138,6 +141,7 @@ impl TraversalPathBitSet { } } + /// Iterates over the paths in this bitset. 
#[must_use] #[inline] pub fn iter(&self) -> impl ExactSizeIterator { From 0b4c867a258626a5dece2ba5539b5d74da32f2e8 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Tue, 3 Mar 2026 20:01:15 +0100 Subject: [PATCH 3/9] feat: checkpoint --- .../src/pass/execution/island/graph/mod.rs | 874 ++++++++---------- .../src/pass/execution/island/graph/tests.rs | 239 ++--- .../mir/src/pass/execution/island/graph2.rs | 417 --------- .../mir/src/pass/execution/island/mod.rs | 4 +- .../src/pass/execution/island/schedule/mod.rs | 110 +++ .../pass/execution/island/schedule/tests.rs | 104 +++ .../hashql/mir/src/pass/execution/mod.rs | 6 +- .../src/pass/execution/traversal/entity.rs | 3 +- .../mir/src/pass/execution/traversal/mod.rs | 3 +- 9 files changed, 747 insertions(+), 1013 deletions(-) delete mode 100644 libs/@local/hashql/mir/src/pass/execution/island/graph2.rs create mode 100644 libs/@local/hashql/mir/src/pass/execution/island/schedule/mod.rs create mode 100644 libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs index 91bfa88be85..29b1e312ae2 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs @@ -1,580 +1,508 @@ -//! Island dependency graph with requirement-based edges and fetch island insertion. +//! Island dependency graph with data requirement resolution. //! -//! Builds a directed graph over [`Island`]s where edges carry the [`EntityPathBitSet`]s -//! that flow between islands. Two edge kinds exist: +//! After [`IslandPlacement`] groups basic blocks into [`Island`]s by target, this module +//! builds a directed graph over those islands, resolves which traversal paths each island +//! needs, and inserts synthetic data islands for paths that cannot be satisfied by an +//! upstream provider. //! -//! 
- **CFG edges**: derived from block-level control flow crossing island boundaries. The successor -//! island consumes data the predecessor island produces. -//! - **Data edges**: an island needs entity paths from a non-adjacent producer. The data is fetched -//! directly from the producer's backend, not routed through intermediaries. +//! Three edge kinds connect islands: //! -//! When an island requires paths that no dominating predecessor can provide, a -//! [`FetchIsland`] is inserted as a synthetic parallel predecessor dedicated to fetching -//! that data from the origin backend. +//! - [`ControlFlow`]: the source island must complete before the target island begins. +//! - [`DataFlow`]: the target island consumes data produced by the source island. +//! - [`Inherits`]: the target island inherits provided paths from the source island. //! -//! The output includes a topological schedule with level assignment for parallelism: -//! islands at the same level with no edges between them can execute concurrently. +//! [`IslandPlacement`]: super::IslandPlacement +//! [`Island`]: super::Island +//! [`ControlFlow`]: IslandEdge::ControlFlow +//! [`DataFlow`]: IslandEdge::DataFlow +//! 
[`Inherits`]: IslandEdge::Inherits + +#[cfg(test)] +pub(crate) mod tests; use alloc::alloc::Global; -use core::alloc::Allocator; +use core::{ + alloc::Allocator, + ops::{Index, IndexMut}, +}; use hashql_core::{ - graph::{DirectedGraph, Predecessors, Successors, algorithms::dominators}, - id::{self, Id, IdVec, bit_vec::DenseBitSet}, + debug_panic, + graph::{ + DirectedGraph, EdgeId, LinkedGraph, NodeId, Predecessors, Successors, Traverse as _, + algorithms::{Dominators, dominators}, + linked::Edge, + }, + heap::CollectIn as _, + id::{ + HasId as _, Id as _, + bit_vec::{BitMatrix, DenseBitSet}, + }, }; -use super::{Island, IslandId, IslandSlice, IslandVec}; -use crate::pass::execution::{ - VertexType, - target::TargetId, - traversal::{EntityPathBitSet, TraversalPath, TraversalPathBitSet}, +use super::{Island, IslandId, IslandVec}; +use crate::{ + body::{ + Body, + basic_block::{BasicBlockId, BasicBlockVec}, + }, + pass::execution::{ + TargetId, VertexType, + target::{TargetArray, TargetBitSet}, + traversal::{TraversalPath, TraversalPathBitSet}, + }, }; -#[cfg(test)] -mod tests; - -id::newtype!( - /// Identifies a node in the [`IslandGraph`], which may be either a real [`Island`] - /// or a synthetic [`FetchIsland`]. - pub struct IslandNodeId(u32 is 0..=0xFFFF_FF00) -); -id::newtype_collections!(pub type IslandNode* from IslandNodeId); - -/// The kind of edge in the island dependency graph. +/// The kind of dependency between two islands. #[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum IslandEdgeKind { - /// Direct control flow between islands (block-level CFG edge crossing an island boundary). - Cfg, - /// Data dependency where the consumer fetches directly from the producer's backend. - Data, +pub enum IslandEdge { + /// The source island must complete before the target island begins. + ControlFlow, + /// The target island consumes data produced by the source island. + DataFlow, + /// The target island inherits provided paths from the source island. 
+ Inherits, } -/// A directed edge in the island dependency graph. -/// -/// Carries the set of entity paths that flow from the source island to the target island, -/// along with the edge kind. -#[derive(Debug, Clone)] -pub struct IslandEdge { - pub source: IslandNodeId, - pub target: IslandNodeId, - pub kind: IslandEdgeKind, - pub paths: EntityPathBitSet, +/// A computation island backed by a set of basic blocks from the placement solver. +#[derive(Debug)] +pub struct ExecIsland { + members: DenseBitSet, } -/// A synthetic island that exists solely to fetch entity data from a specific backend. -/// -/// Inserted when a real island requires entity paths that no dominating predecessor -/// can provide. Groups all unsatisfied paths for a single origin backend into one fetch -/// operation. -#[derive(Debug, Clone)] -pub struct FetchIsland { - pub target: TargetId, - pub paths: EntityPathBitSet, +impl ExecIsland { + /// Returns `true` if `block` belongs to this island. + #[inline] + #[must_use] + pub fn contains(&self, block: BasicBlockId) -> bool { + self.members.contains(block) + } + + /// Iterates over the [`BasicBlockId`]s in this island in ascending order. + #[inline] + pub fn iter(&self) -> impl Iterator + '_ { + self.members.iter() + } +} + +/// Whether an island node represents computation or a data fetch. +#[derive(Debug)] +pub enum IslandKind { + /// An island containing basic blocks that execute on its assigned target. + Exec(ExecIsland), + /// A synthetic island that fetches data from its assigned target. + Data, } -/// A node in the island dependency graph: either a real computation island or a -/// synthetic fetch island. -#[derive(Debug, Clone)] -pub enum IslandNode { - /// A real island from the placement solver. - Real(IslandId), - /// A synthetic fetch island inserted to satisfy data requirements. - Fetch(FetchIsland), +/// A node in the island dependency graph. +/// +/// Each node tracks which traversal paths it requires and which it provides. 
+#[derive(Debug)] +pub struct IslandNode { + kind: IslandKind, + target: TargetId, + requires: TraversalPathBitSet, + provides: TraversalPathBitSet, } impl IslandNode { - /// Returns the execution target for this node. + /// Returns the kind of this island node. + #[inline] #[must_use] - pub fn target(&self, islands: &IslandSlice) -> TargetId { - match self { - Self::Real(island_id) => islands[*island_id].target(), - Self::Fetch(fetch) => fetch.target, - } + pub const fn kind(&self) -> &IslandKind { + &self.kind } -} -/// A scheduled island with its parallelism level. -/// -/// Islands at the same level have no dependencies between them and can execute concurrently. -/// Level 0 contains islands with no predecessors (entry points and independent fetch islands). -#[derive(Debug, Copy, Clone)] -pub struct ScheduledIsland { - pub node: IslandNodeId, - pub level: u32, + /// Returns the execution target this island runs on. + #[inline] + #[must_use] + pub const fn target(&self) -> TargetId { + self.target + } + + /// Returns the set of traversal paths this island requires. + #[inline] + #[must_use] + pub const fn requires(&self) -> TraversalPathBitSet { + self.requires + } + + /// Returns the set of traversal paths this island provides. + #[inline] + #[must_use] + pub const fn provides(&self) -> TraversalPathBitSet { + self.provides + } } -/// The island dependency graph. +/// Directed graph over [`IslandNode`]s connected by [`IslandEdge`]s. /// -/// Contains the set of island nodes (real + fetch), directed edges with path requirements, -/// and a topological schedule with level assignment for parallelism. -#[derive(Debug)] -pub struct IslandGraph { - pub nodes: IslandNodeVec, - pub edges: Vec, - pub schedule: Vec, +/// Supports indexing by [`IslandId`] and implements [`DirectedGraph`], [`Successors`], +/// and [`Predecessors`]. 
+pub struct IslandGraph { + vertex: VertexType, + inner: LinkedGraph, + lookup: BasicBlockVec, } -/// Adapter that provides [`DirectedGraph`], [`Successors`], and [`Predecessors`] over -/// island nodes, enabling dominator computation on the island-level CFG. -struct IslandCfg { - node_count: usize, - successors: IslandNodeVec>, - predecessors: IslandNodeVec>, +impl IslandGraph { + pub fn new( + body: &Body<'_>, + vertex: VertexType, + islands: IslandVec, + ) -> Self { + Self::new_in(body, vertex, islands, Global, Global) + } } -impl IslandCfg { - fn new(node_count: usize) -> Self { +impl IslandGraph { + pub fn new_in( + body: &Body<'_>, + vertex: VertexType, + islands: IslandVec, + scratch: S, + alloc: A, + ) -> Self + where + S: Allocator + Clone, + A: Clone, + { + let mut this = Self::build_in(body, vertex, islands, scratch.clone(), alloc); + this.resolve(scratch); + + this + } + + fn build_in( + body: &Body<'_>, + vertex: VertexType, + islands: IslandVec, + scratch: S, + alloc: A, + ) -> Self + where + S: Allocator, + A: Clone, + { + let mut lookup = + BasicBlockVec::from_domain_in(IslandId::MAX, &body.basic_blocks, alloc.clone()); + let mut graph = + LinkedGraph::with_capacity_in(islands.len(), body.basic_blocks.edge_count(), alloc); + let mut matrix = BitMatrix::new_in(islands.len(), islands.len(), scratch); + + for ( + island_id, + Island { + target, + members, + traversals, + }, + ) in islands.into_iter_enumerated() + { + for block_id in &members { + lookup[block_id] = island_id; + } + + let node_id = graph.add_node(IslandNode { + kind: IslandKind::Exec(ExecIsland { members }), + target, + requires: traversals, + provides: TraversalPathBitSet::empty(vertex), + }); + debug_assert_eq!(node_id.as_u32(), island_id.as_u32()); + } + + for block_id in body.basic_blocks.ids() { + let source = lookup[block_id]; + + for successor in body.basic_blocks.successors(block_id) { + let target = lookup[successor]; + + if source == target || matrix.contains(source, target) { 
+ continue; + } + + matrix.insert(source, target); + graph.add_edge( + NodeId::new(source.as_u32()), + NodeId::new(target.as_u32()), + IslandEdge::ControlFlow, + ); + } + } + Self { - node_count, - successors: IslandNodeVec::from_fn_in(node_count, |_| Vec::new(), Global), - predecessors: IslandNodeVec::from_fn_in(node_count, |_| Vec::new(), Global), + vertex, + inner: graph, + lookup, } } - fn add_edge(&mut self, source: IslandNodeId, target: IslandNodeId) { - if !self.successors[source].contains(&target) { - self.successors[source].push(target); - self.predecessors[target].push(source); - } + /// Resolves all island requirements and inserts data islands where needed. + pub(crate) fn resolve(&mut self, scratch: S) + where + S: Allocator + Clone, + { + let mut topo: Vec = self + .inner + .depth_first_forest_post_order() + .map(|node| IslandId::new(node.as_u32())) + .collect_in(scratch.clone()); + topo.reverse(); + + let start = self.lookup[BasicBlockId::START]; + + RequirementResolver::new(self, start, scratch).resolve(&topo); } } -impl DirectedGraph for IslandCfg { - type Edge<'this> = (IslandNodeId, IslandNodeId); - type EdgeId = (IslandNodeId, IslandNodeId); - type Node<'this> = (IslandNodeId, &'this [IslandNodeId]); - type NodeId = IslandNodeId; +impl DirectedGraph for IslandGraph { + type Edge<'this> + = &'this Edge + where + Self: 'this; + type EdgeId = EdgeId; + type Node<'this> + = (IslandId, &'this IslandNode) + where + Self: 'this; + type NodeId = IslandId; fn node_count(&self) -> usize { - self.node_count + self.inner.node_count() } fn edge_count(&self) -> usize { - self.successors.iter().map(|succs| succs.len()).sum() + self.inner.edge_count() } fn iter_nodes(&self) -> impl ExactSizeIterator> + DoubleEndedIterator { - self.successors - .iter_enumerated() - .map(|(id, succs)| (id, succs.as_slice())) + self.inner + .iter_nodes() + .map(|node| (IslandId::from_u32(node.id().as_u32()), &node.data)) } fn iter_edges(&self) -> impl ExactSizeIterator> + 
DoubleEndedIterator { - // Not needed for dominator computation, provide a dummy implementation. - [].into_iter() + self.inner.iter_edges() } } -impl Successors for IslandCfg { - type SuccIter<'this> = impl Iterator + 'this; +impl Successors for IslandGraph { + type SuccIter<'this> + = impl Iterator + 'this + where + Self: 'this; fn successors(&self, node: Self::NodeId) -> Self::SuccIter<'_> { - self.successors[node].iter().copied() + self.inner + .successors(NodeId::new(node.as_u32())) + .map(|node| IslandId::new(node.as_u32())) } } -impl Predecessors for IslandCfg { - type PredIter<'this> = impl Iterator + 'this; +impl Predecessors for IslandGraph { + type PredIter<'this> + = impl Iterator + 'this + where + Self: 'this; fn predecessors(&self, node: Self::NodeId) -> Self::PredIter<'_> { - self.predecessors[node].iter().copied() + self.inner + .predecessors(NodeId::new(node.as_u32())) + .map(|node| IslandId::new(node.as_u32())) } } -/// Maps a block-level CFG into island-level CFG edges. -/// -/// For each block-level CFG edge where the source and target belong to different islands, -/// adds an edge between the corresponding island nodes. -fn build_island_cfg( - body: &crate::body::Body<'_>, - islands: &IslandSlice, - block_to_island: &crate::body::basic_block::BasicBlockSlice, - cfg: &mut IslandCfg, - edges: &mut Vec, - alloc: &A, -) { - use hashql_core::graph::Successors as _; - - for block in body.basic_blocks.ids() { - let source_island = block_to_island[block]; - - for successor in body.basic_blocks.successors(block) { - let target_island = block_to_island[successor]; - - if source_island != target_island { - cfg.add_edge(source_island, target_island); - - // Check if this CFG edge already exists in our edge list. 
- let existing = edges.iter_mut().find(|edge| { - edge.source == source_island - && edge.target == target_island - && edge.kind == IslandEdgeKind::Cfg - }); - - if existing.is_none() { - edges.push(IslandEdge { - source: source_island, - target: target_island, - kind: IslandEdgeKind::Cfg, - paths: EntityPathBitSet::new_empty(), - }); - } - } - } +impl IndexMut for IslandGraph { + fn index_mut(&mut self, index: IslandId) -> &mut Self::Output { + &mut self.inner[NodeId::new(index.as_u32())].data } } -/// Resolves data requirements for each island using dominance-aware provider search. -/// -/// Walks islands in topological order. For each required path in an island's traversal set, -/// finds the nearest dominating predecessor whose target matches the path's origin backend. -/// If found, the path is registered on that predecessor (growing its fetch set). If not, -/// a [`FetchIsland`] is created. -fn resolve_requirements( - islands: &IslandSlice, - nodes: &mut IslandNodeVec, - cfg: &mut IslandCfg, - edges: &mut Vec, - topo_order: &[IslandNodeId], - vertex: VertexType, -) { - let doms = dominators(&*cfg, IslandNodeId::new(0)); - - // For each island, track which paths are "available" from dominating predecessors, - // grouped by the origin backend that provides them. - // We walk in topological order so predecessors are always processed before successors. - - // Per-node: which paths are available at this node, keyed by origin target. - let mut available: IslandNodeVec<[EntityPathBitSet; TargetId::VARIANT_COUNT]> = - IslandNodeVec::from_fn_in( - nodes.len(), - |_| [EntityPathBitSet::new_empty(); TargetId::VARIANT_COUNT], - Global, - ); - - // For real islands, the island's own target makes all its fetched/produced paths available. 
- for node_id in topo_order { - let node = &nodes[*node_id]; - - if let IslandNode::Real(island_id) = node { - let island = &islands[*island_id]; - let target = island.target(); +impl Index for IslandGraph { + type Output = IslandNode; - // Paths this island accesses are available from its target backend going forward. - if let Some(entity_paths) = island.traversals().as_entity() { - let avail = &mut available[*node_id][target.as_usize()]; - for path in entity_paths { - avail.insert(path); - } - } - } - - // Propagate availability to successors: a successor inherits availability from - // all dominating predecessors. - let current_available = available[*node_id]; - for succ in cfg.successors[*node_id].clone() { - if doms.dominates(*node_id, succ) { - for (target_idx, paths) in current_available.iter().enumerate() { - for path in paths { - available[succ][target_idx].insert(path); - } - } - } - } - } - - // Now resolve requirements: for each real island, check which of its required paths - // are NOT available from any dominating predecessor on the correct origin backend. - // Those need FetchIslands. - for &node_id in topo_order { - let node = &nodes[node_id]; - - let island_id = match node { - IslandNode::Real(island_id) => *island_id, - IslandNode::Fetch(_) => continue, - }; - - let island = &islands[island_id]; - let required = island.traversals(); - - let Some(entity_paths) = required.as_entity() else { - continue; - }; - - if entity_paths.is_empty() { - continue; - } - - // Group unsatisfied paths by origin backend. - let mut unsatisfied: [EntityPathBitSet; TargetId::VARIANT_COUNT] = - [EntityPathBitSet::new_empty(); TargetId::VARIANT_COUNT]; - - for path in entity_paths { - let traversal_path = TraversalPath::Entity(path); - let origin = traversal_path.origin(); - - // Check if any origin backend has this path available from a dominating predecessor. 
- let is_satisfied = origin - .iter() - .any(|origin_target| available[node_id][origin_target.as_usize()].contains(path)); - - if is_satisfied { - // Find the nearest dominating predecessor that provides this path and - // add a data edge if one doesn't already exist. - for origin_target in origin.iter() { - if available[node_id][origin_target.as_usize()].contains(path) { - // Find the nearest predecessor on this target by walking up the - // dominator tree. - if let Some(provider) = find_nearest_provider( - node_id, - origin_target, - &doms, - nodes, - islands, - cfg, - ) { - add_data_edge(edges, provider, node_id, path); - } - break; - } - } - } else { - // No provider: needs a FetchIsland. Group by origin backend. - // Use the first origin target (for EntityPath, there's always exactly one). - if let Some(origin_target) = origin.iter().next() { - unsatisfied[origin_target.as_usize()].insert(path); - } - } - } - - // Create FetchIslands for unsatisfied paths, one per backend. - for target in TargetId::all() { - let paths = &unsatisfied[target.as_usize()]; - if paths.is_empty() { - continue; - } - - let fetch_node_id = IslandNodeId::from_usize(nodes.len()); - - // Extend the CFG adapter. - cfg.successors.push(Vec::new()); - cfg.predecessors.push(Vec::new()); - cfg.node_count += 1; - - // Extend available. - // (We won't re-process this node in the topo walk, but the structure must be - // consistent.) - - nodes.push(IslandNode::Fetch(FetchIsland { - target, - paths: *paths, - })); - - cfg.add_edge(fetch_node_id, node_id); - - edges.push(IslandEdge { - source: fetch_node_id, - target: node_id, - kind: IslandEdgeKind::Data, - paths: *paths, - }); - } + fn index(&self, index: IslandId) -> &Self::Output { + &self.inner[NodeId::new(index.as_u32())].data } } -/// Finds the nearest dominating predecessor of `node` whose target matches `origin_target`. -/// -/// Walks up the dominator tree from `node`, checking each ancestor. 
Returns the first -/// (nearest) node that runs on the requested backend. -fn find_nearest_provider( - node: IslandNodeId, - origin_target: TargetId, - doms: &hashql_core::graph::algorithms::Dominators, - nodes: &IslandNodeSlice, - islands: &IslandSlice, - cfg: &IslandCfg, -) -> Option { +/// Returns the nearest strict dominator of `node` assigned to `target`, along with its +/// depth (0 = immediate dominator). +fn find_dominator_by_target( + dominators: &Dominators, + graph: &IslandGraph, + node: IslandId, + target: TargetId, +) -> Option<(IslandId, usize)> { let mut current = node; + let mut depth = 0; loop { - let parent = doms.immediate_dominator(current)?; - + let parent = dominators.immediate_dominator(current)?; if parent == current { return None; } - let parent_target = nodes[parent].target(islands); - if parent_target == origin_target { - return Some(parent); + if graph[parent].target == target { + return Some((parent, depth)); } current = parent; + depth += 1; } } -/// Adds a data edge carrying `path` from `source` to `target`, merging into existing edges. -fn add_data_edge( - edges: &mut Vec, - source: IslandNodeId, - target: IslandNodeId, - path: EntityPath, -) { - use crate::pass::execution::traversal::EntityPath; - - let existing = edges.iter_mut().find(|edge| { - edge.source == source && edge.target == target && edge.kind == IslandEdgeKind::Data - }); - - if let Some(edge) = existing { - edge.paths.insert(path); - } else { - let mut paths = EntityPathBitSet::new_empty(); - paths.insert(path); - edges.push(IslandEdge { - source, - target, - kind: IslandEdgeKind::Data, - paths, - }); - } +/// Resolves data requirements for all islands, inserting data islands where needed. +struct RequirementResolver<'graph, A: Allocator, S: Allocator> { + graph: &'graph mut IslandGraph, + dominators: Dominators, + merged_provides: IslandVec, + data_providers: TargetArray>, } -/// Computes a topological schedule with level assignment for parallelism. 
-///
-/// Each node is assigned the lowest level such that all its predecessors are at lower levels.
-/// Nodes at the same level have no direct dependencies and can execute concurrently.
-fn compute_schedule(
-    nodes: &IslandNodeSlice,
-    edges: &[IslandEdge],
-) -> Vec {
-    let node_count = nodes.len();
-
-    // Compute in-degree for each node.
-    let mut in_degree = IslandNodeVec::from_fn_in(node_count, |_| 0u32, Global);
-    let mut successors: IslandNodeVec> =
-        IslandNodeVec::from_fn_in(node_count, |_| Vec::new(), Global);
-
-    for edge in edges {
-        // Only count edges within the known node range (FetchIslands added later may
-        // exceed the initial allocation).
-        if edge.source.as_usize() < node_count && edge.target.as_usize() < node_count {
-            in_degree[edge.target] += 1;
-            if !successors[edge.source].contains(&edge.target) {
-                successors[edge.source].push(edge.target);
-            }
+impl<'graph, A: Allocator, S: Allocator + Clone> RequirementResolver<'graph, A, S> {
+    fn new(graph: &'graph mut IslandGraph, start: IslandId, scratch: S) -> Self {
+        let dominators = dominators(&*graph, start);
+        let merged_provides = IslandVec::from_elem_in(
+            TraversalPathBitSet::empty(graph.vertex),
+            graph.node_count(),
+            scratch,
+        );
+
+        Self {
+            graph,
+            dominators,
+            merged_provides,
+            data_providers: TargetArray::from_elem(None),
         }
     }

-    let mut levels = IslandNodeVec::from_fn_in(node_count, |_| 0u32, Global);
-    let mut queue: Vec = Vec::new();
+    fn resolve(mut self, topo: &[IslandId]) {
+        // `topo` is reverse postorder, i.e. already topologically ordered: forward iteration
+        for &island_id in topo {
+            let island = &self.graph[island_id];
+            let IslandKind::Exec(_) = &island.kind else {
+                debug_panic!("data islands should not be present during requirement resolution");
+                continue;
+            };
+
+            self.inherit_provides(island_id);
+            self.resolve_island(island_id);
+        }
+    }

-    // Seed the queue with nodes that have no predecessors.
- for node_id in (0..node_count).map(IslandNodeId::from_usize) { - if in_degree[node_id] == 0 { - queue.push(node_id); + /// If a same-target dominator exists, inherits its provided paths via an `Inherits` edge. + fn inherit_provides(&mut self, island_id: IslandId) { + let island_target = self.graph[island_id].target; + + if let Some((parent, _)) = + find_dominator_by_target(&self.dominators, self.graph, island_id, island_target) + { + self.merged_provides.copy_within(parent..=parent, island_id); + self.graph.inner.add_edge( + NodeId::from_u32(parent.as_u32()), + NodeId::from_u32(island_id.as_u32()), + IslandEdge::Inherits, + ); } } - let mut schedule = Vec::with_capacity(node_count); - let mut head = 0; + /// Resolves requirements for a single island. + /// + /// Paths whose origin includes this island's own target are self-provided and need no + /// external provider. All other paths are resolved via dominator walk or data island. + fn resolve_island(&mut self, island_id: IslandId) { + let requires = self.graph[island_id].requires; + if requires.is_empty() { + return; + } + + let island_target = self.graph[island_id].target; - while head < queue.len() { - let node_id = queue[head]; - head += 1; + // Cache dominator lookups per target to avoid repeated walks. + let mut cached = TargetArray::from_elem(None); - schedule.push(ScheduledIsland { - node: node_id, - level: levels[node_id], - }); + for requirement in &requires { + let origin = requirement.origin(); + debug_assert!(!origin.is_empty()); - for &succ in &successors[node_id] { - levels[succ] = levels[succ].max(levels[node_id] + 1); - in_degree[succ] -= 1; - if in_degree[succ] == 0 { - queue.push(succ); + // If this island runs on an origin backend for the path, it self-provides. 
+ if origin.contains(island_target) { + if !self.merged_provides[island_id].contains(requirement) { + self.merged_provides[island_id].insert(requirement); + self.graph[island_id].provides.insert(requirement); + } + continue; } + + let provider = self.find_best_provider(&mut cached, island_id, origin); + let provider = provider.unwrap_or_else(|| self.get_or_create_data_island(origin)); + + self.register_path(provider, island_id, requirement); } } - schedule -} + /// Finds the nearest dominating provider among the potential origin targets. + #[expect(clippy::option_option)] + fn find_best_provider( + &self, + cached: &mut TargetArray>>, + island_id: IslandId, + origin: TargetBitSet, + ) -> Option { + origin + .iter() + .filter_map(|target| { + *cached[target].get_or_insert_with(|| { + find_dominator_by_target(&self.dominators, self.graph, island_id, target) + }) + }) + .min_by_key(|&(_, depth)| depth) + .map(|(provider, _)| provider) + } -/// Builds the island dependency graph from placement results. -/// -/// Takes the body CFG, the discovered islands, and produces a graph with: -/// - Real island nodes mapped 1:1 from the input islands -/// - FetchIsland nodes for unsatisfied data requirements -/// - CFG and data edges between nodes -/// - A topological schedule with parallelism levels -pub(crate) fn build_island_graph( - body: &crate::body::Body<'_>, - islands: &IslandVec, - vertex: VertexType, -) -> IslandGraph { - use crate::body::basic_block::BasicBlockVec; + /// Registers a path as provided by `provider` for consumption by `consumer`. 
+    fn register_path(&mut self, provider: IslandId, consumer: IslandId, path: TraversalPath) {
+        if !self.merged_provides[provider].contains(path) {
+            self.merged_provides[provider].insert(path);
+            self.graph[provider].provides.insert(path);
+        }

-    let island_count = islands.len();
+        self.graph.inner.add_edge(
+            NodeId::from_u32(provider.as_u32()),
+            NodeId::from_u32(consumer.as_u32()),
+            IslandEdge::DataFlow,
+        );
+    }
+
+    /// Returns an existing data island for the given origin backend, or creates one.
+    fn get_or_create_data_island(&mut self, origin: TargetBitSet) -> IslandId {
+        // If *any* of the origin targets already has a data island, reuse it instead of
+        // creating another one.
+        if let Some(provider) = origin.iter().find_map(|target| self.data_providers[target]) {
+            return provider;
+        }

-    // Map each basic block to its island's node ID.
-    let mut block_to_island =
-        BasicBlockVec::from_domain_in(IslandNodeId::new(0), &body.basic_blocks, Global);
+        // `TargetId` is ordered by backend priority, so the first set bit gives us the best target
+        // (note that interpreter is technically first, but never a target for data).
+        let target = origin.first_set().unwrap_or_else(|| unreachable!());

-    for island_id in islands.ids() {
-        let node_id = IslandNodeId::from_usize(island_id.as_usize());
-        for block in islands[island_id].iter() {
-            block_to_island[block] = node_id;
+        if let Some(provider) = self.data_providers[target] {
+            return provider;
         }
-    }

-    // Initialize nodes: one per real island.
-    let mut nodes: IslandNodeVec = IslandNodeVec::from_fn_in(
-        island_count,
-        |id| IslandNode::Real(IslandId::from_usize(id.as_usize())),
-        Global,
-    );
-
-    let mut cfg = IslandCfg::new(island_count);
-    let mut edges = Vec::new();
-
-    // Phase 1: Build island-level CFG edges from block-level CFG.
-    build_island_cfg(
-        body,
-        islands,
-        &block_to_island,
-        &mut cfg,
-        &mut edges,
-        &Global,
-    );
-
-    // Phase 2: Compute topological order for the forward walk.
- // Use reverse postorder (which is a valid topological order for DAGs). - let topo_order = { - use hashql_core::graph::Traverse as _; - let rpo: Vec = cfg - .depth_first_traversal_post_order([IslandNodeId::new(0)]) - .collect::>() - .into_iter() - .rev() - .collect(); - rpo - }; - - // Phase 3: Resolve data requirements with dominance-aware provider search. - resolve_requirements( - islands, - &mut nodes, - &mut cfg, - &mut edges, - &topo_order, - vertex, - ); - - // Phase 4: Compute the schedule with parallelism levels. - let schedule = compute_schedule(&nodes, &edges); - - IslandGraph { - nodes, - edges, - schedule, + let node = self.graph.inner.add_node(IslandNode { + kind: IslandKind::Data, + target, + requires: TraversalPathBitSet::empty(self.graph.vertex), + provides: TraversalPathBitSet::empty(self.graph.vertex), + }); + let provider = IslandId::from_u32(node.as_u32()); + self.data_providers[target] = Some(provider); + self.merged_provides + .push(TraversalPathBitSet::empty(self.graph.vertex)); + + provider } } diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs index 1f1ab5e44e7..9ec41335fcd 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs @@ -1,25 +1,30 @@ -//! Tests for island dependency graph construction and fetch island insertion. +//! Tests for island dependency graph construction and requirement resolution. 
+#![expect(clippy::min_ident_chars)] use alloc::alloc::Global; +use core::assert_matches; -use hashql_core::{heap::Heap, symbol::sym, r#type::environment::Environment}; +use hashql_core::{ + graph::DirectedGraph as _, heap::Heap, id::Id as _, symbol::sym, + r#type::environment::Environment, +}; use crate::{ - body::basic_block::{BasicBlockId, BasicBlockVec}, + body::{Body, basic_block::BasicBlockVec}, builder::body, intern::Interner, pass::execution::{ VertexType, island::{ IslandId, IslandPlacement, - graph::{IslandEdgeKind, IslandNode, IslandNodeId, build_island_graph}, + graph::{IslandEdge, IslandGraph, IslandKind}, }, target::TargetId, traversal::EntityPath, }, }; -fn make_targets(assignments: &[TargetId]) -> BasicBlockVec { +pub(crate) fn make_targets(assignments: &[TargetId]) -> BasicBlockVec { let mut targets = BasicBlockVec::with_capacity_in(assignments.len(), Global); for &target in assignments { targets.push(target); @@ -27,7 +32,15 @@ fn make_targets(assignments: &[TargetId]) -> BasicBlockVec { targets } -/// Single Postgres island accessing properties — no fetch island needed because the island +pub(crate) fn build_graph(body: &Body<'_>, targets: &[TargetId]) -> IslandGraph { + let target_vec = make_targets(targets); + let islands = IslandPlacement::new().run(body, VertexType::Entity, &target_vec, Global); + let mut graph = IslandGraph::build_in(body, VertexType::Entity, islands, Global, Global); + graph.resolve(Global); + graph +} + +/// Single Postgres island accessing properties: no fetch island needed because the island /// itself is on the origin backend for that path. 
#[test] fn single_island_no_fetch() { @@ -45,21 +58,16 @@ fn single_island_no_fetch() { } }); - let targets = make_targets(&[TargetId::Postgres]); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); - let graph = build_island_graph(&body, &islands, VertexType::Entity); + let graph = build_graph(&body, &[TargetId::Postgres]); - // One real node, no fetch islands. - assert_eq!(graph.nodes.len(), 1); - assert!( - matches!(graph.nodes[IslandNodeId::new(0)], IslandNode::Real(id) if id == IslandId::new(0)) - ); - assert!(graph.edges.is_empty()); + assert_eq!(graph.node_count(), 1); + assert_matches!(graph[IslandId::new(0)].kind(), IslandKind::Exec(_)); + assert_eq!(graph.edge_count(), 0); } /// Postgres island followed by Interpreter island that needs properties. -/// Properties originate from Postgres, so the Interpreter island gets a data edge -/// from the Postgres island — no fetch island needed. +/// Properties originate from Postgres, so the Interpreter island gets a `DataFlow` edge +/// from the Postgres island: no fetch island needed. #[test] fn data_edge_from_predecessor() { let heap = Heap::new(); @@ -80,36 +88,39 @@ fn data_edge_from_predecessor() { } }); - let targets = make_targets(&[TargetId::Postgres, TargetId::Interpreter]); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); + let graph = build_graph(&body, &[TargetId::Postgres, TargetId::Interpreter]); - assert_eq!(islands.len(), 2); + // Two exec nodes, no data islands. + assert_eq!(graph.node_count(), 2); - let graph = build_island_graph(&body, &islands, VertexType::Entity); + let control_flow_count = graph + .iter_edges() + .filter(|edge| edge.data == IslandEdge::ControlFlow) + .count(); + assert_eq!(control_flow_count, 1); - // Two real nodes, no fetch islands. 
- assert_eq!(graph.nodes.len(), 2); + let data_flow_count = graph + .iter_edges() + .filter(|edge| edge.data == IslandEdge::DataFlow) + .count(); + assert_eq!(data_flow_count, 1); - // Should have a CFG edge from Postgres to Interpreter. - let cfg_edges: Vec<_> = graph - .edges - .iter() - .filter(|edge| edge.kind == IslandEdgeKind::Control) - .collect(); - assert_eq!(cfg_edges.len(), 1); - - // Should have a data edge carrying Properties from Postgres to Interpreter. - let data_edges: Vec<_> = graph - .edges - .iter() - .filter(|edge| edge.kind == IslandEdgeKind::Data) - .collect(); - assert_eq!(data_edges.len(), 1); - assert!(data_edges[0].paths.contains(EntityPath::Properties)); + // The Postgres island should provide Properties. + let postgres_island = (0..graph.node_count()) + .map(IslandId::from_usize) + .find(|&island_id| graph[island_id].target() == TargetId::Postgres) + .expect("postgres island exists"); + assert!( + graph[postgres_island] + .provides() + .as_entity() + .expect("entity vertex") + .contains(EntityPath::Properties) + ); } /// Interpreter island needs embedding data but has no Embedding predecessor. -/// A FetchIsland(Embedding) should be inserted. +/// A data island for Embedding should be inserted. #[test] fn fetch_island_for_unsatisfied_requirement() { let heap = Heap::new(); @@ -127,30 +138,29 @@ fn fetch_island_for_unsatisfied_requirement() { } }); - let targets = make_targets(&[TargetId::Interpreter]); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); - let graph = build_island_graph(&body, &islands, VertexType::Entity); - - // One real node plus one FetchIsland(Embedding). 
- assert_eq!(graph.nodes.len(), 2); - assert!(matches!( - graph.nodes[IslandNodeId::new(0)], - IslandNode::Real(_) - )); - - let fetch_node = &graph.nodes[IslandNodeId::new(1)]; - match fetch_node { - IslandNode::Fetch(fetch) => { - assert_eq!(fetch.target, TargetId::Embedding); - assert!(fetch.paths.contains(EntityPath::Vectors)); - } - IslandNode::Real(_) => panic!("expected FetchIsland, got Real"), - } + let graph = build_graph(&body, &[TargetId::Interpreter]); + + // One exec node plus one data island for Embedding. + assert_eq!(graph.node_count(), 2); + + let data_island = (0..graph.node_count()) + .map(IslandId::from_usize) + .find(|&island_id| matches!(graph[island_id].kind(), IslandKind::Data)) + .expect("data island exists"); + + assert_eq!(graph[data_island].target(), TargetId::Embedding); + assert!( + graph[data_island] + .provides() + .as_entity() + .expect("entity vertex") + .contains(EntityPath::Vectors) + ); } /// Diamond CFG: Postgres branches to Interpreter and Embedding, both merge into a /// final Postgres island. The Embedding path is only available on one branch, so the -/// final Postgres island needs a FetchIsland for embedding data. +/// final Postgres island needs a data island for embedding data. #[test] fn diamond_branch_needs_fetch() { let heap = Heap::new(); @@ -183,75 +193,74 @@ fn diamond_branch_needs_fetch() { }); // bb0=Postgres, bb1=Interpreter, bb2=Embedding, bb3=Postgres - let targets = make_targets(&[ - TargetId::Postgres, - TargetId::Interpreter, - TargetId::Embedding, - TargetId::Postgres, - ]); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); - let graph = build_island_graph(&body, &islands, VertexType::Entity); - - // The final Postgres island needs Vectors. The Embedding island (bb2) only runs on - // one branch and doesn't dominate bb3, so a FetchIsland(Embedding) must be inserted. 
- let fetch_nodes: Vec<_> = graph - .nodes - .iter() - .filter(|node| matches!(node, IslandNode::Fetch(_))) - .collect(); - - assert!( - !fetch_nodes.is_empty(), - "expected at least one FetchIsland for embedding data" + let graph = build_graph( + &body, + &[ + TargetId::Postgres, + TargetId::Interpreter, + TargetId::Embedding, + TargetId::Postgres, + ], ); - let has_embedding_fetch = fetch_nodes.iter().any( - |node| matches!(node, IslandNode::Fetch(fetch) if fetch.target == TargetId::Embedding), + // The Embedding island (bb2) only runs on one branch and doesn't dominate bb3, + // so a data island for Embedding must be inserted. + let has_embedding_data_island = + (0..graph.node_count()) + .map(IslandId::from_usize) + .any(|island_id| { + matches!(graph[island_id].kind(), IslandKind::Data) + && graph[island_id].target() == TargetId::Embedding + }); + + assert!( + has_embedding_data_island, + "expected a data island for Embedding" ); - assert!(has_embedding_fetch); } -/// Schedule levels: independent fetch islands and entry islands should be at level 0. -/// Their consumers should be at level 1 or higher. +/// Inherits edge: when two same-target islands are in a dominator relationship, +/// the child inherits provided paths from the parent. #[test] -fn schedule_levels() { +fn inherits_edge_same_target_dominator() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ - decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; - @proj enc = vertex.encodings: ?, - vecs = enc.vectors: ?; + decl env: (), vertex: [Opaque sym::path::Entity; ?], val1: ?, val2: ?; + @proj props = vertex.properties: ?; bb0() { - val = load vecs; - return val; + val1 = load props; + goto bb1(); + }, + bb1() { + goto bb2(); + }, + bb2() { + val2 = load props; + return val2; } }); - let targets = make_targets(&[TargetId::Interpreter]); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); - let graph = build_island_graph(&body, &islands, VertexType::Entity); - - // The real island depends on the FetchIsland, so it should be at a higher level. - assert!(graph.schedule.len() >= 2); - - let real_level = graph - .schedule - .iter() - .find(|sched| matches!(graph.nodes[sched.node], IslandNode::Real(_))) - .map(|sched| sched.level); - let fetch_level = graph - .schedule - .iter() - .find(|sched| matches!(graph.nodes[sched.node], IslandNode::Fetch(_))) - .map(|sched| sched.level); - - if let (Some(real_level), Some(fetch_level)) = (real_level, fetch_level) { - assert!( - real_level > fetch_level, - "real island (level {real_level}) should be after fetch island (level {fetch_level})" - ); - } + // bb0=Postgres, bb1=Interpreter, bb2=Postgres + // bb0 dominates bb2 (through bb1). Both are Postgres, so bb2 should inherit from bb0. 
+ let graph = build_graph( + &body, + &[ + TargetId::Postgres, + TargetId::Interpreter, + TargetId::Postgres, + ], + ); + + let inherits_count = graph + .iter_edges() + .filter(|edge| edge.data == IslandEdge::Inherits) + .count(); + assert!( + inherits_count > 0, + "expected an Inherits edge between same-target dominating islands" + ); } diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph2.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph2.rs deleted file mode 100644 index 65dae2d7e31..00000000000 --- a/libs/@local/hashql/mir/src/pass/execution/island/graph2.rs +++ /dev/null @@ -1,417 +0,0 @@ -use core::ops::{Index, IndexMut}; -use std::alloc::Allocator; - -use hashql_core::{ - debug_panic, - graph::{ - DirectedGraph, EdgeId, LinkedGraph, NodeId, Predecessors, Successors, Traverse, - algorithms::{Dominators, dominators}, - linked::Edge, - }, - heap::CollectIn, - id::{ - HasId, Id, - bit_vec::{BitMatrix, DenseBitSet}, - }, -}; - -use super::{Island, IslandId, IslandVec}; -use crate::{ - body::{ - Body, - basic_block::{BasicBlockId, BasicBlockVec}, - }, - pass::execution::{ - TargetId, VertexType, - target::{TargetArray, TargetBitSet}, - traversal::{TraversalPath, TraversalPathBitSet}, - }, -}; - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum IslandEdge { - /// Direct control flow between islands (block-level CFG edge crossing an island boundary). - ControlFlow, - /// Data dependency: the consumer fetches directly from the producer's backend. - DataFlow, - /// Same-target inheritance: the child island inherits provided paths from a dominating - /// ancestor on the same backend. - Inherits, -} - -#[derive(Debug)] -pub struct ExecIsland { - members: DenseBitSet, -} - -#[derive(Debug)] -pub enum IslandKind { - /// A real island from the placement solver. - Exec(ExecIsland), - /// A synthetic island dedicated to fetching data from a specific backend. 
- Data, -} - -#[derive(Debug)] -pub struct IslandNode { - kind: IslandKind, - target: TargetId, - requires: TraversalPathBitSet, - provides: TraversalPathBitSet, -} - -impl IslandNode { - #[inline] - #[must_use] - pub const fn kind(&self) -> &IslandKind { - &self.kind - } - - #[inline] - #[must_use] - pub const fn target(&self) -> TargetId { - self.target - } - - #[inline] - #[must_use] - pub const fn requires(&self) -> TraversalPathBitSet { - self.requires - } - - #[inline] - #[must_use] - pub const fn provides(&self) -> TraversalPathBitSet { - self.provides - } -} - -pub struct IslandGraph { - vertex: VertexType, - inner: LinkedGraph, - lookup: BasicBlockVec, -} - -impl IslandGraph { - fn build_in( - body: &Body<'_>, - vertex: VertexType, - islands: IslandVec, - scratch: S, - alloc: A, - ) -> Self - where - S: Allocator, - A: Clone, - { - let mut lookup = - BasicBlockVec::from_domain_in(IslandId::MAX, &body.basic_blocks, alloc.clone()); - let mut graph = - LinkedGraph::with_capacity_in(islands.len(), body.basic_blocks.edge_count(), alloc); - let mut matrix = BitMatrix::new_in(islands.len(), islands.len(), scratch); - - for ( - island_id, - Island { - target, - members, - traversals, - }, - ) in islands.into_iter_enumerated() - { - for block_id in &members { - lookup[block_id] = island_id; - } - - let node_id = graph.add_node(IslandNode { - kind: IslandKind::Exec(ExecIsland { members }), - target, - requires: traversals, - provides: TraversalPathBitSet::empty(vertex), - }); - debug_assert_eq!(node_id.as_u32(), island_id.as_u32()); - } - - for block_id in body.basic_blocks.ids() { - let source = lookup[block_id]; - - for successor in body.basic_blocks.successors(block_id) { - let target = lookup[successor]; - - if source == target || matrix.contains(source, target) { - continue; - } - - matrix.insert(source, target); - graph.add_edge( - NodeId::new(source.as_u32()), - NodeId::new(target.as_u32()), - IslandEdge::ControlFlow, - ); - } - } - - Self { - vertex, - 
inner: graph, - lookup, - } - } - - /// Resolves all island requirements and inserts data islands where needed. - pub(crate) fn resolve(&mut self, scratch: S) - where - S: Allocator + Clone, - { - let mut topo: Vec = self - .inner - .depth_first_forest_post_order() - .map(|node| IslandId::new(node.as_u32())) - .collect_in(scratch.clone()); - topo.reverse(); - - // Postorder collected into a vec; iterate in reverse for topological order. - let start = self.lookup[BasicBlockId::START]; - - RequirementResolver::new(self, start, scratch).resolve(&topo); - } -} - -impl DirectedGraph for IslandGraph { - type Edge<'this> - = &'this Edge - where - Self: 'this; - type EdgeId = EdgeId; - type Node<'this> - = (IslandId, &'this IslandNode) - where - Self: 'this; - type NodeId = IslandId; - - fn node_count(&self) -> usize { - self.inner.node_count() - } - - fn edge_count(&self) -> usize { - self.inner.edge_count() - } - - fn iter_nodes(&self) -> impl ExactSizeIterator> + DoubleEndedIterator { - self.inner - .iter_nodes() - .map(|node| (IslandId::from_u32(node.id().as_u32()), &node.data)) - } - - fn iter_edges(&self) -> impl ExactSizeIterator> + DoubleEndedIterator { - self.inner.iter_edges() - } -} - -impl Successors for IslandGraph { - type SuccIter<'this> - = impl Iterator + 'this - where - Self: 'this; - - fn successors(&self, node: Self::NodeId) -> Self::SuccIter<'_> { - self.inner - .successors(NodeId::new(node.as_u32())) - .map(|node| IslandId::new(node.as_u32())) - } -} - -impl Predecessors for IslandGraph { - type PredIter<'this> - = impl Iterator + 'this - where - Self: 'this; - - fn predecessors(&self, node: Self::NodeId) -> Self::PredIter<'_> { - self.inner - .predecessors(NodeId::new(node.as_u32())) - .map(|node| IslandId::new(node.as_u32())) - } -} - -impl IndexMut for IslandGraph { - fn index_mut(&mut self, index: IslandId) -> &mut Self::Output { - &mut self.inner[NodeId::new(index.as_u32())].data - } -} - -impl Index for IslandGraph { - type Output = IslandNode; 
- - fn index(&self, index: IslandId) -> &Self::Output { - &self.inner[NodeId::new(index.as_u32())].data - } -} - -/// Walks up the dominator tree from `node` to find the nearest ancestor whose target matches. -/// -/// Returns the ancestor and its depth in the dominator tree (0 = immediate dominator). -fn find_dominator_by_target( - dominators: &Dominators, - graph: &IslandGraph, - node: IslandId, - target: TargetId, -) -> Option<(IslandId, usize)> { - let mut current = node; - let mut depth = 0; - - loop { - let parent = dominators.immediate_dominator(current)?; - if parent == current { - return None; - } - - if graph[parent].target == target { - return Some((parent, depth)); - } - - current = parent; - depth += 1; - } -} - -/// Resolves data requirements for all islands, inserting data islands where needed. -/// -/// Walks islands in reverse postorder (topological order). For each required path, finds -/// the nearest dominating predecessor on the matching backend. If none exists, creates a -/// synthetic data island. Carries all shared state so individual methods stay clean. 
-struct RequirementResolver<'graph, A: Allocator, S: Allocator> { - graph: &'graph mut IslandGraph, - dominators: Dominators, - merged_provides: IslandVec, - data_providers: TargetArray>, -} - -impl<'graph, A: Allocator, S: Allocator + Clone> RequirementResolver<'graph, A, S> { - fn new(graph: &'graph mut IslandGraph, start: IslandId, scratch: S) -> Self { - let dominators = dominators(&*graph, start); - let merged_provides = IslandVec::from_elem_in( - TraversalPathBitSet::empty(graph.vertex), - graph.node_count(), - scratch, - ); - - Self { - graph, - dominators, - merged_provides, - data_providers: TargetArray::from_elem(None), - } - } - - fn resolve(mut self, topo: &[IslandId]) { - // Iterate in reverse for topological order - for &island_id in topo { - let island = &self.graph[island_id]; - let IslandKind::Exec(_) = &island.kind else { - debug_panic!("data islands should not be present during requirement resolution"); - continue; - }; - - self.inherit_provides(island_id); - self.resolve_island(island_id); - } - } - - /// If a same-target dominator exists, inherits its provided paths via an `Inherits` edge. - fn inherit_provides(&mut self, island_id: IslandId) { - let island_target = self.graph[island_id].target; - - if let Some((parent, _)) = - find_dominator_by_target(&self.dominators, self.graph, island_id, island_target) - { - self.merged_provides.copy_within(parent..=parent, island_id); - self.graph.inner.add_edge( - NodeId::from_u32(island_id.as_u32()), - NodeId::from_u32(parent.as_u32()), - IslandEdge::Inherits, - ); - } - } - - /// Resolves requirements for a single island. - fn resolve_island(&mut self, island_id: IslandId) { - let requires = self.graph[island_id].requires; - if requires.is_empty() { - return; - } - - // Cache dominator lookups per target to avoid repeated walks. 
- let mut cached = TargetArray::from_elem(None); - - for requirement in &requires { - let origin = requirement.origin(); - debug_assert!(!origin.is_empty()); - - let provider = self.find_best_provider(&mut cached, island_id, &origin); - let provider = provider.unwrap_or_else(|| self.get_or_create_data_island(&origin)); - - self.register_path(provider, island_id, requirement); - } - } - - /// Finds the nearest dominating provider among the potential origin targets. - fn find_best_provider( - &self, - cached: &mut TargetArray>>, - island_id: IslandId, - origin: &TargetBitSet, - ) -> Option { - origin - .iter() - .filter_map(|target| { - *cached[target].get_or_insert_with(|| { - find_dominator_by_target(&self.dominators, self.graph, island_id, target) - }) - }) - .min_by_key(|&(_, depth)| depth) - .map(|(provider, _)| provider) - } - - /// Registers a path as provided by `provider` for consumption by `consumer`. - fn register_path(&mut self, provider: IslandId, consumer: IslandId, path: TraversalPath) { - if !self.merged_provides[provider].contains(path) { - self.merged_provides[provider].insert(path); - self.graph[provider].provides.insert(path); - } - - self.graph.inner.add_edge( - NodeId::from_u32(consumer.as_u32()), - NodeId::from_u32(provider.as_u32()), - IslandEdge::DataFlow, - ); - } - - /// Returns an existing data island for the given origin backend, or creates one. - fn get_or_create_data_island(&mut self, origin: &TargetBitSet) -> IslandId { - // Check if *any* of the providers already have an initialised provider, if that's the case - // we create our own. - if let Some(provider) = origin.iter().find_map(|target| self.data_providers[target]) { - return provider; - } - - // `TargetId` is ordered by backend priority, so the first set bit gives us the best target - // (note that interpreter is technically first, but never a target for data). 
- let target = origin.first_set().unwrap_or_else(|| unreachable!()); - - if let Some(provider) = self.data_providers[target] { - return provider; - } - - let node = self.graph.inner.add_node(IslandNode { - kind: IslandKind::Data, - target, - requires: TraversalPathBitSet::empty(self.graph.vertex), - provides: TraversalPathBitSet::empty(self.graph.vertex), - }); - let provider = IslandId::from_u32(node.as_u32()); - self.data_providers[target] = Some(provider); - - provider - } -} diff --git a/libs/@local/hashql/mir/src/pass/execution/island/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/mod.rs index 64c6bf4b868..661ba8f5302 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/mod.rs @@ -29,8 +29,8 @@ use crate::{ visit::Visitor as _, }; -// pub(crate) mod graph; -mod graph2; +pub(crate) mod graph; +pub(crate) mod schedule; #[cfg(test)] mod tests; diff --git a/libs/@local/hashql/mir/src/pass/execution/island/schedule/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/schedule/mod.rs new file mode 100644 index 00000000000..5b7d4b23a29 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/island/schedule/mod.rs @@ -0,0 +1,110 @@ +#[cfg(test)] +mod tests; + +use core::alloc::Allocator; + +use hashql_core::graph::{DirectedGraph as _, Predecessors as _, Successors as _}; + +use super::{IslandId, IslandVec, graph::IslandGraph}; + +/// An island with its assigned parallelism level. +/// +/// Islands at the same level have no dependencies between them and can execute concurrently. +/// Level 0 contains islands with no predecessors. +#[derive(Debug, Copy, Clone)] +pub struct ScheduledIsland { + /// The island this entry refers to. + pub island: IslandId, + /// The parallelism level. All islands at the same level are independent. + pub level: u32, +} + +/// Topological ordering of islands with parallelism levels. +/// +/// Produced by [`IslandGraph::schedule`]. 
Each island appears exactly once, +/// ordered so that all predecessors of an island appear before it. +#[derive(Debug)] +pub struct IslandSchedule { + entries: Vec, +} + +impl IslandSchedule { + /// Returns the scheduled entries in topological order. + #[inline] + #[must_use] + pub fn entries(&self) -> &[ScheduledIsland] { + &self.entries + } + + /// Returns the number of scheduled islands. + #[inline] + #[must_use] + pub const fn len(&self) -> usize { + self.entries.len() + } + + /// Returns `true` if the schedule contains no islands. + #[inline] + #[must_use] + pub const fn is_empty(&self) -> bool { + self.entries.is_empty() + } + + /// Iterates over the scheduled entries in topological order. + #[inline] + pub fn iter(&self) -> impl ExactSizeIterator { + self.entries.iter() + } +} + +impl IslandGraph { + /// Computes a topological schedule with level assignment for parallelism. + /// + /// Each island is assigned the lowest level such that all its predecessors are at + /// strictly lower levels. Islands at the same level have no direct dependencies and + /// can execute concurrently. 
+ #[expect(clippy::cast_possible_truncation)] + pub fn schedule(&self, scratch: S) -> IslandSchedule + where + S: Allocator + Clone, + { + let node_count = self.node_count(); + + let mut in_degree = IslandVec::from_elem_in(0_u32, node_count, scratch.clone()); + let mut levels = IslandVec::from_elem_in(0_u32, node_count, scratch.clone()); + + for (island_id, _) in self.iter_nodes() { + in_degree[island_id] = self.predecessors(island_id).count() as u32; + } + + let mut queue: Vec = Vec::new_in(scratch.clone()); + for (island_id, _) in self.iter_nodes() { + if in_degree[island_id] == 0 { + queue.push(island_id); + } + } + + let mut entries = Vec::with_capacity_in(node_count, scratch); + let mut head = 0; + + while head < queue.len() { + let island_id = queue[head]; + head += 1; + + entries.push(ScheduledIsland { + island: island_id, + level: levels[island_id], + }); + + for successor in self.successors(island_id) { + levels[successor] = levels[successor].max(levels[island_id] + 1); + in_degree[successor] -= 1; + if in_degree[successor] == 0 { + queue.push(successor); + } + } + } + + IslandSchedule { entries } + } +} diff --git a/libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs new file mode 100644 index 00000000000..1bc8b6b9e69 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs @@ -0,0 +1,104 @@ +//! Tests for island schedule computation. +#![expect(clippy::min_ident_chars)] + +use alloc::alloc::Global; + +use hashql_core::{ + graph::DirectedGraph, heap::Heap, symbol::sym, r#type::environment::Environment, +}; + +use crate::{ + builder::body, + intern::Interner, + pass::execution::{ + island::graph::{IslandKind, tests::build_graph}, + target::TargetId, + }, +}; + +/// Data islands should be at a lower level than their consumers. 
+#[test] +fn data_island_before_consumer() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj enc = vertex.encodings: ?, + vecs = enc.vectors: ?; + + bb0() { + val = load vecs; + return val; + } + }); + + let graph = build_graph(&body, &[TargetId::Interpreter]); + let schedule = graph.schedule(Global); + let entries = schedule.entries(); + + assert!(entries.len() >= 2); + + let exec_entry = entries + .iter() + .find(|entry| matches!(graph[entry.island].kind(), IslandKind::Exec(_))); + let data_entry = entries + .iter() + .find(|entry| matches!(graph[entry.island].kind(), IslandKind::Data)); + + if let (Some(exec_entry), Some(data_entry)) = (exec_entry, data_entry) { + assert!( + exec_entry.level > data_entry.level, + "exec island (level {}) should be after data island (level {})", + exec_entry.level, + data_entry.level + ); + } +} + +/// Every island in the graph appears exactly once in the schedule. +#[test] +fn covers_all_nodes() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], + val: ?, cond: Bool; + @proj props = vertex.properties: ?, + enc = vertex.encodings: ?, + vecs = enc.vectors: ?; + + bb0() { + cond = load true; + if cond then bb1() else bb2(); + }, + bb1() { + val = load props; + goto bb3(); + }, + bb2() { + val = load vecs; + goto bb3(); + }, + bb3() { + val = load vecs; + return val; + } + }); + + let graph = build_graph( + &body, + &[ + TargetId::Postgres, + TargetId::Interpreter, + TargetId::Embedding, + TargetId::Postgres, + ], + ); + + let schedule = graph.schedule(Global); + assert_eq!(schedule.entries().len(), graph.node_count()); +} diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index 5e667b8ff92..2c8a56fbb1a 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -38,10 +38,8 @@ pub use self::{ cost::{ApproxCost, Cost}, island::{ Island, IslandId, IslandVec, - graph::{ - FetchIsland, IslandEdge, IslandEdgeKind, IslandGraph, IslandNode, IslandNodeId, - ScheduledIsland, - }, + graph::{ExecIsland, IslandEdge, IslandGraph, IslandKind, IslandNode}, + schedule::{IslandSchedule, ScheduledIsland}, }, placement::error::PlacementDiagnosticCategory, target::TargetId, diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs index 3ba9e974a76..575f9c91c96 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs @@ -485,7 +485,8 @@ impl EntityPathBitSet { } /// Returns `true` if `path` is present in the bitset. 
- pub(crate) fn contains(&self, path: EntityPath) -> bool { + #[must_use] + pub(crate) const fn contains(self, path: EntityPath) -> bool { self.0.contains(path) } diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs index 64c59a1b072..4b89cccb71e 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs @@ -127,7 +127,8 @@ impl TraversalPathBitSet { } /// Returns `true` if `path` is present in the bitset. - pub fn contains(&self, path: TraversalPath) -> bool { + #[must_use] + pub const fn contains(self, path: TraversalPath) -> bool { match (self, path) { (Self::Entity(bitset), TraversalPath::Entity(path)) => bitset.contains(path), } From c3009a3191aab2477f2cae03cbe292e68df504bd Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Tue, 3 Mar 2026 20:06:03 +0100 Subject: [PATCH 4/9] feat: checkpoint --- .../src/pass/execution/island/graph/mod.rs | 6 +- .../src/pass/execution/island/graph/tests.rs | 200 ++++++++++++++---- .../pass/execution/island/schedule/tests.rs | 2 +- 3 files changed, 161 insertions(+), 47 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs index 29b1e312ae2..80a842fe57b 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs @@ -429,10 +429,8 @@ impl<'graph, A: Allocator, S: Allocator + Clone> RequirementResolver<'graph, A, // If this island runs on an origin backend for the path, it self-provides. 
if origin.contains(island_target) { - if !self.merged_provides[island_id].contains(requirement) { - self.merged_provides[island_id].insert(requirement); - self.graph[island_id].provides.insert(requirement); - } + self.merged_provides[island_id].insert(requirement); + self.graph[island_id].provides.insert(requirement); continue; } diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs index 9ec41335fcd..01ea488fd90 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs @@ -40,6 +40,36 @@ pub(crate) fn build_graph(body: &Body<'_>, targets: &[TargetId]) -> IslandGraph< graph } +fn find_island(graph: &IslandGraph, target: TargetId) -> IslandId { + (0..graph.node_count()) + .map(IslandId::from_usize) + .find(|&island_id| graph[island_id].target() == target) + .unwrap_or_else(|| panic!("no island with target {target:?}")) +} + +fn find_data_island(graph: &IslandGraph, target: TargetId) -> IslandId { + (0..graph.node_count()) + .map(IslandId::from_usize) + .find(|&island_id| { + matches!(graph[island_id].kind(), IslandKind::Data) + && graph[island_id].target() == target + }) + .unwrap_or_else(|| panic!("no data island with target {target:?}")) +} + +fn has_edge( + graph: &IslandGraph, + source: IslandId, + target: IslandId, + kind: IslandEdge, +) -> bool { + graph.iter_edges().any(|edge| { + edge.source().as_u32() == source.as_u32() + && edge.target().as_u32() == target.as_u32() + && edge.data == kind + }) +} + /// Single Postgres island accessing properties: no fetch island needed because the island /// itself is on the origin backend for that path. 
#[test] @@ -59,10 +89,20 @@ fn single_island_no_fetch() { }); let graph = build_graph(&body, &[TargetId::Postgres]); + let island = &graph[IslandId::new(0)]; assert_eq!(graph.node_count(), 1); - assert_matches!(graph[IslandId::new(0)].kind(), IslandKind::Exec(_)); assert_eq!(graph.edge_count(), 0); + assert_matches!(island.kind(), IslandKind::Exec(_)); + assert_eq!(island.target(), TargetId::Postgres); + + let provides = island.provides(); + assert!( + provides + .as_entity() + .expect("entity vertex") + .contains(EntityPath::Properties), + ); } /// Postgres island followed by Interpreter island that needs properties. @@ -90,33 +130,40 @@ fn data_edge_from_predecessor() { let graph = build_graph(&body, &[TargetId::Postgres, TargetId::Interpreter]); - // Two exec nodes, no data islands. assert_eq!(graph.node_count(), 2); - let control_flow_count = graph - .iter_edges() - .filter(|edge| edge.data == IslandEdge::ControlFlow) - .count(); - assert_eq!(control_flow_count, 1); + let postgres = find_island(&graph, TargetId::Postgres); + let interpreter = find_island(&graph, TargetId::Interpreter); - let data_flow_count = graph - .iter_edges() - .filter(|edge| edge.data == IslandEdge::DataFlow) - .count(); - assert_eq!(data_flow_count, 1); + assert_matches!(graph[postgres].kind(), IslandKind::Exec(_)); + assert_matches!(graph[interpreter].kind(), IslandKind::Exec(_)); - // The Postgres island should provide Properties. - let postgres_island = (0..graph.node_count()) - .map(IslandId::from_usize) - .find(|&island_id| graph[island_id].target() == TargetId::Postgres) - .expect("postgres island exists"); + // Postgres self-provides Properties (it's the origin). assert!( - graph[postgres_island] + graph[postgres] .provides() .as_entity() .expect("entity vertex") .contains(EntityPath::Properties) ); + + // ControlFlow edge: Postgres → Interpreter (CFG successor). 
+ assert!(has_edge( + &graph, + postgres, + interpreter, + IslandEdge::ControlFlow + )); + + // DataFlow edge: Postgres → Interpreter (Postgres provides Properties to Interpreter). + assert!(has_edge( + &graph, + postgres, + interpreter, + IslandEdge::DataFlow + )); + + assert_eq!(graph.edge_count(), 2); } /// Interpreter island needs embedding data but has no Embedding predecessor. @@ -140,22 +187,25 @@ fn fetch_island_for_unsatisfied_requirement() { let graph = build_graph(&body, &[TargetId::Interpreter]); - // One exec node plus one data island for Embedding. assert_eq!(graph.node_count(), 2); - let data_island = (0..graph.node_count()) - .map(IslandId::from_usize) - .find(|&island_id| matches!(graph[island_id].kind(), IslandKind::Data)) - .expect("data island exists"); + let exec = find_island(&graph, TargetId::Interpreter); + let data = find_data_island(&graph, TargetId::Embedding); - assert_eq!(graph[data_island].target(), TargetId::Embedding); + assert_matches!(graph[exec].kind(), IslandKind::Exec(_)); + assert_matches!(graph[data].kind(), IslandKind::Data); + + // Data island provides Vectors. assert!( - graph[data_island] + graph[data] .provides() .as_entity() .expect("entity vertex") .contains(EntityPath::Vectors) ); + + // DataFlow edge: Embedding data island → Interpreter exec island. + assert!(has_edge(&graph, data, exec, IslandEdge::DataFlow)); } /// Diamond CFG: Postgres branches to Interpreter and Embedding, both merge into a @@ -203,20 +253,49 @@ fn diamond_branch_needs_fetch() { ], ); - // The Embedding island (bb2) only runs on one branch and doesn't dominate bb3, - // so a data island for Embedding must be inserted. - let has_embedding_data_island = - (0..graph.node_count()) - .map(IslandId::from_usize) - .any(|island_id| { - matches!(graph[island_id].kind(), IslandKind::Data) - && graph[island_id].target() == TargetId::Embedding - }); + // 4 exec islands + 1 data island for Embedding. 
+ assert_eq!(graph.node_count(), 5); + let data = find_data_island(&graph, TargetId::Embedding); + assert_matches!(graph[data].kind(), IslandKind::Data); + + // The data island provides Vectors. assert!( - has_embedding_data_island, - "expected a data island for Embedding" + graph[data] + .provides() + .as_entity() + .expect("entity vertex") + .contains(EntityPath::Vectors) ); + + // bb3 (second Postgres island) consumes Vectors from the data island. + // Find both Postgres exec islands: bb0 is the entry, bb3 is the merge point. + let postgres_islands: Vec<_> = (0..graph.node_count()) + .map(IslandId::from_usize) + .filter(|&island_id| { + graph[island_id].target() == TargetId::Postgres + && matches!(graph[island_id].kind(), IslandKind::Exec(_)) + }) + .collect(); + assert_eq!(postgres_islands.len(), 2); + + // The merge-point Postgres island (bb3) should have a DataFlow edge from the data island. + let merge_postgres = postgres_islands + .iter() + .find(|&&island_id| has_edge(&graph, data, island_id, IslandEdge::DataFlow)) + .expect("data island should have DataFlow edge to a Postgres island"); + + // bb3 also inherits from bb0 (both Postgres, bb0 dominates bb3). + let entry_postgres = postgres_islands + .iter() + .find(|&&island_id| island_id != *merge_postgres) + .expect("two distinct Postgres islands"); + assert!(has_edge( + &graph, + *entry_postgres, + *merge_postgres, + IslandEdge::Inherits, + )); } /// Inherits edge: when two same-target islands are in a dominator relationship, @@ -255,12 +334,49 @@ fn inherits_edge_same_target_dominator() { ], ); - let inherits_count = graph - .iter_edges() - .filter(|edge| edge.data == IslandEdge::Inherits) - .count(); + assert_eq!(graph.node_count(), 3); + + // Find the two Postgres exec islands. 
+ let postgres_islands: Vec<_> = (0..graph.node_count()) + .map(IslandId::from_usize) + .filter(|&island_id| graph[island_id].target() == TargetId::Postgres) + .collect(); + assert_eq!(postgres_islands.len(), 2); + + // Determine which is the dominator (bb0) and which is the child (bb2). + // The Inherits edge points dominator → child. + let (dominator, child) = if has_edge( + &graph, + postgres_islands[0], + postgres_islands[1], + IslandEdge::Inherits, + ) { + (postgres_islands[0], postgres_islands[1]) + } else { + assert!(has_edge( + &graph, + postgres_islands[1], + postgres_islands[0], + IslandEdge::Inherits, + )); + (postgres_islands[1], postgres_islands[0]) + }; + + // The dominator self-provides Properties (origin backend, first to resolve it). + assert!( + graph[dominator] + .provides() + .as_entity() + .expect("entity vertex") + .contains(EntityPath::Properties), + ); + + // The child also self-provides Properties (it's on the origin backend and requires it). assert!( - inherits_count > 0, - "expected an Inherits edge between same-target dominating islands" + graph[child] + .provides() + .as_entity() + .expect("entity vertex") + .contains(EntityPath::Properties), ); } diff --git a/libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs index 1bc8b6b9e69..345a1613de2 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs @@ -4,7 +4,7 @@ use alloc::alloc::Global; use hashql_core::{ - graph::DirectedGraph, heap::Heap, symbol::sym, r#type::environment::Environment, + graph::DirectedGraph as _, heap::Heap, symbol::sym, r#type::environment::Environment, }; use crate::{ From 06777f456c66ae032d0746b45d9c0910beb96b9d Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Tue, 3 Mar 2026 21:42:39 +0100 Subject: [PATCH 5/9] feat: checkpoint --- .../src/pass/execution/island/graph/mod.rs | 14 +- 
.../src/pass/execution/island/graph/tests.rs | 211 +++++++++++++++--- .../src/pass/execution/island/schedule/mod.rs | 44 ++-- .../pass/execution/island/schedule/tests.rs | 140 +++++++++++- 4 files changed, 346 insertions(+), 63 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs index 80a842fe57b..88bd30855d4 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs @@ -239,7 +239,7 @@ impl IslandGraph { } /// Resolves all island requirements and inserts data islands where needed. - pub(crate) fn resolve(&mut self, scratch: S) + fn resolve(&mut self, scratch: S) where S: Allocator + Clone, { @@ -427,10 +427,9 @@ impl<'graph, A: Allocator, S: Allocator + Clone> RequirementResolver<'graph, A, let origin = requirement.origin(); debug_assert!(!origin.is_empty()); - // If this island runs on an origin backend for the path, it self-provides. + // If this island runs on an origin backend for the path, the data is + // locally available and doesn't need to be provided to downstream consumers. if origin.contains(island_target) { - self.merged_provides[island_id].insert(requirement); - self.graph[island_id].provides.insert(requirement); continue; } @@ -476,8 +475,7 @@ impl<'graph, A: Allocator, S: Allocator + Clone> RequirementResolver<'graph, A, /// Returns an existing data island for the given origin backend, or creates one. fn get_or_create_data_island(&mut self, origin: TargetBitSet) -> IslandId { - // Check if *any* of the providers already have an initialised provider, if that's the case - // we create our own. + // Reuse an existing data island if any origin target already has one. 
if let Some(provider) = origin.iter().find_map(|target| self.data_providers[target]) { return provider; } @@ -486,10 +484,6 @@ impl<'graph, A: Allocator, S: Allocator + Clone> RequirementResolver<'graph, A, // (note that interpreter is technically first, but never a target for data). let target = origin.first_set().unwrap_or_else(|| unreachable!()); - if let Some(provider) = self.data_providers[target] { - return provider; - } - let node = self.graph.inner.add_node(IslandNode { kind: IslandKind::Data, target, diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs index 01ea488fd90..d5d89e3116c 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs @@ -35,9 +35,7 @@ pub(crate) fn make_targets(assignments: &[TargetId]) -> BasicBlockVec, targets: &[TargetId]) -> IslandGraph { let target_vec = make_targets(targets); let islands = IslandPlacement::new().run(body, VertexType::Entity, &target_vec, Global); - let mut graph = IslandGraph::build_in(body, VertexType::Entity, islands, Global, Global); - graph.resolve(Global); - graph + IslandGraph::new_in(body, VertexType::Entity, islands, Global, Global) } fn find_island(graph: &IslandGraph, target: TargetId) -> IslandId { @@ -96,12 +94,13 @@ fn single_island_no_fetch() { assert_matches!(island.kind(), IslandKind::Exec(_)); assert_eq!(island.target(), TargetId::Postgres); - let provides = island.provides(); + // Properties are locally available on Postgres (origin), so provides is empty. 
assert!( - provides + island + .provides() .as_entity() .expect("entity vertex") - .contains(EntityPath::Properties), + .is_empty() ); } @@ -138,14 +137,11 @@ fn data_edge_from_predecessor() { assert_matches!(graph[postgres].kind(), IslandKind::Exec(_)); assert_matches!(graph[interpreter].kind(), IslandKind::Exec(_)); - // Postgres self-provides Properties (it's the origin). - assert!( - graph[postgres] - .provides() - .as_entity() - .expect("entity vertex") - .contains(EntityPath::Properties) - ); + // Postgres self-provides exactly Properties (it's the origin). + let provides = graph[postgres].provides(); + let provides = provides.as_entity().expect("entity vertex"); + assert_eq!(provides.len(), 1); + assert!(provides.contains(EntityPath::Properties)); // ControlFlow edge: Postgres → Interpreter (CFG successor). assert!(has_edge( @@ -195,17 +191,14 @@ fn fetch_island_for_unsatisfied_requirement() { assert_matches!(graph[exec].kind(), IslandKind::Exec(_)); assert_matches!(graph[data].kind(), IslandKind::Data); - // Data island provides Vectors. - assert!( - graph[data] - .provides() - .as_entity() - .expect("entity vertex") - .contains(EntityPath::Vectors) - ); - // DataFlow edge: Embedding data island → Interpreter exec island. assert!(has_edge(&graph, data, exec, IslandEdge::DataFlow)); + + // Data island provides exactly Vectors. + let provides = graph[data].provides(); + let provides = provides.as_entity().expect("entity vertex"); + assert_eq!(provides.len(), 1); + assert!(provides.contains(EntityPath::Vectors)); } /// Diamond CFG: Postgres branches to Interpreter and Embedding, both merge into a @@ -259,14 +252,11 @@ fn diamond_branch_needs_fetch() { let data = find_data_island(&graph, TargetId::Embedding); assert_matches!(graph[data].kind(), IslandKind::Data); - // The data island provides Vectors. 
- assert!( - graph[data] - .provides() - .as_entity() - .expect("entity vertex") - .contains(EntityPath::Vectors) - ); + // The data island provides exactly Vectors. + let provides = graph[data].provides(); + let provides = provides.as_entity().expect("entity vertex"); + assert_eq!(provides.len(), 1); + assert!(provides.contains(EntityPath::Vectors)); // bb3 (second Postgres island) consumes Vectors from the data island. // Find both Postgres exec islands: bb0 is the entry, bb3 is the merge point. @@ -362,21 +352,170 @@ fn inherits_edge_same_target_dominator() { (postgres_islands[1], postgres_islands[0]) }; - // The dominator self-provides Properties (origin backend, first to resolve it). + // Both islands access Properties locally (origin backend), so neither needs to + // provide it to anyone. provides is empty on both. assert!( graph[dominator] .provides() .as_entity() .expect("entity vertex") - .contains(EntityPath::Properties), + .is_empty() ); - - // The child also self-provides Properties (it's on the origin backend and requires it). assert!( graph[child] .provides() .as_entity() .expect("entity vertex") - .contains(EntityPath::Properties), + .is_empty() + ); +} + +/// Two Interpreter islands both need Vectors (origin: Embedding). Only one data island +/// should be created, and both consumers get a `DataFlow` edge from it. +#[test] +fn data_island_reused_across_consumers() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj enc = vertex.encodings: ?, + vecs = enc.vectors: ?; + + bb0() { + val = load vecs; + goto bb1(); + }, + bb1() { + goto bb2(); + }, + bb2() { + val = load vecs; + return val; + } + }); + + // bb0=Interpreter, bb1=Postgres (separates the two Interpreter islands), bb2=Interpreter. 
+ let graph = build_graph( + &body, + &[ + TargetId::Interpreter, + TargetId::Postgres, + TargetId::Interpreter, + ], + ); + + // Two Interpreter exec islands + one Postgres exec island + exactly one data island. + let data_islands: Vec<_> = (0..graph.node_count()) + .map(IslandId::from_usize) + .filter(|&island_id| matches!(graph[island_id].kind(), IslandKind::Data)) + .collect(); + assert_eq!(data_islands.len(), 1); + + let data = data_islands[0]; + assert_eq!(graph[data].target(), TargetId::Embedding); + + // Both Interpreter exec islands get a DataFlow edge from the single data island. + let interpreter_islands: Vec<_> = (0..graph.node_count()) + .map(IslandId::from_usize) + .filter(|&island_id| { + matches!(graph[island_id].kind(), IslandKind::Exec(_)) + && graph[island_id].target() == TargetId::Interpreter + }) + .collect(); + assert_eq!(interpreter_islands.len(), 2); + + for &exec in &interpreter_islands { + assert!( + has_edge(&graph, data, exec, IslandEdge::DataFlow), + "expected DataFlow edge from data island to exec island {exec:?}", + ); + } +} + +/// Entry island on a non-origin backend: the dominator walk reaches the root without +/// finding an origin, so a data island is created. +#[test] +fn entry_island_needs_fetch() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj props = vertex.properties: ?; + + bb0() { + val = load props; + return val; + } + }); + + // Entry island is Interpreter, but Properties origin is Postgres. + // Dominator walk from the entry hits the root (itself), so no dominator found. 
+ let graph = build_graph(&body, &[TargetId::Interpreter]); + + assert_eq!(graph.node_count(), 2); + + let exec = find_island(&graph, TargetId::Interpreter); + let data = find_data_island(&graph, TargetId::Postgres); + + assert!(has_edge(&graph, data, exec, IslandEdge::DataFlow)); + + let provides = graph[data].provides(); + let provides = provides.as_entity().expect("entity vertex"); + assert_eq!(provides.len(), 1); + assert!(provides.contains(EntityPath::Properties)); +} + +/// Control flow edge dedup: two blocks in the same island both have a successor in +/// another island, but only one `ControlFlow` edge should be created. +#[test] +fn control_flow_edge_dedup() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], + val: ?, cond: Bool; + + bb0() { + cond = load true; + goto bb1(); + }, + bb1() { + if cond then bb2() else bb2(); + }, + bb2() { + val = load true; + return val; + } + }); + + // bb0 and bb1 are both Interpreter (same island). bb2 is Postgres. + // bb1→bb2 appears twice (both arms of the branch), but the BitMatrix dedup + // ensures only one ControlFlow edge from the Interpreter island to Postgres. 
+ let graph = build_graph( + &body, + &[ + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Postgres, + ], ); + + assert_eq!(graph.node_count(), 2); + assert_eq!(graph.edge_count(), 1); + + let interpreter = find_island(&graph, TargetId::Interpreter); + let postgres = find_island(&graph, TargetId::Postgres); + + assert!(has_edge( + &graph, + interpreter, + postgres, + IslandEdge::ControlFlow + )); } diff --git a/libs/@local/hashql/mir/src/pass/execution/island/schedule/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/schedule/mod.rs index 5b7d4b23a29..26099f9396d 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/schedule/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/schedule/mod.rs @@ -1,7 +1,8 @@ #[cfg(test)] mod tests; -use core::alloc::Allocator; +use alloc::{alloc::Global, collections::VecDeque}; +use core::{alloc::Allocator, cmp}; use hashql_core::graph::{DirectedGraph as _, Predecessors as _, Successors as _}; @@ -55,16 +56,35 @@ impl IslandSchedule { pub fn iter(&self) -> impl ExactSizeIterator { self.entries.iter() } + + #[inline] + pub fn level_count(&self) -> usize { + self.entries + .last() + .map_or(0, |entry| entry.level as usize + 1) + } + + #[inline] + pub fn levels(&self) -> impl Iterator { + self.entries.chunk_by(|lhs, rhs| lhs.level == rhs.level) + } } -impl IslandGraph { +impl IslandGraph { + #[must_use] + pub fn schedule(&self) -> IslandSchedule { + self.schedule_in(Global, Global) + } +} + +impl IslandGraph { /// Computes a topological schedule with level assignment for parallelism. /// /// Each island is assigned the lowest level such that all its predecessors are at /// strictly lower levels. Islands at the same level have no direct dependencies and /// can execute concurrently. 
#[expect(clippy::cast_possible_truncation)] - pub fn schedule(&self, scratch: S) -> IslandSchedule + pub fn schedule_in(&self, scratch: S, alloc: A) -> IslandSchedule where S: Allocator + Clone, { @@ -77,34 +97,32 @@ impl IslandGraph { in_degree[island_id] = self.predecessors(island_id).count() as u32; } - let mut queue: Vec = Vec::new_in(scratch.clone()); + let mut queue: VecDeque = VecDeque::new_in(scratch); for (island_id, _) in self.iter_nodes() { if in_degree[island_id] == 0 { - queue.push(island_id); + queue.push_back(island_id); } } - let mut entries = Vec::with_capacity_in(node_count, scratch); - let mut head = 0; - - while head < queue.len() { - let island_id = queue[head]; - head += 1; + let mut entries = Vec::with_capacity_in(node_count, alloc); + while let Some(island_id) = queue.pop_front() { entries.push(ScheduledIsland { island: island_id, level: levels[island_id], }); for successor in self.successors(island_id) { - levels[successor] = levels[successor].max(levels[island_id] + 1); + levels[successor] = cmp::max(levels[successor], levels[island_id] + 1); in_degree[successor] -= 1; + if in_degree[successor] == 0 { - queue.push(successor); + queue.push_back(successor); } } } + entries.sort_by_key(|entry| entry.level); IslandSchedule { entries } } } diff --git a/libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs index 345a1613de2..53270208cde 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs @@ -1,8 +1,6 @@ //! Tests for island schedule computation. 
#![expect(clippy::min_ident_chars)] -use alloc::alloc::Global; - use hashql_core::{ graph::DirectedGraph as _, heap::Heap, symbol::sym, r#type::environment::Environment, }; @@ -35,7 +33,7 @@ fn data_island_before_consumer() { }); let graph = build_graph(&body, &[TargetId::Interpreter]); - let schedule = graph.schedule(Global); + let schedule = graph.schedule(); let entries = schedule.entries(); assert!(entries.len() >= 2); @@ -99,6 +97,140 @@ fn covers_all_nodes() { ], ); - let schedule = graph.schedule(Global); + let schedule = graph.schedule(); assert_eq!(schedule.entries().len(), graph.node_count()); } + +/// Two levels: data island at level 0, exec island at level 1. +#[test] +fn level_count_with_data_dependency() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj enc = vertex.encodings: ?, + vecs = enc.vectors: ?; + + bb0() { + val = load vecs; + return val; + } + }); + + let graph = build_graph(&body, &[TargetId::Interpreter]); + let schedule = graph.schedule(); + + assert_eq!(schedule.level_count(), 2); + assert_eq!(schedule.levels().count(), 2); +} + +/// Each level slice contains islands with the same level, and levels are ascending. +#[test] +fn levels_are_contiguous_and_ascending() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], + val: ?, cond: Bool; + @proj props = vertex.properties: ?, + enc = vertex.encodings: ?, + vecs = enc.vectors: ?; + + bb0() { + cond = load true; + if cond then bb1() else bb2(); + }, + bb1() { + val = load props; + goto bb3(); + }, + bb2() { + val = load vecs; + goto bb3(); + }, + bb3() { + val = load vecs; + return val; + } + }); + + let graph = build_graph( + &body, + &[ + TargetId::Postgres, + TargetId::Interpreter, + TargetId::Embedding, + TargetId::Postgres, + ], + ); + + let schedule = graph.schedule(); + let mut prev_level = None; + let mut total_entries = 0; + + for level_slice in schedule.levels() { + assert!(!level_slice.is_empty()); + + let expected_level = level_slice[0].level; + for entry in level_slice { + assert_eq!(entry.level, expected_level, "mixed levels within a slice"); + } + + if let Some(prev) = prev_level { + assert!(expected_level > prev, "levels not strictly ascending"); + } + prev_level = Some(expected_level); + total_entries += level_slice.len(); + } + + assert_eq!( + total_entries, + schedule.len(), + "levels must cover all entries" + ); +} + +/// Data islands appear in an earlier level slice than their exec consumers. +#[test] +fn levels_order_data_before_exec() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj enc = vertex.encodings: ?, + vecs = enc.vectors: ?; + + bb0() { + val = load vecs; + return val; + } + }); + + let graph = build_graph(&body, &[TargetId::Interpreter]); + let schedule = graph.schedule(); + + let level_slices: Vec<_> = schedule.levels().collect(); + assert_eq!(level_slices.len(), 2); + + for entry in level_slices[0] { + assert_eq!(entry.level, 0); + assert!( + matches!(graph[entry.island].kind(), IslandKind::Data), + "level 0 should contain the data island" + ); + } + + for entry in level_slices[1] { + assert_eq!(entry.level, 1); + assert!( + matches!(graph[entry.island].kind(), IslandKind::Exec(_)), + "level 1 should contain the exec island" + ); + } +} From f18aa60afbe9ac8af8684b87efd18514484dcb2f Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Tue, 3 Mar 2026 21:44:50 +0100 Subject: [PATCH 6/9] feat: checkpoint --- libs/@local/hashql/mir/src/pass/execution/island/tests.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/@local/hashql/mir/src/pass/execution/island/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/tests.rs index d24ef1b7f8e..542013c8cd6 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/tests.rs @@ -343,6 +343,7 @@ fn island_joins_traversal_paths() { let island = &islands[IslandId::new(0)]; let traversal_paths = island.traversals(); let joined = traversal_paths.as_entity().expect("entity vertex"); + assert_eq!(joined.len(), 2); assert!(joined.contains(EntityPath::Properties)); assert!(joined.contains(EntityPath::ProvenanceEdition)); } From 6e0a41d9968a74220abf9b7ec724c23e2a6d5525 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Tue, 3 Mar 2026 21:45:34 +0100 Subject: [PATCH 7/9] feat: checkpoint --- .../pass/execution/island/schedule/tests.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git 
a/libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs index 53270208cde..5439d7da89a 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/schedule/tests.rs @@ -36,23 +36,19 @@ fn data_island_before_consumer() { let schedule = graph.schedule(); let entries = schedule.entries(); - assert!(entries.len() >= 2); + assert_eq!(entries.len(), 2); let exec_entry = entries .iter() - .find(|entry| matches!(graph[entry.island].kind(), IslandKind::Exec(_))); + .find(|entry| matches!(graph[entry.island].kind(), IslandKind::Exec(_))) + .expect("should have an exec island"); let data_entry = entries .iter() - .find(|entry| matches!(graph[entry.island].kind(), IslandKind::Data)); + .find(|entry| matches!(graph[entry.island].kind(), IslandKind::Data)) + .expect("should have a data island"); - if let (Some(exec_entry), Some(data_entry)) = (exec_entry, data_entry) { - assert!( - exec_entry.level > data_entry.level, - "exec island (level {}) should be after data island (level {})", - exec_entry.level, - data_entry.level - ); - } + assert_eq!(data_entry.level, 0); + assert_eq!(exec_entry.level, 1); } /// Every island in the graph appears exactly once in the schedule. 
From e5a7c7e0ac03a429ce7e2b5fe8f289c6537f8c23 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sun, 8 Mar 2026 17:14:53 +0100 Subject: [PATCH 8/9] fix: suggestions from code review --- .../src/pass/execution/island/graph/mod.rs | 19 ++++-- .../src/pass/execution/island/graph/tests.rs | 58 +++++++++++++++++++ 2 files changed, 72 insertions(+), 5 deletions(-) diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs index 88bd30855d4..7b0dd9d82f8 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs @@ -466,11 +466,20 @@ impl<'graph, A: Allocator, S: Allocator + Clone> RequirementResolver<'graph, A, self.graph[provider].provides.insert(path); } - self.graph.inner.add_edge( - NodeId::from_u32(provider.as_u32()), - NodeId::from_u32(consumer.as_u32()), - IslandEdge::DataFlow, - ); + let provider_node = NodeId::from_u32(provider.as_u32()); + let consumer_node = NodeId::from_u32(consumer.as_u32()); + + let exists = self + .graph + .inner + .outgoing_edges(provider_node) + .any(|edge| edge.target() == consumer_node && edge.data == IslandEdge::DataFlow); + + if !exists { + self.graph + .inner + .add_edge(provider_node, consumer_node, IslandEdge::DataFlow); + } } /// Returns an existing data island for the given origin backend, or creates one. diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs index d5d89e3116c..4c3260f19fb 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs @@ -469,6 +469,64 @@ fn entry_island_needs_fetch() { assert!(provides.contains(EntityPath::Properties)); } +/// DataFlow edge dedup: an Interpreter island accesses two paths that both originate +/// from Postgres (`Properties` and `EntityUuid`). 
Both resolve to the same Postgres +/// dominator, but only one `DataFlow` edge should exist between them. +#[test] +fn data_flow_edge_dedup() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val1: ?, val2: ?; + @proj props = vertex.properties: ?, + meta = vertex.metadata: ?, + rec = meta.record_id: ?, + eid = rec.entity_id: ?, + uuid = eid.entity_uuid: ?; + + bb0() { + val1 = load props; + goto bb1(); + }, + bb1() { + val1 = load props; + val2 = load uuid; + return val1; + } + }); + + let graph = build_graph(&body, &[TargetId::Postgres, TargetId::Interpreter]); + assert_eq!(graph.node_count(), 2); + + let postgres = find_island(&graph, TargetId::Postgres); + let interpreter = find_island(&graph, TargetId::Interpreter); + + // Postgres provides both paths to the Interpreter island. + let provides = graph[postgres].provides(); + let provides = provides.as_entity().expect("entity vertex"); + assert!(provides.contains(EntityPath::Properties)); + assert!(provides.contains(EntityPath::EntityUuid)); + assert_eq!(provides.len(), 2); + + // Exactly two edges: one ControlFlow and one DataFlow, both Postgres -> Interpreter. + // Without dedup, the two paths would produce two DataFlow edges to the same consumer. + assert_eq!(graph.edge_count(), 2); + assert!(has_edge( + &graph, + postgres, + interpreter, + IslandEdge::ControlFlow + )); + assert!(has_edge( + &graph, + postgres, + interpreter, + IslandEdge::DataFlow + )); +} + /// Control flow edge dedup: two blocks in the same island both have a successor in /// another island, but only one `ControlFlow` edge should be created. 
#[test] From 572197cb751bd4cea59618b505154ffeb5c55cfa Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Sun, 8 Mar 2026 17:17:09 +0100 Subject: [PATCH 9/9] fix: lint --- libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs index 4c3260f19fb..800a9ff063c 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs @@ -469,7 +469,7 @@ fn entry_island_needs_fetch() { assert!(provides.contains(EntityPath::Properties)); } -/// DataFlow edge dedup: an Interpreter island accesses two paths that both originate +/// `DataFlow` edge dedup: an Interpreter island accesses two paths that both originate /// from Postgres (`Properties` and `EntityUuid`). Both resolve to the same Postgres /// dominator, but only one `DataFlow` edge should exist between them. #[test]