Skip to main content

pattern_core/graph/transform/
para.rs

1//! para_graph and para_graph_fixed: shape-aware structural fold over a graph view.
2//!
3//! Ported from `Pattern.Graph.Transform.paraGraph` and `paraGraphFixed`
4//! in the Haskell reference implementation.
5//!
6//! ## Processing order: `topo_shape_sort`
7//!
8//! Elements are sorted in two passes before the fold begins:
9//!
10//! **Pass 1 — Inter-bucket ordering** (fixed shape class priority):
11//! ```text
12//! GNode < GRelationship < GWalk < GAnnotation < GOther
13//! ```
14//! This ensures cross-class containment dependencies are satisfied: nodes (atomic)
15//! before relationships (contain nodes), relationships before walks, walks before
16//! annotations (can reference any shape below them), and annotations before other
17//! (`GOther` is unconstrained).
18//!
19//! **Pass 2 — Within-bucket ordering** (Kahn's algorithm):
20//! Applied to the `GAnnotation` and `GOther` buckets only. Direct sub-elements
21//! (`Pattern::elements`) that belong to the same bucket are treated as dependencies
22//! — they must appear before the element that contains them.
23//!
24//! `GNode`, `GRelationship`, and `GWalk` require no within-bucket sort: by the
25//! definition of `classify_by_shape`, their sub-elements always belong to a
26//! lower-priority bucket.
27//!
28//! **Cycle handling**: If a dependency cycle is detected within a bucket (e.g.,
29//! annotation A references annotation B which references A), the cycle members
30//! are appended after all non-cycle elements in their encountered order. No error
31//! is raised. Cycle members receive `sub_results = &[]` for the other cycle
32//! members (soft-miss). Callers should treat `sub_results` as best-effort.
33
34use std::collections::{HashMap, VecDeque};
35use std::hash::Hash;
36
37use crate::graph::graph_classifier::{GraphClass, GraphValue};
38use crate::graph::graph_query::GraphQuery;
39use crate::graph::graph_view::GraphView;
40use crate::pattern::Pattern;
41
42// ============================================================================
43// topo_shape_sort
44// ============================================================================
45
46/// Sort element indices into bottom-up containment order for `para_graph`.
47///
48/// Returns indices into `elems` in two-pass order:
49/// 1. Partitioned by shape class: GNode, GRelationship, GWalk, GAnnotation, GOther.
50/// 2. Within GAnnotation and GOther buckets: topologically sorted by in-bucket
51///    dependencies via Kahn's algorithm; cycle members appended last.
52///
53/// Mirrors `topoShapeSort` in the Haskell reference implementation.
54fn topo_shape_sort<Extra, V: GraphValue>(elems: &[(GraphClass<Extra>, Pattern<V>)]) -> Vec<usize>
55where
56    V::Id: Eq + Hash + Clone,
57{
58    let mut buckets: [Vec<usize>; 5] = [Vec::new(), Vec::new(), Vec::new(), Vec::new(), Vec::new()];
59
60    for (i, (cls, _)) in elems.iter().enumerate() {
61        let rank = match cls {
62            GraphClass::GNode => 0,
63            GraphClass::GRelationship => 1,
64            GraphClass::GWalk => 2,
65            GraphClass::GAnnotation => 3,
66            GraphClass::GOther(_) => 4,
67        };
68        buckets[rank].push(i);
69    }
70
71    let mut result = Vec::with_capacity(elems.len());
72
73    for (rank, bucket) in buckets.iter().enumerate() {
74        if rank >= 3 {
75            // GAnnotation (3) and GOther (4): within-bucket topological sort
76            result.extend(within_bucket_topo_sort(elems, bucket));
77        } else {
78            // GNode, GRelationship, GWalk: no within-bucket deps possible
79            result.extend_from_slice(bucket);
80        }
81    }
82
83    result
84}
85
86// ============================================================================
87// within_bucket_topo_sort
88// ============================================================================
89
90/// Kahn's topological sort within a single bucket.
91///
92/// `bucket` is a slice of indices into `elems`. For each element `p` in the
93/// bucket, any sub-element (`p.elements`) whose identity also belongs to the
94/// bucket is treated as a dependency: it must be processed before `p`.
95///
96/// Returns bucket indices sorted so dependencies come first. Cycle members
97/// are appended after all non-cycle elements in their encountered order.
98fn within_bucket_topo_sort<Extra, V: GraphValue>(
99    elems: &[(GraphClass<Extra>, Pattern<V>)],
100    bucket: &[usize],
101) -> Vec<usize>
102where
103    V::Id: Eq + Hash + Clone,
104{
105    if bucket.is_empty() {
106        return Vec::new();
107    }
108
109    let n = bucket.len();
110
111    // Map element identity → position within bucket (0..n)
112    let id_to_bucket_pos: HashMap<V::Id, usize> = bucket
113        .iter()
114        .enumerate()
115        .map(|(pos, &idx)| (elems[idx].1.value.identify().clone(), pos))
116        .collect();
117
118    // in_degree[pos] = number of in-bucket deps for element at bucket[pos]
119    // dependents[pos] = bucket positions whose element depends on element at pos
120    let mut in_degree = vec![0usize; n];
121    let mut dependents: Vec<Vec<usize>> = vec![Vec::new(); n];
122
123    for pos in 0..n {
124        let (_, p) = &elems[bucket[pos]];
125        for e in &p.elements {
126            let eid = e.value.identify();
127            if let Some(&dep_pos) = id_to_bucket_pos.get(eid) {
128                // p (at pos) depends on e (at dep_pos)
129                in_degree[pos] += 1;
130                dependents[dep_pos].push(pos);
131            }
132        }
133    }
134
135    // Kahn's: initialize queue with zero-in-degree positions (encountered order)
136    let mut queue: VecDeque<usize> = (0..n).filter(|&pos| in_degree[pos] == 0).collect();
137    let mut sorted_positions: Vec<usize> = Vec::with_capacity(n);
138
139    while let Some(pos) = queue.pop_front() {
140        sorted_positions.push(pos);
141        for &succ_pos in &dependents[pos] {
142            in_degree[succ_pos] -= 1;
143            if in_degree[succ_pos] == 0 {
144                queue.push_back(succ_pos);
145            }
146        }
147    }
148
149    // Append cycle members in encountered order
150    let mut in_sorted = vec![false; n];
151    for &pos in &sorted_positions {
152        in_sorted[pos] = true;
153    }
154    for (pos, &included) in in_sorted.iter().enumerate() {
155        if !included {
156            sorted_positions.push(pos);
157        }
158    }
159
160    // Convert bucket positions back to elems indices
161    sorted_positions.iter().map(|&pos| bucket[pos]).collect()
162}
163
164// ============================================================================
165// para_graph_with_seed (private helper)
166// ============================================================================
167
168/// Run one paramorphism round over `view`, seeding sub-element results from `seed`.
169///
170/// Processes all `view_elements` in `topo_shape_sort` order. For each element `p`,
171/// looks up results for its direct syntactic children (`p.elements`) in the
172/// accumulator (which starts as `seed` and grows within the round — Gauss-Seidel
173/// style).
174///
175/// Mirrors `paraGraphWithSeed` in the Haskell reference.
176fn para_graph_with_seed<Extra, V: GraphValue, R: Clone>(
177    f: &impl Fn(&GraphQuery<V>, &Pattern<V>, &[R]) -> R,
178    view: &GraphView<Extra, V>,
179    seed: HashMap<V::Id, R>,
180) -> HashMap<V::Id, R>
181where
182    V::Id: Eq + Hash + Clone,
183{
184    let query = &view.view_query;
185    let order = topo_shape_sort(&view.view_elements);
186    let mut acc = seed;
187
188    for i in order {
189        let (_, p) = &view.view_elements[i];
190        let sub_results: Vec<R> = p
191            .elements
192            .iter()
193            .filter_map(|e| acc.get(e.value.identify()).cloned())
194            .collect();
195        let r = f(query, p, &sub_results);
196        acc.insert(p.value.identify().clone(), r);
197    }
198
199    acc
200}
201
202// ============================================================================
203// para_graph
204// ============================================================================
205
206/// Single-pass shape-aware structural fold over a `GraphView`.
207///
208/// Processes ALL elements (nodes, relationships, walks, annotations, other) in
209/// `topo_shape_sort` order so that each element receives already-computed results
210/// for its direct syntactic children (`Pattern::elements`).
211///
212/// The `sub_results` slice passed to `f` is best-effort: it contains one result
213/// per direct sub-element that has already been processed. For cycle-free graphs
214/// this is always complete. For cycle members within the `GAnnotation` or `GOther`
215/// buckets, other members of the cycle will be absent from `sub_results`. Handle
216/// `sub_results = &[]` as a valid, non-error input.
217///
218/// Returns a map from element identity to computed result.
219///
220/// Mirrors `paraGraph` in the Haskell reference implementation.
221#[inline]
222pub fn para_graph<Extra, V: GraphValue, R: Clone>(
223    f: impl Fn(&GraphQuery<V>, &Pattern<V>, &[R]) -> R,
224    view: &GraphView<Extra, V>,
225) -> HashMap<V::Id, R>
226where
227    V::Id: Eq + Hash + Clone,
228{
229    para_graph_with_seed(&f, view, HashMap::new())
230}
231
232// ============================================================================
233// para_graph_fixed
234// ============================================================================
235
236/// Fixed-point shape-aware fold for iterative convergence.
237///
238/// Iterates rounds of `para_graph` until `converged(old, new)` returns `true`
239/// for all elements, then returns the final map. Each element starts with
240/// `init.clone()`.
241///
242/// Each round uses the same `topo_shape_sort` ordering (the `GraphView` is
243/// immutable). Within each round, already-processed elements' results are
244/// available to later elements (Gauss-Seidel style).
245///
246/// Mirrors `paraGraphFixed` in the Haskell reference implementation.
247#[inline]
248pub fn para_graph_fixed<Extra, V: GraphValue, R: Clone>(
249    converged: impl Fn(&R, &R) -> bool,
250    f: impl Fn(&GraphQuery<V>, &Pattern<V>, &[R]) -> R,
251    init: R,
252    view: &GraphView<Extra, V>,
253) -> HashMap<V::Id, R>
254where
255    V::Id: Eq + Hash + Clone,
256{
257    // Initialize all elements with init
258    let mut current: HashMap<V::Id, R> = view
259        .view_elements
260        .iter()
261        .map(|(_, p)| (p.value.identify().clone(), init.clone()))
262        .collect();
263
264    loop {
265        let next = para_graph_with_seed(&f, view, current.clone());
266
267        // Converged when conv(prev, new) holds for every key in next
268        let all_converged = next
269            .iter()
270            .all(|(k, new_r)| current.get(k).is_some_and(|old_r| converged(old_r, new_r)));
271
272        current = next;
273
274        if all_converged {
275            break;
276        }
277    }
278
279    current
280}