Skip to main content

sqry_db/queries/
scc.rs

1//! SCC (Strongly Connected Components) derived query.
2//!
3//! Wraps SCC computation as a [`DerivedQuery`] with `TRACKS_EDGE_REVISION = true`.
4//! The cached result is automatically invalidated when any file's edges change.
5//!
6//! The actual Tarjan implementation lives in `sqry-core::graph::unified::analysis::scc`.
7//! This wrapper provides the caching and invalidation layer. Phase 3C migration
8//! (DB14) will wire the existing `find_all_cycles_graph` and `find_cycle_containing_node`
9//! to use this cached query instead of computing SCC from scratch on every call.
10
11use std::collections::{HashMap, HashSet};
12use std::sync::Arc;
13
14use sqry_core::graph::unified::concurrent::GraphSnapshot;
15use sqry_core::graph::unified::edge::kind::EdgeKind;
16use sqry_core::graph::unified::node::id::NodeId;
17
18use crate::QueryDb;
19use crate::dependency::record_file_dep;
20use crate::query::DerivedQuery;
21
22// PN3 cold-start persistence: CachedSccData is serialized via postcard at
23// cache-insert time. HashMap<NodeId, u32> is serde-able because NodeId derives
24// Serialize/Deserialize. EdgeKind also derives Serialize/Deserialize.
25
26/// Type alias for the key used by [`SccQuery`] (an [`EdgeKind`] discriminant).
27/// `EdgeKind` already derives `Serialize`/`Deserialize` from sqry-core.
28pub type SccKey = EdgeKind;
29
30/// Type alias for the value produced by [`SccQuery`].
31/// `Arc` is serde-transparent when the workspace `serde` `rc` feature is enabled.
32pub type SccValue = std::sync::Arc<CachedSccData>;
33
34/// Cached SCC result mapping each node to its SCC component index.
35///
36/// Component indices are arbitrary (0-based). Two nodes in the same SCC
37/// have the same component index.
38// HashMap<NodeId, u32>: serde-able because NodeId derives Serialize/Deserialize.
39#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
40pub struct CachedSccData {
41    /// NodeId → SCC component index.
42    pub node_to_component: HashMap<NodeId, u32>,
43    /// SCC component index → member node IDs.
44    pub components: Vec<Vec<NodeId>>,
45    /// Edge kind this SCC was computed for.
46    pub edge_kind: EdgeKind,
47}
48
49impl CachedSccData {
50    /// Returns the SCC component index for a node, or `None` if not in any SCC.
51    #[must_use]
52    pub fn component_of(&self, node: NodeId) -> Option<u32> {
53        self.node_to_component.get(&node).copied()
54    }
55
56    /// Returns true if the given node is part of a non-trivial cycle (SCC size > 1).
57    #[must_use]
58    pub fn is_in_cycle(&self, node: NodeId) -> bool {
59        self.component_of(node)
60            .map(|idx| {
61                self.components
62                    .get(idx as usize)
63                    .is_some_and(|c| c.len() > 1)
64            })
65            .unwrap_or(false)
66    }
67
68    /// Returns the total number of SCC components.
69    #[must_use]
70    pub fn component_count(&self) -> usize {
71        self.components.len()
72    }
73}
74
75/// Computes strongly connected components for a given edge kind.
76///
77/// # Invalidation
78///
79/// `TRACKS_EDGE_REVISION = true`: invalidated when any edge changes.
80pub struct SccQuery;
81
82impl DerivedQuery for SccQuery {
83    type Key = EdgeKind;
84    type Value = Arc<CachedSccData>;
85    const QUERY_TYPE_ID: u32 = crate::queries::type_ids::SCC;
86    const TRACKS_EDGE_REVISION: bool = true;
87
88    fn execute(key: &EdgeKind, _db: &QueryDb, snapshot: &GraphSnapshot) -> Arc<CachedSccData> {
89        // Record all files as deps (global topology query)
90        for (fid, _seg) in snapshot.file_segments().iter() {
91            record_file_dep(fid);
92        }
93
94        // Iterative Tarjan SCC via BidirectionalEdgeStore::edges_from.
95        // This works on both CSR and delta buffer edges transparently.
96        let mut index_counter = 0u32;
97        let mut stack: Vec<NodeId> = Vec::new();
98        let mut on_stack: HashSet<NodeId> = HashSet::new();
99        let mut indices: HashMap<NodeId, u32> = HashMap::new();
100        let mut lowlinks: HashMap<NodeId, u32> = HashMap::new();
101        let mut components: Vec<Vec<NodeId>> = Vec::new();
102
103        // Collect all nodes.
104        // Gate 0d iter-2 fix: skip unified losers from SCC
105        // computation. They have no outgoing edges post-finalize
106        // (remapped to the winner via `NodeRemapTable`) so they would
107        // each be a trivial 1-element SCC, but that's still a
108        // publish-visible leak of loser IDs. See
109        // `NodeEntry::is_unified_loser`.
110        let all_nodes: Vec<NodeId> = snapshot
111            .nodes()
112            .iter()
113            .filter(|(_nid, entry)| !entry.is_unified_loser())
114            .map(|(nid, _)| nid)
115            .collect();
116
117        // Iterative Tarjan using an explicit work stack
118        for &start in &all_nodes {
119            if indices.contains_key(&start) {
120                continue;
121            }
122
123            // DFS work stack: (node, neighbor_iterator_position)
124            let mut work: Vec<(NodeId, usize)> = vec![(start, 0)];
125            indices.insert(start, index_counter);
126            lowlinks.insert(start, index_counter);
127            index_counter += 1;
128            stack.push(start);
129            on_stack.insert(start);
130
131            while let Some((node, pos)) = work.last_mut() {
132                let neighbors: Vec<NodeId> = snapshot
133                    .edges()
134                    .edges_from(*node)
135                    .iter()
136                    .filter(|e| std::mem::discriminant(&e.kind) == std::mem::discriminant(key))
137                    .map(|e| e.target)
138                    .collect();
139
140                if *pos < neighbors.len() {
141                    let neighbor = neighbors[*pos];
142                    *pos += 1;
143
144                    if let std::collections::hash_map::Entry::Vacant(e) = indices.entry(neighbor) {
145                        e.insert(index_counter);
146                        lowlinks.insert(neighbor, index_counter);
147                        index_counter += 1;
148                        stack.push(neighbor);
149                        on_stack.insert(neighbor);
150                        work.push((neighbor, 0));
151                    } else if on_stack.contains(&neighbor) {
152                        let node_copy = *node;
153                        let neighbor_idx = indices[&neighbor];
154                        let current_low = lowlinks[&node_copy];
155                        if neighbor_idx < current_low {
156                            lowlinks.insert(node_copy, neighbor_idx);
157                        }
158                    }
159                } else {
160                    // All neighbors processed; check if root of SCC
161                    let node_copy = *node;
162                    let node_idx = indices[&node_copy];
163                    let node_low = lowlinks[&node_copy];
164
165                    if node_low == node_idx {
166                        // Root of SCC — pop stack to form component
167                        let mut component = Vec::new();
168                        while let Some(w) = stack.pop() {
169                            on_stack.remove(&w);
170                            component.push(w);
171                            if w == node_copy {
172                                break;
173                            }
174                        }
175                        components.push(component);
176                    }
177
178                    // Propagate lowlink to parent
179                    work.pop();
180                    if let Some((parent, _)) = work.last() {
181                        let parent_copy = *parent;
182                        let parent_low = lowlinks[&parent_copy];
183                        if node_low < parent_low {
184                            lowlinks.insert(parent_copy, node_low);
185                        }
186                    }
187                }
188            }
189        }
190
191        // Build the node-to-component map
192        let mut node_to_component = HashMap::with_capacity(all_nodes.len());
193        for (idx, component) in components.iter().enumerate() {
194            for &nid in component {
195                node_to_component.insert(nid, idx as u32);
196            }
197        }
198
199        Arc::new(CachedSccData {
200            node_to_component,
201            components,
202            edge_kind: key.clone(),
203        })
204    }
205}
206
207// ============================================================================
208// PN3 serde roundtrip tests
209// ============================================================================
210
211#[cfg(test)]
212mod serde_roundtrip {
213    use super::*;
214    use postcard::{from_bytes, to_allocvec};
215
216    #[test]
217    fn cached_scc_data_roundtrip() {
218        let mut node_to_component = HashMap::new();
219        node_to_component.insert(NodeId::new(1, 1), 0u32);
220        node_to_component.insert(NodeId::new(2, 1), 0u32);
221        node_to_component.insert(NodeId::new(3, 1), 1u32);
222        let original = CachedSccData {
223            node_to_component,
224            components: vec![
225                vec![NodeId::new(1, 1), NodeId::new(2, 1)],
226                vec![NodeId::new(3, 1)],
227            ],
228            edge_kind: EdgeKind::Calls {
229                argument_count: 0,
230                is_async: false,
231            },
232        };
233        let bytes = to_allocvec(&original).expect("serialize failed");
234        let decoded: CachedSccData = from_bytes(&bytes).expect("deserialize failed");
235        // Verify components round-trip exactly (ordered).
236        assert_eq!(decoded.components, original.components);
237        // Verify edge_kind round-trips exactly.
238        assert_eq!(decoded.edge_kind, original.edge_kind);
239        // Verify node_to_component round-trips (check each entry).
240        for (node, comp) in &original.node_to_component {
241            assert_eq!(decoded.node_to_component.get(node), Some(comp));
242        }
243    }
244
245    #[test]
246    fn scc_key_roundtrip() {
247        // SccKey = EdgeKind — already has serde; roundtrip confirms usage.
248        let original: SccKey = EdgeKind::Imports {
249            alias: None,
250            is_wildcard: false,
251        };
252        let bytes = to_allocvec(&original).expect("serialize failed");
253        let decoded: SccKey = from_bytes(&bytes).expect("deserialize failed");
254        assert_eq!(decoded, original);
255    }
256
257    #[test]
258    fn scc_value_roundtrip() {
259        // SccValue = Arc<CachedSccData>
260        let data = CachedSccData {
261            node_to_component: HashMap::new(),
262            components: vec![],
263            edge_kind: EdgeKind::References,
264        };
265        let original: SccValue = Arc::new(data);
266        let bytes = to_allocvec(&original).expect("serialize failed");
267        let decoded: SccValue = from_bytes(&bytes).expect("deserialize failed");
268        assert_eq!(decoded.components, original.components);
269        assert_eq!(decoded.edge_kind, original.edge_kind);
270    }
271}