Skip to main content

sqry_db/queries/
cycles.rs

1//! Cycle detection derived queries, built on top of [`super::SccQuery`].
2//!
3//! Migrates `sqry-core::query::executor::graph_cycles::find_all_cycles_graph`
4//! and `is_node_in_cycle` into cache-aware queries. The underlying Tarjan SCC
5//! implementation continues to live in [`super::scc`]; the queries in this
6//! module only apply cycle filtering (self-loop handling, `min_depth` /
7//! `max_depth` bounds, `max_results` cap) on top of the cached SCC data.
8//!
9//! # Why not call back into `graph_cycles`?
10//!
11//! The `sqry-core` functions build their own adjacency + Tarjan state from
12//! scratch on every call. With DB14, every analysis path that wants cycle
13//! detection should share the [`super::SccQuery`] cache so one Tarjan run
14//! per edge-revision bump is enough. These queries expose the filtered
15//! `Vec<Vec<NodeId>>` / boolean answers the sqry-core functions currently
16//! produce.
17//!
18//! # Self-loop semantics
19//!
20//! [`sqry_core::query::CircularConfig::should_include_self_loops`] gates
21//! whether single-node SCCs containing a self-edge of the relevant kind are
22//! reported. A node participates in a self-loop iff it has an edge of the
23//! relevant kind back to itself. The queries consult the snapshot's edge
24//! store directly — `SccQuery` alone cannot distinguish a size-1 SCC with a
25//! self-loop from an isolated node.
26
27use std::sync::Arc;
28
29use sqry_core::graph::unified::concurrent::GraphSnapshot;
30use sqry_core::graph::unified::edge::kind::EdgeKind;
31use sqry_core::graph::unified::node::id::NodeId;
32use sqry_core::query::CircularType;
33
34use crate::QueryDb;
35use crate::dependency::record_file_dep;
36use crate::query::DerivedQuery;
37
38use super::SccQuery;
39
40// ============================================================================
41// Key + result types
42// ============================================================================
43
44// PN3 cold-start persistence: CycleBounds, CyclesKey, IsInCycleKey are
45// serialized via postcard at cache-insert time. CircularType gains
46// Serialize/Deserialize in sqry-core/src/query/cycles_config.rs (PN3 SERDE_DERIVES).
47
48/// Type alias for the value produced by [`CyclesQuery`].
49/// Arc is serde-transparent when the workspace `serde` `rc` feature is enabled.
50pub type CyclesValue = std::sync::Arc<Vec<Vec<sqry_core::graph::unified::node::id::NodeId>>>;
51
52/// Type alias for the value produced by [`IsInCycleQuery`].
53pub type IsInCycleValue = bool;
54
55/// Bounds applied to SCC components before they are reported as cycles.
56///
57/// Mirrors [`sqry_core::query::CircularConfig`] but lives here so the type
58/// is [`Hash`]+[`Eq`]+[`Clone`] for use as a `DerivedQuery` cache key.
59#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
60pub struct CycleBounds {
61    /// Minimum cycle depth to report (default: `2`). A size-1 SCC only
62    /// counts as a cycle when `should_include_self_loops` is true.
63    pub min_depth: usize,
64    /// Maximum cycle depth to report (`None` = unbounded).
65    pub max_depth: Option<usize>,
66    /// Maximum number of cycles to return (truncates large result sets).
67    pub max_results: usize,
68    /// Whether size-1 SCCs that carry a self-edge count as cycles.
69    pub should_include_self_loops: bool,
70}
71
72impl Default for CycleBounds {
73    fn default() -> Self {
74        Self {
75            min_depth: 2,
76            max_depth: None,
77            max_results: 100,
78            should_include_self_loops: false,
79        }
80    }
81}
82
83/// Cache key for [`CyclesQuery`].
84#[derive(Debug, Clone, Hash, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
85pub struct CyclesKey {
86    /// Which edge kind to traverse: `Calls`, `Imports`, or `Modules`.
87    pub circular_type: CircularType,
88    /// Cycle filtering bounds.
89    pub bounds: CycleBounds,
90}
91
92/// Cache key for [`IsInCycleQuery`].
93#[derive(Debug, Clone, Hash, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
94pub struct IsInCycleKey {
95    /// The node to check.
96    pub node_id: NodeId,
97    /// Which edge kind to traverse.
98    pub circular_type: CircularType,
99    /// Cycle filtering bounds.
100    pub bounds: CycleBounds,
101}
102
103// ============================================================================
104// CyclesQuery
105// ============================================================================
106
107/// Returns every cycle in the graph as a sorted vector of `NodeId` lists.
108///
109/// Each inner `Vec<NodeId>` is a strongly connected component (SCC) whose
110/// size meets the [`CycleBounds`] criteria. Non-trivial SCCs are reported
111/// unconditionally; size-1 SCCs are included only when the node carries a
112/// self-edge and `should_include_self_loops` is true. The outer vector is
113/// truncated to `bounds.max_results`.
114///
115/// # Invalidation
116///
117/// `TRACKS_EDGE_REVISION = true`: any edge change rebuilds the SCC data
118/// (via [`SccQuery`]) and therefore the filtered cycle list.
119pub struct CyclesQuery;
120
121impl DerivedQuery for CyclesQuery {
122    type Key = CyclesKey;
123    type Value = Arc<Vec<Vec<NodeId>>>;
124    const QUERY_TYPE_ID: u32 = crate::queries::type_ids::CYCLES;
125    const TRACKS_EDGE_REVISION: bool = true;
126
127    fn execute(key: &CyclesKey, db: &QueryDb, snapshot: &GraphSnapshot) -> Arc<Vec<Vec<NodeId>>> {
128        for (fid, _) in snapshot.file_segments().iter() {
129            record_file_dep(fid);
130        }
131
132        let edge_probe = edge_probe_for(key.circular_type);
133        let scc = db.get::<SccQuery>(&edge_probe);
134
135        let mut cycles: Vec<Vec<NodeId>> = Vec::new();
136        for component in &scc.components {
137            if cycles.len() >= key.bounds.max_results {
138                break;
139            }
140            let size = component.len();
141            let is_self_loop =
142                size == 1 && node_has_self_loop(snapshot, component[0], key.circular_type);
143            if !should_include(size, is_self_loop, &key.bounds) {
144                continue;
145            }
146            cycles.push(component.clone());
147        }
148
149        Arc::new(cycles)
150    }
151}
152
153// ============================================================================
154// IsInCycleQuery
155// ============================================================================
156
157/// Returns `true` if `node_id` belongs to an SCC that satisfies the
158/// [`CycleBounds`] criteria.
159///
160/// Single-node SCCs are considered cycles iff the node has a self-edge of the
161/// relevant kind *and* `should_include_self_loops` is true, matching
162/// `graph_cycles::is_node_in_cycle`.
163pub struct IsInCycleQuery;
164
165impl DerivedQuery for IsInCycleQuery {
166    type Key = IsInCycleKey;
167    type Value = bool;
168    const QUERY_TYPE_ID: u32 = crate::queries::type_ids::IS_IN_CYCLE;
169    const TRACKS_EDGE_REVISION: bool = true;
170
171    fn execute(key: &IsInCycleKey, db: &QueryDb, snapshot: &GraphSnapshot) -> bool {
172        // Record deps for the node's file (narrow dep footprint when the edge
173        // revision counter hasn't changed and the node's file isn't edited).
174        if let Some(entry) = snapshot.nodes().get(key.node_id) {
175            record_file_dep(entry.file);
176        }
177
178        let self_loop = node_has_self_loop(snapshot, key.node_id, key.circular_type);
179        if self_loop && key.bounds.should_include_self_loops {
180            return true;
181        }
182
183        let edge_probe = edge_probe_for(key.circular_type);
184        let scc = db.get::<SccQuery>(&edge_probe);
185        let Some(component_idx) = scc.component_of(key.node_id) else {
186            return false;
187        };
188        let Some(component) = scc.components.get(component_idx as usize) else {
189            return false;
190        };
191
192        let size = component.len();
193        // Size-1 SCCs with a self-loop are already handled above. For other
194        // nodes, enforce `min_depth >= 2` implicitly — a size-1 SCC without a
195        // self-loop is not a cycle.
196        if size < 2 {
197            return false;
198        }
199        if size < key.bounds.min_depth {
200            return false;
201        }
202        if key.bounds.max_depth.is_some_and(|max| size > max) {
203            return false;
204        }
205        true
206    }
207}
208
209// ============================================================================
210// Helpers
211// ============================================================================
212
213fn edge_probe_for(circular_type: CircularType) -> EdgeKind {
214    match circular_type {
215        CircularType::Calls => EdgeKind::Calls {
216            argument_count: 0,
217            is_async: false,
218        },
219        CircularType::Imports | CircularType::Modules => EdgeKind::Imports {
220            alias: None,
221            is_wildcard: false,
222        },
223    }
224}
225
226fn node_has_self_loop(
227    snapshot: &GraphSnapshot,
228    node_id: NodeId,
229    circular_type: CircularType,
230) -> bool {
231    for edge in &snapshot.edges().edges_from(node_id) {
232        if edge.target != node_id {
233            continue;
234        }
235        let is_match = match circular_type {
236            CircularType::Calls => matches!(edge.kind, EdgeKind::Calls { .. }),
237            CircularType::Imports | CircularType::Modules => {
238                matches!(edge.kind, EdgeKind::Imports { .. })
239            }
240        };
241        if is_match {
242            return true;
243        }
244    }
245    false
246}
247
248fn should_include(size: usize, is_self_loop: bool, bounds: &CycleBounds) -> bool {
249    if is_self_loop {
250        return bounds.should_include_self_loops;
251    }
252    if size == 1 {
253        return false;
254    }
255    if size < bounds.min_depth {
256        return false;
257    }
258    if bounds.max_depth.is_some_and(|max| size > max) {
259        return false;
260    }
261    true
262}
263
264// ============================================================================
265// PN3 serde roundtrip tests
266// ============================================================================
267
268#[cfg(test)]
269mod serde_roundtrip {
270    use super::*;
271    use postcard::{from_bytes, to_allocvec};
272    use sqry_core::graph::unified::node::id::NodeId;
273    use sqry_core::query::CircularType;
274    use std::sync::Arc;
275
276    #[test]
277    fn cycle_bounds_roundtrip() {
278        let original = CycleBounds {
279            min_depth: 3,
280            max_depth: Some(10),
281            max_results: 50,
282            should_include_self_loops: true,
283        };
284        let bytes = to_allocvec(&original).expect("serialize failed");
285        let decoded: CycleBounds = from_bytes(&bytes).expect("deserialize failed");
286        assert_eq!(decoded, original);
287    }
288
289    #[test]
290    fn cycle_bounds_no_max_depth_roundtrip() {
291        let original = CycleBounds::default();
292        let bytes = to_allocvec(&original).expect("serialize failed");
293        let decoded: CycleBounds = from_bytes(&bytes).expect("deserialize failed");
294        assert_eq!(decoded, original);
295    }
296
297    #[test]
298    fn cycles_key_roundtrip() {
299        let original = CyclesKey {
300            circular_type: CircularType::Calls,
301            bounds: CycleBounds::default(),
302        };
303        let bytes = to_allocvec(&original).expect("serialize failed");
304        let decoded: CyclesKey = from_bytes(&bytes).expect("deserialize failed");
305        assert_eq!(decoded, original);
306    }
307
308    #[test]
309    fn is_in_cycle_key_roundtrip() {
310        let original = IsInCycleKey {
311            node_id: NodeId::new(42, 7),
312            circular_type: CircularType::Imports,
313            bounds: CycleBounds {
314                min_depth: 2,
315                max_depth: None,
316                max_results: 100,
317                should_include_self_loops: false,
318            },
319        };
320        let bytes = to_allocvec(&original).expect("serialize failed");
321        let decoded: IsInCycleKey = from_bytes(&bytes).expect("deserialize failed");
322        assert_eq!(decoded, original);
323    }
324
325    #[test]
326    fn cycles_value_roundtrip() {
327        // CyclesValue = Arc<Vec<Vec<NodeId>>>
328        let original: CyclesValue = Arc::new(vec![vec![NodeId::new(1, 1), NodeId::new(2, 1)]]);
329        let bytes = to_allocvec(&original).expect("serialize failed");
330        let decoded: CyclesValue = from_bytes(&bytes).expect("deserialize failed");
331        assert_eq!(decoded.as_ref(), original.as_ref());
332    }
333
334    #[test]
335    fn is_in_cycle_value_roundtrip() {
336        // IsInCycleValue = bool
337        for val in [true, false] {
338            let bytes = to_allocvec(&val).expect("serialize failed");
339            let decoded: IsInCycleValue = from_bytes(&bytes).expect("deserialize failed");
340            assert_eq!(decoded, val);
341        }
342    }
343}
344
345// ============================================================================
346// Tests
347// ============================================================================
348
349#[cfg(test)]
350mod tests {
351    use super::*;
352    use crate::QueryDbConfig;
353    use sqry_core::graph::unified::concurrent::CodeGraph;
354    use sqry_core::graph::unified::node::kind::NodeKind;
355    use sqry_core::graph::unified::storage::arena::NodeEntry;
356    use std::path::Path;
357
358    fn alloc_fn(graph: &mut CodeGraph, name: &str) -> NodeId {
359        let file = graph.files_mut().register(Path::new("x.rs")).unwrap();
360        let name_id = graph.strings_mut().intern(name).unwrap();
361        graph
362            .nodes_mut()
363            .alloc(NodeEntry::new(NodeKind::Function, name_id, file).with_qualified_name(name_id))
364            .unwrap()
365    }
366
367    fn add_call(graph: &mut CodeGraph, src: NodeId, tgt: NodeId) {
368        let file = graph.nodes().get(src).unwrap().file;
369        graph.edges_mut().add_edge(
370            src,
371            tgt,
372            EdgeKind::Calls {
373                argument_count: 0,
374                is_async: false,
375            },
376            file,
377        );
378    }
379
380    fn build_db(graph: CodeGraph) -> QueryDb {
381        let snapshot = Arc::new(graph.snapshot());
382        let mut db = QueryDb::new(snapshot, QueryDbConfig::default());
383        db.register::<SccQuery>();
384        db.register::<CyclesQuery>();
385        db.register::<IsInCycleQuery>();
386        db
387    }
388
389    #[test]
390    fn cycles_query_detects_two_node_cycle() {
391        let mut graph = CodeGraph::new();
392        let a = alloc_fn(&mut graph, "a");
393        let b = alloc_fn(&mut graph, "b");
394        add_call(&mut graph, a, b);
395        add_call(&mut graph, b, a);
396
397        let db = build_db(graph);
398        let key = CyclesKey {
399            circular_type: CircularType::Calls,
400            bounds: CycleBounds::default(),
401        };
402        let cycles = db.get::<CyclesQuery>(&key);
403        assert_eq!(cycles.len(), 1);
404        assert_eq!(cycles[0].len(), 2);
405    }
406
407    #[test]
408    fn is_in_cycle_self_loop_requires_opt_in() {
409        let mut graph = CodeGraph::new();
410        let a = alloc_fn(&mut graph, "a");
411        add_call(&mut graph, a, a);
412
413        let db = build_db(graph);
414        let default_key = IsInCycleKey {
415            node_id: a,
416            circular_type: CircularType::Calls,
417            bounds: CycleBounds::default(),
418        };
419        assert!(
420            !db.get::<IsInCycleQuery>(&default_key),
421            "self-loop excluded by default"
422        );
423        let opted_key = IsInCycleKey {
424            node_id: a,
425            circular_type: CircularType::Calls,
426            bounds: CycleBounds {
427                should_include_self_loops: true,
428                min_depth: 1,
429                ..CycleBounds::default()
430            },
431        };
432        assert!(
433            db.get::<IsInCycleQuery>(&opted_key),
434            "self-loop reported when opted-in"
435        );
436    }
437
438    #[test]
439    fn cycles_query_respects_max_results() {
440        let mut graph = CodeGraph::new();
441        let a = alloc_fn(&mut graph, "a");
442        let b = alloc_fn(&mut graph, "b");
443        let c = alloc_fn(&mut graph, "c");
444        let d = alloc_fn(&mut graph, "d");
445        // Two independent 2-cycles
446        add_call(&mut graph, a, b);
447        add_call(&mut graph, b, a);
448        add_call(&mut graph, c, d);
449        add_call(&mut graph, d, c);
450
451        let db = build_db(graph);
452        let key = CyclesKey {
453            circular_type: CircularType::Calls,
454            bounds: CycleBounds {
455                max_results: 1,
456                ..CycleBounds::default()
457            },
458        };
459        let cycles = db.get::<CyclesQuery>(&key);
460        assert_eq!(cycles.len(), 1);
461    }
462}