Skip to main content

sqry_core/query/executor/
graph_unused.rs

1//! Unused symbol detection for unified graph.
2//!
3//! This module provides unused/dead code detection using the unified graph API,
4//! replacing the legacy index-based unused detection.
5
6use crate::graph::unified::concurrent::CodeGraph;
7use crate::graph::unified::edge::EdgeKind;
8use crate::graph::unified::node::{NodeId, NodeKind};
9use std::collections::HashSet;
10use std::hash::BuildHasher;
11
12/// Scope for unused analysis
13///
14/// Determines which symbols are candidates for unused detection.
15#[derive(Debug, Clone, Copy, PartialEq, Eq)]
16pub enum UnusedScope {
17    /// Public symbols with no external references
18    Public,
19    /// Private symbols with no references
20    Private,
21    /// Unused functions (any visibility)
22    Function,
23    /// Unused structs/types (any visibility)
24    Struct,
25    /// All unused symbols
26    All,
27}
28
29impl UnusedScope {
30    /// Parse scope from query value string
31    #[must_use]
32    pub fn try_parse(s: &str) -> Option<Self> {
33        match s.to_lowercase().as_str() {
34            "public" => Some(Self::Public),
35            "private" => Some(Self::Private),
36            "function" => Some(Self::Function),
37            "struct" => Some(Self::Struct),
38            "all" | "" => Some(Self::All),
39            _ => None,
40        }
41    }
42}
43
44/// Check if a node entry qualifies as an entry point for reachability analysis.
45///
46/// A node is an entry point if it is:
47/// - Public (visibility = "public" or "pub")
48/// - A main function (name = "main")
49/// - A test function (name starts with "test_" or ends with "_test")
50/// - A `Test` or `Export` node kind
51fn is_entry_point_node(
52    entry: &crate::graph::unified::storage::arena::NodeEntry,
53    strings: &crate::graph::unified::storage::StringInterner,
54) -> bool {
55    let is_public = entry
56        .visibility
57        .and_then(|id| strings.resolve(id))
58        .is_some_and(|v| v.as_ref() == "public" || v.as_ref() == "pub");
59
60    let is_main_or_test = strings.resolve(entry.name).is_some_and(|name| {
61        name.as_ref() == "main" || name.starts_with("test_") || name.ends_with("_test")
62    });
63
64    let is_export = matches!(entry.kind, NodeKind::Export);
65    let is_test_node = matches!(entry.kind, NodeKind::Test);
66
67    is_public || is_main_or_test || is_export || is_test_node
68}
69
70/// Check if an edge kind propagates reachability during BFS traversal.
71fn is_reachability_edge(kind: &EdgeKind) -> bool {
72    matches!(
73        kind,
74        EdgeKind::Calls { .. }
75            | EdgeKind::References
76            | EdgeKind::Imports { .. }
77            | EdgeKind::Inherits
78            | EdgeKind::Implements
79            | EdgeKind::TypeOf { .. }
80    )
81}
82
83/// Compute the set of reachable nodes from entry points via BFS traversal.
84///
85/// Entry points are automatically identified as:
86/// - Public functions/methods (visibility = "public" or "pub")
87/// - Main functions (name = "main")
88/// - Test functions (name starts with "test_" or ends with "_test")
89/// - Nodes of kind `Test` or `Export`
90///
91/// The traversal follows all outgoing edges to discover transitively reachable nodes.
92///
93/// # Arguments
94///
95/// * `graph` - The code graph to analyze
96///
97/// # Returns
98///
99/// A `HashSet` of `NodeId`s representing all nodes reachable from entry points.
100#[must_use]
101pub fn compute_reachable_set_graph(graph: &CodeGraph) -> HashSet<NodeId> {
102    let mut reachable = HashSet::new();
103    let mut worklist: Vec<NodeId> = Vec::new();
104
105    let strings = graph.strings();
106
107    // Find entry points
108    for (node_id, entry) in graph.nodes().iter() {
109        if is_entry_point_node(entry, strings) {
110            worklist.push(node_id);
111            reachable.insert(node_id);
112        }
113    }
114
115    // BFS to find all reachable nodes
116    while let Some(node_id) = worklist.pop() {
117        for edge in graph.edges().edges_from(node_id) {
118            if !reachable.contains(&edge.target) && is_reachability_edge(&edge.kind) {
119                reachable.insert(edge.target);
120                worklist.push(edge.target);
121            }
122        }
123    }
124
125    reachable
126}
127
128/// Check if a specific node is unused based on the scope filter.
129///
130/// A node is considered unused if:
131/// 1. It matches the scope filter (All, Public, Private, Function, or Struct)
132/// 2. It is not an entry point (main, test, or export)
133/// 3. It is not reachable from any entry point
134///
135/// # Arguments
136///
137/// * `node_id` - The node to check
138/// * `scope` - Filter for which types of nodes to check
139/// * `graph` - The code graph to analyze
140/// * `reachable` - Optional pre-computed reachable set (for efficiency when checking many nodes)
141///
142/// # Returns
143///
144/// `true` if the node is unused and matches the scope, `false` otherwise.
145/// Returns `false` for invalid or non-existent nodes.
146#[must_use]
147pub fn is_node_unused<S: BuildHasher>(
148    node_id: NodeId,
149    scope: UnusedScope,
150    graph: &CodeGraph,
151    reachable: Option<&HashSet<NodeId, S>>,
152) -> bool {
153    let Some(entry) = graph.nodes().get(node_id) else {
154        return false;
155    };
156
157    let strings = graph.strings();
158
159    // Check scope filter
160    let matches_scope = match scope {
161        UnusedScope::All => true,
162        UnusedScope::Public => entry
163            .visibility
164            .and_then(|id| strings.resolve(id))
165            .is_some_and(|v| v.as_ref() == "public" || v.as_ref() == "pub"),
166        UnusedScope::Private => {
167            let vis = entry.visibility.and_then(|id| strings.resolve(id));
168            vis.is_none() || vis.is_some_and(|v| v.as_ref() != "public" && v.as_ref() != "pub")
169        }
170        UnusedScope::Function => {
171            matches!(entry.kind, NodeKind::Function | NodeKind::Method)
172        }
173        UnusedScope::Struct => {
174            matches!(entry.kind, NodeKind::Struct | NodeKind::Class)
175        }
176    };
177
178    if !matches_scope {
179        return false;
180    }
181
182    // Skip entry points (they're always "used")
183    let is_entry_point = {
184        let is_main_or_test = strings.resolve(entry.name).is_some_and(|name| {
185            name.as_ref() == "main" || name.starts_with("test_") || name.ends_with("_test")
186        });
187
188        let is_export = matches!(entry.kind, NodeKind::Export);
189        let is_test_node = matches!(entry.kind, NodeKind::Test);
190
191        is_main_or_test || is_export || is_test_node
192    };
193
194    if is_entry_point {
195        return false;
196    }
197
198    // Check reachability
199    if let Some(reachable_set) = reachable {
200        return !reachable_set.contains(&node_id);
201    }
202
203    let reachable_set = compute_reachable_set_graph(graph);
204    !reachable_set.contains(&node_id)
205}
206
207/// Find all unused nodes in the graph that match the given scope.
208///
209/// This function first computes the reachable set once, then iterates through
210/// all nodes checking which are unused and match the scope filter.
211///
212/// # Arguments
213///
214/// * `scope` - Filter for which types of nodes to check (All, Public, Private, Function, Struct)
215/// * `graph` - The code graph to analyze
216/// * `max_results` - Maximum number of unused nodes to return
217///
218/// # Returns
219///
220/// A vector of `NodeId`s for unused nodes, limited by `max_results`.
221#[must_use]
222pub fn find_unused_nodes(scope: UnusedScope, graph: &CodeGraph, max_results: usize) -> Vec<NodeId> {
223    let reachable = compute_reachable_set_graph(graph);
224    let mut unused = Vec::new();
225
226    for (node_id, _entry) in graph.nodes().iter() {
227        if unused.len() >= max_results {
228            break;
229        }
230
231        if is_node_unused(node_id, scope, graph, Some(&reachable)) {
232            unused.push(node_id);
233        }
234    }
235
236    unused
237}
238
239#[cfg(test)]
240mod tests {
241    use super::*;
242    use crate::graph::unified::storage::arena::NodeEntry;
243    use std::collections::hash_map::RandomState;
244    use std::path::Path;
245
246    /// Options for creating test nodes.
247    struct NodeOptions<'a> {
248        name: &'a str,
249        kind: NodeKind,
250        visibility: Option<&'a str>,
251    }
252
253    impl<'a> NodeOptions<'a> {
254        fn new(name: &'a str, kind: NodeKind) -> Self {
255            Self {
256                name,
257                kind,
258                visibility: None,
259            }
260        }
261
262        fn with_visibility(mut self, vis: &'a str) -> Self {
263            self.visibility = Some(vis);
264            self
265        }
266    }
267
268    /// Helper to create a test graph with configurable nodes and edges.
269    fn create_test_graph(
270        nodes: &[NodeOptions],
271        edges: &[(usize, usize, EdgeKind)],
272    ) -> (CodeGraph, Vec<NodeId>) {
273        let mut graph = CodeGraph::new();
274        let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
275        let mut node_ids = Vec::new();
276
277        for opts in nodes {
278            let name_id = graph.strings_mut().intern(opts.name).unwrap();
279            let mut entry =
280                NodeEntry::new(opts.kind, name_id, file_id).with_qualified_name(name_id);
281
282            if let Some(vis) = opts.visibility {
283                let vis_id = graph.strings_mut().intern(vis).unwrap();
284                entry = entry.with_visibility(vis_id);
285            }
286
287            let node_id = graph.nodes_mut().alloc(entry).unwrap();
288            node_ids.push(node_id);
289        }
290
291        for (source_idx, target_idx, kind) in edges {
292            let source = node_ids[*source_idx];
293            let target = node_ids[*target_idx];
294            graph
295                .edges_mut()
296                .add_edge(source, target, kind.clone(), file_id);
297        }
298
299        (graph, node_ids)
300    }
301
302    #[test]
303    fn test_empty_graph() {
304        let graph = CodeGraph::new();
305        let reachable = compute_reachable_set_graph(&graph);
306        assert!(reachable.is_empty());
307    }
308
309    #[test]
310    fn test_find_unused_empty() {
311        let graph = CodeGraph::new();
312        let unused = find_unused_nodes(UnusedScope::All, &graph, 100);
313        assert!(unused.is_empty());
314    }
315
316    #[test]
317    fn test_public_function_is_entry_point() {
318        // Public function should be reachable
319        let nodes = [NodeOptions::new("public_func", NodeKind::Function).with_visibility("public")];
320        let (graph, node_ids) = create_test_graph(&nodes, &[]);
321
322        let reachable = compute_reachable_set_graph(&graph);
323        assert!(
324            reachable.contains(&node_ids[0]),
325            "Public function should be reachable"
326        );
327    }
328
329    #[test]
330    fn test_main_function_is_entry_point() {
331        // main function should be reachable regardless of visibility
332        let nodes = [NodeOptions::new("main", NodeKind::Function)];
333        let (graph, node_ids) = create_test_graph(&nodes, &[]);
334
335        let reachable = compute_reachable_set_graph(&graph);
336        assert!(
337            reachable.contains(&node_ids[0]),
338            "main function should be reachable"
339        );
340    }
341
342    #[test]
343    fn test_test_function_is_entry_point() {
344        // test_ prefixed functions should be reachable
345        let nodes = [
346            NodeOptions::new("test_something", NodeKind::Function),
347            NodeOptions::new("something_test", NodeKind::Function),
348            NodeOptions::new("TestNode", NodeKind::Test),
349        ];
350        let (graph, node_ids) = create_test_graph(&nodes, &[]);
351
352        let reachable = compute_reachable_set_graph(&graph);
353        assert!(
354            reachable.contains(&node_ids[0]),
355            "test_ prefixed function should be reachable"
356        );
357        assert!(
358            reachable.contains(&node_ids[1]),
359            "_test suffixed function should be reachable"
360        );
361        assert!(
362            reachable.contains(&node_ids[2]),
363            "Test node kind should be reachable"
364        );
365    }
366
367    #[test]
368    fn test_export_is_entry_point() {
369        let nodes = [NodeOptions::new("exported_value", NodeKind::Export)];
370        let (graph, node_ids) = create_test_graph(&nodes, &[]);
371
372        let reachable = compute_reachable_set_graph(&graph);
373        assert!(
374            reachable.contains(&node_ids[0]),
375            "Export should be reachable"
376        );
377    }
378
379    #[test]
380    fn test_reachability_through_calls() {
381        // Entry point -> A -> B (all should be reachable)
382        let nodes = [
383            NodeOptions::new("main", NodeKind::Function),
384            NodeOptions::new("helper_a", NodeKind::Function),
385            NodeOptions::new("helper_b", NodeKind::Function),
386        ];
387        let edges = [
388            (
389                0,
390                1,
391                EdgeKind::Calls {
392                    argument_count: 0,
393                    is_async: false,
394                },
395            ),
396            (
397                1,
398                2,
399                EdgeKind::Calls {
400                    argument_count: 0,
401                    is_async: false,
402                },
403            ),
404        ];
405        let (graph, node_ids) = create_test_graph(&nodes, &edges);
406
407        let reachable = compute_reachable_set_graph(&graph);
408        assert!(reachable.contains(&node_ids[0]), "main should be reachable");
409        assert!(
410            reachable.contains(&node_ids[1]),
411            "helper_a should be reachable via call"
412        );
413        assert!(
414            reachable.contains(&node_ids[2]),
415            "helper_b should be reachable via transitive call"
416        );
417    }
418
419    #[test]
420    fn test_reachability_through_imports() {
421        let nodes = [
422            NodeOptions::new("main", NodeKind::Function),
423            NodeOptions::new("imported_module", NodeKind::Module),
424        ];
425        let edges = [(
426            0,
427            1,
428            EdgeKind::Imports {
429                alias: None,
430                is_wildcard: false,
431            },
432        )];
433        let (graph, node_ids) = create_test_graph(&nodes, &edges);
434
435        let reachable = compute_reachable_set_graph(&graph);
436        assert!(
437            reachable.contains(&node_ids[1]),
438            "Imported module should be reachable"
439        );
440    }
441
442    #[test]
443    fn test_reachability_through_references() {
444        let nodes = [
445            NodeOptions::new("main", NodeKind::Function),
446            NodeOptions::new("referenced_var", NodeKind::Variable),
447        ];
448        let edges = [(0, 1, EdgeKind::References)];
449        let (graph, node_ids) = create_test_graph(&nodes, &edges);
450
451        let reachable = compute_reachable_set_graph(&graph);
452        assert!(
453            reachable.contains(&node_ids[1]),
454            "Referenced variable should be reachable"
455        );
456    }
457
458    #[test]
459    fn test_reachability_through_inheritance() {
460        let nodes = [
461            NodeOptions::new("main", NodeKind::Function),
462            NodeOptions::new("BaseClass", NodeKind::Class).with_visibility("public"),
463            NodeOptions::new("DerivedClass", NodeKind::Class),
464        ];
465        let edges = [(2, 1, EdgeKind::Inherits)];
466        let (graph, node_ids) = create_test_graph(&nodes, &edges);
467
468        let reachable = compute_reachable_set_graph(&graph);
469        // BaseClass is public, so it's an entry point
470        // DerivedClass inherits from BaseClass, making it reachable
471        assert!(
472            reachable.contains(&node_ids[1]),
473            "BaseClass should be reachable (public)"
474        );
475        // Note: inheritance edge goes from derived -> base, so derived is not reachable
476        // unless it's also called/used from an entry point
477    }
478
479    #[test]
480    fn test_private_function_unreachable() {
481        // Private function not called by anyone should be unused
482        let nodes = [
483            NodeOptions::new("main", NodeKind::Function),
484            NodeOptions::new("private_helper", NodeKind::Function),
485        ];
486        let (graph, node_ids) = create_test_graph(&nodes, &[]);
487
488        let reachable = compute_reachable_set_graph(&graph);
489        assert!(
490            !reachable.contains(&node_ids[1]),
491            "Uncalled private function should be unreachable"
492        );
493    }
494
495    #[test]
496    fn test_is_node_unused_scope_all() {
497        let nodes = [
498            NodeOptions::new("main", NodeKind::Function),
499            NodeOptions::new("unused_helper", NodeKind::Function),
500        ];
501        let (graph, node_ids) = create_test_graph(&nodes, &[]);
502
503        assert!(
504            !is_node_unused::<RandomState>(node_ids[0], UnusedScope::All, &graph, None),
505            "main should not be unused"
506        );
507        assert!(
508            is_node_unused::<RandomState>(node_ids[1], UnusedScope::All, &graph, None),
509            "unused_helper should be unused"
510        );
511    }
512
513    #[test]
514    fn test_is_node_unused_scope_public() {
515        let nodes = [
516            NodeOptions::new("public_unused", NodeKind::Function).with_visibility("public"),
517            NodeOptions::new("private_unused", NodeKind::Function),
518        ];
519        // Neither is called, but we check scope filtering
520        let (graph, node_ids) = create_test_graph(&nodes, &[]);
521
522        // Public scope - only checks public symbols
523        // Note: public symbols are entry points, so they're reachable but scope still matches
524        assert!(
525            !is_node_unused::<RandomState>(node_ids[0], UnusedScope::Public, &graph, None),
526            "Public function is entry point, not unused"
527        );
528        assert!(
529            !is_node_unused::<RandomState>(node_ids[1], UnusedScope::Public, &graph, None),
530            "Private function doesn't match Public scope"
531        );
532    }
533
534    #[test]
535    fn test_is_node_unused_scope_private() {
536        let nodes = [
537            NodeOptions::new("main", NodeKind::Function),
538            NodeOptions::new("public_unused", NodeKind::Function).with_visibility("public"),
539            NodeOptions::new("private_unused", NodeKind::Function),
540        ];
541        let (graph, node_ids) = create_test_graph(&nodes, &[]);
542
543        // Private scope - only checks private symbols
544        assert!(
545            !is_node_unused::<RandomState>(node_ids[1], UnusedScope::Private, &graph, None),
546            "Public function doesn't match Private scope"
547        );
548        assert!(
549            is_node_unused::<RandomState>(node_ids[2], UnusedScope::Private, &graph, None),
550            "Private unused function should be unused"
551        );
552    }
553
554    #[test]
555    fn test_is_node_unused_scope_function() {
556        let nodes = [
557            NodeOptions::new("main", NodeKind::Function),
558            NodeOptions::new("unused_func", NodeKind::Function),
559            NodeOptions::new("unused_struct", NodeKind::Struct),
560        ];
561        let (graph, node_ids) = create_test_graph(&nodes, &[]);
562
563        // Function scope - only checks functions/methods
564        assert!(
565            is_node_unused::<RandomState>(node_ids[1], UnusedScope::Function, &graph, None),
566            "Unused function should match Function scope"
567        );
568        assert!(
569            !is_node_unused::<RandomState>(node_ids[2], UnusedScope::Function, &graph, None),
570            "Struct doesn't match Function scope"
571        );
572    }
573
574    #[test]
575    fn test_is_node_unused_scope_struct() {
576        let nodes = [
577            NodeOptions::new("main", NodeKind::Function),
578            NodeOptions::new("unused_struct", NodeKind::Struct),
579            NodeOptions::new("unused_class", NodeKind::Class),
580            NodeOptions::new("unused_func", NodeKind::Function),
581        ];
582        let (graph, node_ids) = create_test_graph(&nodes, &[]);
583
584        // Struct scope - only checks structs/classes
585        assert!(
586            is_node_unused::<RandomState>(node_ids[1], UnusedScope::Struct, &graph, None),
587            "Unused struct should match Struct scope"
588        );
589        assert!(
590            is_node_unused::<RandomState>(node_ids[2], UnusedScope::Struct, &graph, None),
591            "Unused class should match Struct scope"
592        );
593        assert!(
594            !is_node_unused::<RandomState>(node_ids[3], UnusedScope::Struct, &graph, None),
595            "Function doesn't match Struct scope"
596        );
597    }
598
599    #[test]
600    fn test_find_unused_nodes_basic() {
601        let nodes = [
602            NodeOptions::new("main", NodeKind::Function),
603            NodeOptions::new("used_helper", NodeKind::Function),
604            NodeOptions::new("unused_helper", NodeKind::Function),
605        ];
606        let edges = [(
607            0,
608            1,
609            EdgeKind::Calls {
610                argument_count: 0,
611                is_async: false,
612            },
613        )];
614        let (graph, node_ids) = create_test_graph(&nodes, &edges);
615
616        let unused = find_unused_nodes(UnusedScope::All, &graph, 100);
617        assert_eq!(unused.len(), 1);
618        assert!(unused.contains(&node_ids[2]));
619    }
620
621    #[test]
622    fn test_find_unused_nodes_max_results() {
623        // Create many unused nodes and verify limit is respected
624        let mut nodes = vec![NodeOptions::new("main", NodeKind::Function)];
625        for i in 0..10 {
626            nodes.push(NodeOptions::new(
627                Box::leak(format!("unused_{i}").into_boxed_str()),
628                NodeKind::Function,
629            ));
630        }
631        let (graph, _) = create_test_graph(&nodes, &[]);
632
633        let unused = find_unused_nodes(UnusedScope::All, &graph, 3);
634        assert_eq!(unused.len(), 3, "Should respect max_results limit");
635    }
636
637    #[test]
638    fn test_entry_point_not_unused() {
639        // Entry points should never be marked as unused
640        let nodes = [
641            NodeOptions::new("main", NodeKind::Function),
642            NodeOptions::new("test_something", NodeKind::Function),
643            NodeOptions::new("exported", NodeKind::Export),
644        ];
645        let (graph, node_ids) = create_test_graph(&nodes, &[]);
646
647        for &node_id in &node_ids {
648            assert!(
649                !is_node_unused::<RandomState>(node_id, UnusedScope::All, &graph, None),
650                "Entry points should not be unused"
651            );
652        }
653    }
654
655    #[test]
656    fn test_precomputed_reachable_set() {
657        // Test using a pre-computed reachable set for efficiency
658        let nodes = [
659            NodeOptions::new("main", NodeKind::Function),
660            NodeOptions::new("unused", NodeKind::Function),
661        ];
662        let (graph, node_ids) = create_test_graph(&nodes, &[]);
663
664        let reachable = compute_reachable_set_graph(&graph);
665        assert!(
666            is_node_unused::<RandomState>(node_ids[1], UnusedScope::All, &graph, Some(&reachable)),
667            "Should work with precomputed reachable set"
668        );
669    }
670
671    #[test]
672    fn test_invalid_node_not_unused() {
673        let graph = CodeGraph::new();
674        let invalid_node = NodeId::new(999, 0);
675
676        assert!(
677            !is_node_unused::<RandomState>(invalid_node, UnusedScope::All, &graph, None),
678            "Invalid node should return false (not unused)"
679        );
680    }
681
682    #[test]
683    fn test_typeof_edge_reachability() {
684        let nodes = [
685            NodeOptions::new("main", NodeKind::Function),
686            NodeOptions::new("MyType", NodeKind::Type),
687        ];
688        let edges = [(
689            0,
690            1,
691            EdgeKind::TypeOf {
692                context: None,
693                index: None,
694                name: None,
695            },
696        )];
697        let (graph, node_ids) = create_test_graph(&nodes, &edges);
698
699        let reachable = compute_reachable_set_graph(&graph);
700        assert!(
701            reachable.contains(&node_ids[1]),
702            "TypeOf edge should make type reachable"
703        );
704    }
705
706    #[test]
707    fn test_implements_edge_reachability() {
708        let nodes = [
709            NodeOptions::new("MyClass", NodeKind::Class).with_visibility("public"),
710            NodeOptions::new("MyInterface", NodeKind::Interface),
711        ];
712        let edges = [(0, 1, EdgeKind::Implements)];
713        let (graph, node_ids) = create_test_graph(&nodes, &edges);
714
715        let reachable = compute_reachable_set_graph(&graph);
716        // MyClass is public (entry point), and implements MyInterface
717        assert!(
718            reachable.contains(&node_ids[1]),
719            "Implements edge should make interface reachable"
720        );
721    }
722
723    #[test]
724    fn test_pub_visibility_variant() {
725        // Test "pub" (Rust style) vs "public" visibility
726        let nodes = [NodeOptions::new("rust_public", NodeKind::Function).with_visibility("pub")];
727        let (graph, node_ids) = create_test_graph(&nodes, &[]);
728
729        let reachable = compute_reachable_set_graph(&graph);
730        assert!(
731            reachable.contains(&node_ids[0]),
732            "'pub' visibility should be recognized as public"
733        );
734    }
735}