Skip to main content

codemem_engine/enrichment/
dead_code.rs

//! Dead code detection: identifies symbols with zero inbound `Calls`, `Imports`,
//! `Inherits`, or `Implements` edges, applying framework-aware exemptions
//! (decorators, constructors, tests, etc.).
3
4use super::EnrichResult;
5use crate::CodememEngine;
6use codemem_core::config::DeadCodeConfig;
7use codemem_core::{CodememError, Edge, GraphNode, RelationshipType};
8use serde_json::json;
9use std::collections::HashSet;
10
/// A symbol detected as potentially dead (unreferenced) code.
///
/// Entries are plain data copied out of the graph node, so they can be
/// cloned and compared freely (e.g. in tests or when de-duplicating results).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DeadCodeEntry {
    /// Graph node ID of the unreferenced symbol.
    pub node_id: String,
    /// Human-readable symbol name (from `node.label`).
    pub label: String,
    /// The symbol kind string from `node.payload["kind"]`; empty when the
    /// payload value is not a string.
    pub kind: String,
    /// Optional file path from `node.payload["file_path"]`.
    pub file_path: Option<String>,
}
23
/// Keyword fragments in decorator/attribute values that signal framework entry points.
///
/// `is_exempt` lowercases each attribute and tests it with `str::contains`, so
/// these entries must stay lowercase and they match as substrings anywhere in
/// the attribute (`"route"` matches `"app.route"`, `"api"` matches `"api_view"`).
// NOTE(review): because matching is by substring, short fragments such as "api"
// also match unrelated attributes that merely contain those letters.
const FRAMEWORK_KEYWORDS: &[&str] = &["route", "endpoint", "export", "api"];
26
27/// Analyze the graph for unreferenced symbols (dead code candidates).
28///
29/// Symbols are considered dead when they have no inbound `Calls`, `Imports`,
30/// `Inherits`, or `Implements` edges and do not match any exemption rule.
31pub fn find_dead_code(
32    nodes: &[GraphNode],
33    edges: &[Edge],
34    config: &DeadCodeConfig,
35) -> Vec<DeadCodeEntry> {
36    // Step 1: filter to symbol nodes (those with a "kind" key in payload).
37    let symbol_nodes: Vec<&GraphNode> = nodes
38        .iter()
39        .filter(|n| n.payload.contains_key("kind"))
40        .collect();
41
42    // Step 2: min_symbols threshold — avoid false positives on tiny graphs.
43    if symbol_nodes.len() < config.min_symbols {
44        return Vec::new();
45    }
46
47    // Step 3: build set of node IDs that have inbound referencing edges.
48    let referenced: HashSet<&str> = edges
49        .iter()
50        .filter(|e| {
51            matches!(
52                e.relationship,
53                RelationshipType::Calls
54                    | RelationshipType::Imports
55                    | RelationshipType::Inherits
56                    | RelationshipType::Implements
57            )
58        })
59        .map(|e| e.dst.as_str())
60        .collect();
61
62    // Step 4: collect unreferenced, non-exempt symbols.
63    let mut dead: Vec<DeadCodeEntry> = Vec::new();
64
65    for node in &symbol_nodes {
66        if referenced.contains(node.id.as_str()) {
67            continue;
68        }
69
70        if is_exempt(node, config) {
71            continue;
72        }
73
74        let kind = node
75            .payload
76            .get("kind")
77            .and_then(|v| v.as_str())
78            .unwrap_or("")
79            .to_string();
80
81        let file_path = node
82            .payload
83            .get("file_path")
84            .and_then(|v| v.as_str())
85            .map(String::from);
86
87        dead.push(DeadCodeEntry {
88            node_id: node.id.clone(),
89            label: node.label.clone(),
90            kind,
91            file_path,
92        });
93    }
94
95    dead
96}
97
98/// Check whether a symbol node should be exempt from dead code detection.
99fn is_exempt(node: &GraphNode, config: &DeadCodeConfig) -> bool {
100    // Kind exemption: payload["kind"] in config.exempt_kinds
101    if let Some(kind_val) = node.payload.get("kind").and_then(|v| v.as_str()) {
102        let kind_lower = kind_val.to_lowercase();
103        if config
104            .exempt_kinds
105            .iter()
106            .any(|k| k.to_lowercase() == kind_lower)
107        {
108            return true;
109        }
110    }
111
112    // Decorator/attribute exemption
113    if let Some(attrs) = node.payload.get("attributes").and_then(|v| v.as_array()) {
114        for attr in attrs {
115            if let Some(attr_str) = attr.as_str() {
116                let attr_lower = attr_str.to_lowercase();
117                // Exact match against configured exempt decorators
118                if config
119                    .exempt_decorators
120                    .iter()
121                    .any(|d| attr_lower.contains(&d.to_lowercase()))
122                {
123                    return true;
124                }
125                // Framework keyword match
126                if FRAMEWORK_KEYWORDS.iter().any(|kw| attr_lower.contains(kw)) {
127                    return true;
128                }
129            }
130        }
131    }
132
133    // Main entry point
134    if node.label == "main" || node.label == "Main" {
135        return true;
136    }
137
138    // Dunder methods (Python __init__, __str__, etc.)
139    if node.label.starts_with("__") && node.label.ends_with("__") {
140        return true;
141    }
142
143    // Public visibility — public symbols may be part of library API
144    if let Some(vis) = node.payload.get("visibility").and_then(|v| v.as_str()) {
145        if vis == "public" {
146            return true;
147        }
148    }
149
150    false
151}
152
153impl CodememEngine {
154    /// Run dead code detection on the current graph and store insights for
155    /// unreferenced symbols.
156    pub fn enrich_dead_code(&self, namespace: Option<&str>) -> Result<EnrichResult, CodememError> {
157        let config = &self.config.enrichment.dead_code;
158        if !config.enabled {
159            return Ok(EnrichResult {
160                insights_stored: 0,
161                details: json!({"skipped": true, "reason": "dead_code disabled"}),
162            });
163        }
164
165        // Collect all nodes from the in-memory graph and all edges from storage.
166        // Using storage.all_graph_edges() avoids N+1 per-node get_edges calls.
167        let all_nodes = self.lock_graph()?.get_all_nodes();
168        let all_edges = self.storage.all_graph_edges()?;
169
170        let dead_entries = find_dead_code(&all_nodes, &all_edges, config);
171
172        let mut insights_stored = 0;
173        for entry in &dead_entries {
174            let file_info = entry
175                .file_path
176                .as_deref()
177                .map(|fp| format!(" in {fp}"))
178                .unwrap_or_default();
179            let content = format!(
180                "Dead code candidate: `{}` ({}) has no callers or importers{}",
181                entry.label, entry.kind, file_info,
182            );
183            let links = vec![entry.node_id.clone()];
184            if self
185                .store_insight(
186                    &content,
187                    "dead-code",
188                    &["dead-code"],
189                    0.6,
190                    namespace,
191                    &links,
192                )
193                .is_some()
194            {
195                insights_stored += 1;
196            }
197        }
198
199        self.save_index();
200
201        Ok(EnrichResult {
202            insights_stored,
203            details: json!({
204                "dead_code_candidates": dead_entries.len(),
205                "insights_stored": insights_stored,
206            }),
207        })
208    }
209}
210
#[cfg(test)]
mod tests {
    use super::*;
    use codemem_core::{GraphNode, NodeKind};
    use std::collections::HashMap;

    /// Helper: create a GraphNode with the given label, kind payload, and optional attributes.
    fn make_graph_node(name: &str, kind_str: &str, attrs: Option<Vec<&str>>) -> GraphNode {
        let mut payload: HashMap<String, serde_json::Value> = HashMap::new();
        payload.insert("kind".into(), json!(kind_str));
        if let Some(attr_list) = attrs {
            payload.insert("attributes".into(), json!(attr_list));
        }
        GraphNode {
            id: format!("sym:{name}"),
            kind: NodeKind::Function,
            label: name.to_string(),
            payload,
            centrality: 0.0,
            memory_id: None,
            namespace: None,
            valid_from: None,
            valid_to: None,
        }
    }

    /// Helper: create an Edge with the given src, dst, and relationship.
    fn make_edge(src: &str, dst: &str, rel: RelationshipType) -> Edge {
        Edge {
            id: format!("{src}-{:?}-{dst}", rel),
            src: src.to_string(),
            dst: dst.to_string(),
            relationship: rel,
            weight: 1.0,
            properties: HashMap::new(),
            created_at: chrono::Utc::now(),
            valid_from: None,
            valid_to: None,
        }
    }

    /// Helper: default config with a threshold low enough for the tiny test graphs.
    fn test_config() -> DeadCodeConfig {
        DeadCodeConfig {
            min_symbols: 2,
            ..DeadCodeConfig::default()
        }
    }

    /// Helper: an empty edge list with an explicit element type.
    fn no_edges() -> Vec<Edge> {
        Vec::new()
    }

    /// Helper: whether a symbol with the given label was reported as dead.
    fn is_dead(dead: &[DeadCodeEntry], label: &str) -> bool {
        dead.iter().any(|d| d.label == label)
    }

    #[test]
    fn unreachable_function_detected() {
        let nodes = vec![
            make_graph_node("main", "function", None),
            make_graph_node("helper", "function", None),
            make_graph_node("unused_fn", "function", None),
        ];
        let edges = vec![make_edge("sym:main", "sym:helper", RelationshipType::Calls)];

        let dead = find_dead_code(&nodes, &edges, &test_config());

        // unused_fn has no callers and is not main → should be detected.
        // main is exempt (entry point). helper is called by main → not dead.
        assert!(
            is_dead(&dead, "unused_fn"),
            "unused_fn should be detected as dead code"
        );
        assert!(
            !is_dead(&dead, "helper"),
            "helper is called by main, should not be dead"
        );
        assert!(!is_dead(&dead, "main"), "main is exempt as entry point");
    }

    #[test]
    fn decorated_symbols_exempt() {
        let nodes = vec![
            make_graph_node("index", "function", Some(vec!["app.route"])),
            make_graph_node("unused", "function", None),
            make_graph_node("api_handler", "function", Some(vec!["get_endpoint"])),
        ];

        let dead = find_dead_code(&nodes, &no_edges(), &test_config());

        assert!(
            !is_dead(&dead, "index"),
            "app.route decorated should be exempt"
        );
        assert!(
            !is_dead(&dead, "api_handler"),
            "endpoint keyword in attribute should be exempt"
        );
        assert!(
            is_dead(&dead, "unused"),
            "unused with no decorators should be detected"
        );
    }

    #[test]
    fn attribute_keywords_match_case_insensitively() {
        let nodes = vec![
            make_graph_node("index", "function", Some(vec!["App.ROUTE"])),
            make_graph_node("unused", "function", None),
        ];

        let dead = find_dead_code(&nodes, &no_edges(), &test_config());

        assert!(
            !is_dead(&dead, "index"),
            "keyword match should ignore attribute casing"
        );
        assert!(is_dead(&dead, "unused"), "undecorated symbol is reported");
    }

    #[test]
    fn constructors_and_tests_exempt() {
        // The constructor deliberately has a non-dunder name: `__init__` would be
        // exempt through the dunder rule and hide a broken kind exemption.
        let nodes = vec![
            make_graph_node("new", "constructor", None),
            make_graph_node("test_foo", "test", None),
            make_graph_node("orphan", "function", None),
        ];
        // Pin the kinds under test so the assertions do not depend on the
        // contents of the default configuration.
        let mut config = test_config();
        config
            .exempt_kinds
            .extend(["constructor".to_string(), "test".to_string()]);

        let dead = find_dead_code(&nodes, &no_edges(), &config);

        assert!(!is_dead(&dead, "new"), "constructor kind should be exempt");
        assert!(!is_dead(&dead, "test_foo"), "test kind should be exempt");
        assert!(
            is_dead(&dead, "orphan"),
            "orphan function should be detected"
        );
    }

    #[test]
    fn min_symbols_threshold_respected() {
        let nodes = vec![make_graph_node("lonely", "function", None)];
        let config = DeadCodeConfig {
            min_symbols: 10,
            ..DeadCodeConfig::default()
        };

        let dead = find_dead_code(&nodes, &no_edges(), &config);
        assert!(
            dead.is_empty(),
            "Should return empty when symbol count < min_symbols"
        );
    }

    #[test]
    fn public_symbols_exempt() {
        let mut node = make_graph_node("pub_fn", "function", None);
        node.payload.insert("visibility".into(), json!("public"));
        let nodes = vec![node, make_graph_node("priv_fn", "function", None)];

        let dead = find_dead_code(&nodes, &no_edges(), &test_config());

        assert!(!is_dead(&dead, "pub_fn"), "public symbol should be exempt");
        assert!(
            is_dead(&dead, "priv_fn"),
            "private symbol with no callers should be detected"
        );
    }

    #[test]
    fn dunder_methods_exempt() {
        let nodes = vec![
            make_graph_node("__str__", "method", None),
            make_graph_node("orphan_method", "method", None),
        ];

        let dead = find_dead_code(&nodes, &no_edges(), &test_config());

        assert!(!is_dead(&dead, "__str__"), "dunder method should be exempt");
        assert!(
            is_dead(&dead, "orphan_method"),
            "non-dunder method should be detected"
        );
    }

    #[test]
    fn inherits_and_implements_count_as_references() {
        let nodes = vec![
            make_graph_node("BaseClass", "class", None),
            make_graph_node("MyTrait", "trait", None),
            make_graph_node("orphan_class", "class", None),
        ];
        let edges = vec![
            make_edge(
                "sym:orphan_class",
                "sym:BaseClass",
                RelationshipType::Inherits,
            ),
            make_edge(
                "sym:orphan_class",
                "sym:MyTrait",
                RelationshipType::Implements,
            ),
        ];

        let dead = find_dead_code(&nodes, &edges, &test_config());

        assert!(
            !is_dead(&dead, "BaseClass"),
            "inherited class should not be dead"
        );
        assert!(
            !is_dead(&dead, "MyTrait"),
            "implemented trait should not be dead"
        );
    }

    #[test]
    fn nodes_without_kind_are_ignored() {
        // A node with no "kind" payload entry is not a symbol at all.
        let mut file_node = make_graph_node("lib.rs", "file", None);
        file_node.payload.remove("kind");
        let nodes = vec![
            file_node,
            make_graph_node("a", "function", None),
            make_graph_node("b", "function", None),
        ];

        let dead = find_dead_code(&nodes, &no_edges(), &test_config());

        assert!(
            !is_dead(&dead, "lib.rs"),
            "non-symbol node must not be reported"
        );
        assert!(is_dead(&dead, "a"), "unreferenced symbol a is reported");
        assert!(is_dead(&dead, "b"), "unreferenced symbol b is reported");
    }

    #[test]
    fn entry_carries_kind_and_file_path() {
        let mut located = make_graph_node("located", "function", None);
        located
            .payload
            .insert("file_path".into(), json!("src/lib.rs"));
        let nodes = vec![located, make_graph_node("bare", "method", None)];

        let dead = find_dead_code(&nodes, &no_edges(), &test_config());

        let located_entry = dead
            .iter()
            .find(|d| d.label == "located")
            .expect("located should be reported");
        assert_eq!(located_entry.node_id, "sym:located");
        assert_eq!(located_entry.kind, "function");
        assert_eq!(located_entry.file_path.as_deref(), Some("src/lib.rs"));

        let bare_entry = dead
            .iter()
            .find(|d| d.label == "bare")
            .expect("bare should be reported");
        assert_eq!(bare_entry.kind, "method");
        assert_eq!(bare_entry.file_path, None);
    }
}