Skip to main content

cgx_engine/
deadcode.rs

1use crate::graph::{GraphDb, Node};
2
3/// A graph node flagged as a dead-code candidate.
4#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
5pub struct DeadNode {
6    pub node: Node,
7    pub reason: DeadReason,
8    pub confidence: Confidence,
9    /// Human-readable explanation of why this might be a false positive.
10    pub false_positive_risk: Option<String>,
11}
12
13/// Confidence level for a dead-code finding.
14#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)]
15#[serde(rename_all = "snake_case")]
16pub enum Confidence {
17    /// Very likely dead — no framework hooks, entry points, or external consumers detected.
18    High,
19    /// Probably dead but verify — e.g. unused variables or zombie files.
20    Medium,
21    /// Possibly dead but high false-positive risk (framework hook, entry point name, exported type).
22    Low,
23}
24
25impl Confidence {
26    pub fn as_str(&self) -> &'static str {
27        match self {
28            Confidence::High => "high",
29            Confidence::Medium => "medium",
30            Confidence::Low => "low",
31        }
32    }
33}
34
35/// Why a node was classified as dead code.
36#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)]
37#[serde(rename_all = "snake_case")]
38pub enum DeadReason {
39    /// Exported symbol with no inbound `CALLS` edges from other files.
40    UnreferencedExport,
41    /// Private function with no inbound `CALLS` edges at all.
42    Unreachable,
43    /// Variable node with no inbound `CALLS` edges.
44    UnusedVariable,
45    /// Node has neither inbound nor outbound `CALLS` edges.
46    Disconnected,
47    /// File node that is never imported and has no known consumers.
48    ZombieFile,
49}
50
51impl DeadReason {
52    pub fn as_str(&self) -> &'static str {
53        match self {
54            DeadReason::UnreferencedExport => "unreferenced_export",
55            DeadReason::Unreachable => "unreachable",
56            DeadReason::UnusedVariable => "unused_variable",
57            DeadReason::Disconnected => "disconnected",
58            DeadReason::ZombieFile => "zombie_file",
59        }
60    }
61}
62
63/// Aggregated dead-code analysis results, grouped by reason.
64#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
65pub struct DeadCodeReport {
66    pub unreferenced_exports: Vec<DeadNode>,
67    pub unreachable: Vec<DeadNode>,
68    pub unused_variables: Vec<DeadNode>,
69    pub disconnected: Vec<DeadNode>,
70    pub zombie_files: Vec<DeadNode>,
71}
72
73impl DeadCodeReport {
74    /// Iterate over every finding regardless of category.
75    pub fn all_items(&self) -> Vec<&DeadNode> {
76        let mut all = Vec::new();
77        all.extend(self.unreferenced_exports.iter());
78        all.extend(self.unreachable.iter());
79        all.extend(self.unused_variables.iter());
80        all.extend(self.disconnected.iter());
81        all.extend(self.zombie_files.iter());
82        all
83    }
84
85    /// Total number of dead-code findings across all categories.
86    pub fn total(&self) -> usize {
87        self.unreferenced_exports.len()
88            + self.unreachable.len()
89            + self.unused_variables.len()
90            + self.disconnected.len()
91            + self.zombie_files.len()
92    }
93
94    /// Count findings broken down by confidence level: `(high, medium, low)`.
95    pub fn count_by_confidence(&self) -> (usize, usize, usize) {
96        let mut high = 0;
97        let mut medium = 0;
98        let mut low = 0;
99        for item in self.all_items() {
100            match item.confidence {
101                Confidence::High => high += 1,
102                Confidence::Medium => medium += 1,
103                Confidence::Low => low += 1,
104            }
105        }
106        (high, medium, low)
107    }
108}
109
110fn query_nodes(db: &GraphDb, sql: &str) -> anyhow::Result<Vec<Node>> {
111    let mut stmt = db.conn.prepare(sql)?;
112    let rows = stmt.query_map([], |row| {
113        Ok(Node {
114            id: row.get(0)?,
115            kind: row.get(1)?,
116            name: row.get(2)?,
117            path: row.get(3)?,
118            line_start: row.get::<_, u32>(4)?,
119            line_end: row.get::<_, u32>(5)?,
120            language: row.get::<_, Option<String>>(6)?.unwrap_or_default(),
121            churn: row.get::<_, f64>(7)?,
122            coupling: row.get::<_, f64>(8)?,
123            community: row.get::<_, i64>(9)?,
124            in_degree: row.get::<_, i64>(10)?,
125            out_degree: row.get::<_, i64>(11)?,
126            exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
127            is_dead_candidate: false,
128            dead_reason: None,
129            complexity: 0.0,
130            is_test_file: false,
131            test_count: 0,
132            is_tested: false,
133        })
134    })?;
135    let mut result = Vec::new();
136    for row in rows {
137        result.push(row?);
138    }
139    Ok(result)
140}
141
/// Function names that frameworks invoke on the project's behalf.
///
/// A node with one of these names gets [`Confidence::Low`], because the call
/// graph cannot see the framework's call site. The test lifecycle hooks are
/// listed as a complete set (`beforeAll`/`beforeEach`/`afterEach`/`afterAll`).
const FRAMEWORK_HOOKS: &[&str] = &[
    "getServerSideProps",
    "getStaticProps",
    "loader",
    "action",
    "beforeAll",
    "beforeEach",
    "afterEach",
    "afterAll",
    "getStaticPaths",
];
151
/// Names commonly given to program entry points; a node with one of these
/// names is reported with [`Confidence::Low`].
const ENTRY_POINT_NAMES: &[&str] = &[
    "main",
    "init",
    "setup",
    "bootstrap",
    "start",
];
153
154fn compute_confidence_and_fp(node: &Node, reason: &DeadReason) -> (Confidence, Option<String>) {
155    // Framework hooks -> Low
156    if FRAMEWORK_HOOKS.contains(&node.name.as_str()) {
157        return (
158            Confidence::Low,
159            Some("Framework hook — called by framework not by your code".to_string()),
160        );
161    }
162    // Entry point names
163    if ENTRY_POINT_NAMES.contains(&node.name.as_str()) {
164        return (
165            Confidence::Low,
166            Some("Common entry point name — verify before deleting".to_string()),
167        );
168    }
169    // Types/interfaces erased at runtime
170    if node.kind == "Type" || node.kind == "Interface" {
171        return (
172            Confidence::Low,
173            Some(
174                "Types erased at runtime — may be used by consuming TypeScript packages"
175                    .to_string(),
176            ),
177        );
178    }
179    // lib/ or dist/ files
180    if node.path.contains("/lib/")
181        || node.path.contains("/dist/")
182        || node.path.starts_with("lib/")
183        || node.path.starts_with("dist/")
184    {
185        return (
186            Confidence::Low,
187            Some("May be consumed externally by npm consumers".to_string()),
188        );
189    }
190
191    match reason {
192        DeadReason::Unreachable => (Confidence::High, None),
193        DeadReason::Disconnected => (Confidence::High, None),
194        DeadReason::UnreferencedExport => {
195            let filename = node.path.split('/').next_back().unwrap_or("");
196            if matches!(filename, "index.ts" | "index.js" | "lib.rs" | "mod.rs") {
197                (
198                    Confidence::Low,
199                    Some("May be consumed externally by npm consumers".to_string()),
200                )
201            } else {
202                (Confidence::High, None)
203            }
204        }
205        DeadReason::UnusedVariable => (Confidence::Medium, None),
206        DeadReason::ZombieFile => (Confidence::Medium, None),
207    }
208}
209
210/// Analyse the graph for dead code and return a categorised report.
211///
212/// Runs five SQL queries against the graph DB:
213/// unreferenced exports, unreachable private functions, unused variables,
214/// fully disconnected nodes, and zombie files.  Framework hooks and common
215/// entry-point names are flagged with [`Confidence::Low`] to reduce noise.
216pub fn detect_dead_code(db: &GraphDb) -> anyhow::Result<DeadCodeReport> {
217    let mut report = DeadCodeReport::default();
218
219    // Query 1: unreferenced exports — exported but no CALLS edges pointing at them.
220    // Uses NOT EXISTS on CALLS rather than in_degree=0 because all parsed nodes
221    // receive an EXPORTS edge from their containing file, making in_degree >= 1.
222    let unreferenced_exports = query_nodes(
223        db,
224        "SELECT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn,
225                n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, 0)
226         FROM nodes n
227         WHERE n.kind IN ('Function','Class','Variable','Type')
228         AND COALESCE(n.exported, 0) = 1
229         AND NOT EXISTS (SELECT 1 FROM edges e WHERE e.dst = n.id AND e.kind = 'CALLS')
230         AND n.path NOT LIKE '%test%' AND n.path NOT LIKE '%spec%'
231         AND n.path NOT LIKE '%.d.ts'",
232    )?;
233
234    for node in unreferenced_exports {
235        let (confidence, fp_risk) =
236            compute_confidence_and_fp(&node, &DeadReason::UnreferencedExport);
237        report.unreferenced_exports.push(DeadNode {
238            node,
239            reason: DeadReason::UnreferencedExport,
240            confidence,
241            false_positive_risk: fp_risk,
242        });
243    }
244
245    // Query 2: unreachable private functions — not exported, no CALLS edges to them.
246    let unreachable = query_nodes(
247        db,
248        "SELECT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn,
249                n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, 0)
250         FROM nodes n
251         WHERE n.kind = 'Function'
252         AND n.name != 'constructor'
253         AND COALESCE(n.exported, 0) = 0
254         AND NOT EXISTS (SELECT 1 FROM edges e WHERE e.dst = n.id AND e.kind = 'CALLS')
255         AND n.path NOT LIKE '%test%'",
256    )?;
257
258    for node in unreachable {
259        let (confidence, fp_risk) = compute_confidence_and_fp(&node, &DeadReason::Unreachable);
260        report.unreachable.push(DeadNode {
261            node,
262            reason: DeadReason::Unreachable,
263            confidence,
264            false_positive_risk: fp_risk,
265        });
266    }
267
268    // Query 3: unused variables — no CALLS edges point to them.
269    let unused_vars = query_nodes(
270        db,
271        "SELECT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn,
272                n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, 0)
273         FROM nodes n
274         WHERE n.kind = 'Variable'
275         AND NOT EXISTS (SELECT 1 FROM edges e WHERE e.dst = n.id AND e.kind = 'CALLS')
276         AND n.path NOT LIKE '%test%'",
277    )?;
278
279    for node in unused_vars {
280        let (confidence, fp_risk) = compute_confidence_and_fp(&node, &DeadReason::UnusedVariable);
281        report.unused_variables.push(DeadNode {
282            node,
283            reason: DeadReason::UnusedVariable,
284            confidence,
285            false_positive_risk: fp_risk,
286        });
287    }
288
289    // Query 4: disconnected nodes — no CALLS edges in either direction.
290    let disconnected = query_nodes(
291        db,
292        "SELECT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn,
293                n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, 0)
294         FROM nodes n
295         WHERE NOT EXISTS (SELECT 1 FROM edges e WHERE e.dst = n.id AND e.kind = 'CALLS')
296         AND NOT EXISTS (SELECT 1 FROM edges e WHERE e.src = n.id AND e.kind = 'CALLS')
297         AND n.kind NOT IN ('File','Module','Author')
298         AND n.path NOT LIKE '%test%'",
299    )?;
300
301    // Collect IDs already in other categories to avoid duplication
302    let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
303    for dn in &report.unreferenced_exports {
304        seen.insert(dn.node.id.clone());
305    }
306    for dn in &report.unreachable {
307        seen.insert(dn.node.id.clone());
308    }
309    for dn in &report.unused_variables {
310        seen.insert(dn.node.id.clone());
311    }
312
313    for node in disconnected {
314        if seen.contains(&node.id) {
315            continue;
316        }
317        let (confidence, fp_risk) = compute_confidence_and_fp(&node, &DeadReason::Disconnected);
318        report.disconnected.push(DeadNode {
319            node,
320            reason: DeadReason::Disconnected,
321            confidence,
322            false_positive_risk: fp_risk,
323        });
324    }
325
326    // Query 5: zombie files — no IMPORTS edges point to them, but they export something.
327    // File in_degree = count of IMPORTS edges from other files, so 0 means never imported.
328    let zombie_files = query_nodes(
329        db,
330        "SELECT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn,
331                n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, 0)
332         FROM nodes n
333         WHERE n.kind = 'File'
334         AND NOT EXISTS (SELECT 1 FROM edges e WHERE e.dst = n.id AND e.kind = 'IMPORTS')
335         AND EXISTS (SELECT 1 FROM edges e WHERE e.src = n.id)
336         AND regexp_extract(n.name, '[^/]+$') NOT IN ('index.ts','index.js','main.ts','main.rs','lib.rs','mod.rs','app.ts','__init__.py')
337         AND n.path NOT LIKE '%test%'",
338    )?;
339
340    for node in zombie_files {
341        let (confidence, fp_risk) = compute_confidence_and_fp(&node, &DeadReason::ZombieFile);
342        report.zombie_files.push(DeadNode {
343            node,
344            reason: DeadReason::ZombieFile,
345            confidence,
346            false_positive_risk: fp_risk,
347        });
348    }
349
350    Ok(report)
351}
352
353/// Persist dead-code findings from a report back into the graph DB.
354///
355/// Sets `is_dead_candidate = true` and `dead_reason` on each flagged node so
356/// the information is available to queries and the web UI without re-running analysis.
357pub fn mark_dead_candidates(db: &GraphDb, report: &DeadCodeReport) -> anyhow::Result<()> {
358    let mut items: Vec<(String, String)> = Vec::new();
359    for dn in &report.unreferenced_exports {
360        items.push((dn.node.id.clone(), dn.reason.as_str().to_string()));
361    }
362    for dn in &report.unreachable {
363        items.push((dn.node.id.clone(), dn.reason.as_str().to_string()));
364    }
365    for dn in &report.unused_variables {
366        items.push((dn.node.id.clone(), dn.reason.as_str().to_string()));
367    }
368    for dn in &report.disconnected {
369        items.push((dn.node.id.clone(), dn.reason.as_str().to_string()));
370    }
371    for dn in &report.zombie_files {
372        items.push((dn.node.id.clone(), dn.reason.as_str().to_string()));
373    }
374    db.mark_dead_candidates(&items)
375}