Skip to main content

cgx_engine/
deadcode.rs

1use crate::graph::{GraphDb, Node};
2
/// A single dead-code finding: a graph node plus the classification attached to it.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct DeadNode {
    /// The graph node that was flagged.
    pub node: Node,
    /// Which detection rule flagged the node.
    pub reason: DeadReason,
    /// How much the finding should be trusted.
    pub confidence: Confidence,
    /// Human-readable explanation of why the finding may be a false positive, if any.
    pub false_positive_risk: Option<String>,
}
10
/// Trust level attached to a dead-code finding.
///
/// Serialized by serde in `snake_case` (`high`, `medium`, `low`).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum Confidence {
    /// No false-positive heuristic matched for this finding.
    High,
    /// Default level for unused variables and zombie files.
    Medium,
    /// A false-positive heuristic matched; verify before deleting.
    Low,
}
18
19impl Confidence {
20    pub fn as_str(&self) -> &'static str {
21        match self {
22            Confidence::High => "high",
23            Confidence::Medium => "medium",
24            Confidence::Low => "low",
25        }
26    }
27}
28
/// The detection rule that flagged a node as dead code.
///
/// Serialized by serde in `snake_case` (e.g. `unreferenced_export`).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum DeadReason {
    /// Exported node that no `CALLS` edge points at.
    UnreferencedExport,
    /// Non-exported function that no `CALLS` edge points at.
    Unreachable,
    /// Variable node that no `CALLS` edge points at.
    UnusedVariable,
    /// Node with no `CALLS` edges in either direction.
    Disconnected,
    /// File that no `IMPORTS` edge points at, although it has outgoing edges.
    ZombieFile,
}
38
39impl DeadReason {
40    pub fn as_str(&self) -> &'static str {
41        match self {
42            DeadReason::UnreferencedExport => "unreferenced_export",
43            DeadReason::Unreachable => "unreachable",
44            DeadReason::UnusedVariable => "unused_variable",
45            DeadReason::Disconnected => "disconnected",
46            DeadReason::ZombieFile => "zombie_file",
47        }
48    }
49}
50
/// Result of a dead-code scan, with findings grouped by the rule that produced them.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct DeadCodeReport {
    /// Findings tagged [`DeadReason::UnreferencedExport`].
    pub unreferenced_exports: Vec<DeadNode>,
    /// Findings tagged [`DeadReason::Unreachable`].
    pub unreachable: Vec<DeadNode>,
    /// Findings tagged [`DeadReason::UnusedVariable`].
    pub unused_variables: Vec<DeadNode>,
    /// Findings tagged [`DeadReason::Disconnected`].
    pub disconnected: Vec<DeadNode>,
    /// Findings tagged [`DeadReason::ZombieFile`].
    pub zombie_files: Vec<DeadNode>,
}
59
60impl DeadCodeReport {
61    pub fn all_items(&self) -> Vec<&DeadNode> {
62        let mut all = Vec::new();
63        all.extend(self.unreferenced_exports.iter());
64        all.extend(self.unreachable.iter());
65        all.extend(self.unused_variables.iter());
66        all.extend(self.disconnected.iter());
67        all.extend(self.zombie_files.iter());
68        all
69    }
70
71    pub fn total(&self) -> usize {
72        self.unreferenced_exports.len()
73            + self.unreachable.len()
74            + self.unused_variables.len()
75            + self.disconnected.len()
76            + self.zombie_files.len()
77    }
78
79    pub fn count_by_confidence(&self) -> (usize, usize, usize) {
80        let mut high = 0;
81        let mut medium = 0;
82        let mut low = 0;
83        for item in self.all_items() {
84            match item.confidence {
85                Confidence::High => high += 1,
86                Confidence::Medium => medium += 1,
87                Confidence::Low => low += 1,
88            }
89        }
90        (high, medium, low)
91    }
92}
93
94fn query_nodes(db: &GraphDb, sql: &str) -> anyhow::Result<Vec<Node>> {
95    let mut stmt = db.conn.prepare(sql)?;
96    let rows = stmt.query_map([], |row| {
97        Ok(Node {
98            id: row.get(0)?,
99            kind: row.get(1)?,
100            name: row.get(2)?,
101            path: row.get(3)?,
102            line_start: row.get::<_, u32>(4)?,
103            line_end: row.get::<_, u32>(5)?,
104            language: row.get::<_, Option<String>>(6)?.unwrap_or_default(),
105            churn: row.get::<_, f64>(7)?,
106            coupling: row.get::<_, f64>(8)?,
107            community: row.get::<_, i64>(9)?,
108            in_degree: row.get::<_, i64>(10)?,
109            out_degree: row.get::<_, i64>(11)?,
110            exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
111            is_dead_candidate: false,
112            dead_reason: None,
113            complexity: 0.0,
114            is_test_file: false,
115            test_count: 0,
116            is_tested: false,
117        })
118    })?;
119    let mut result = Vec::new();
120    for row in rows {
121        result.push(row?);
122    }
123    Ok(result)
124}
125
/// Function names treated as framework hooks: a node with one of these names
/// is reported with low confidence because the framework, not project code,
/// is expected to call it.
const FRAMEWORK_HOOKS: &[&str] = &[
    "getServerSideProps",
    "getStaticProps",
    "loader",
    "action",
    "beforeEach",
    "afterAll",
    "getStaticPaths",
];

/// Common entry-point names: a node with one of these names is reported with
/// low confidence and a note to verify before deleting.
const ENTRY_POINT_NAMES: &[&str] = &["main", "init", "setup", "bootstrap", "start"];
137
138fn compute_confidence_and_fp(node: &Node, reason: &DeadReason) -> (Confidence, Option<String>) {
139    // Framework hooks -> Low
140    if FRAMEWORK_HOOKS.contains(&node.name.as_str()) {
141        return (
142            Confidence::Low,
143            Some("Framework hook — called by framework not by your code".to_string()),
144        );
145    }
146    // Entry point names
147    if ENTRY_POINT_NAMES.contains(&node.name.as_str()) {
148        return (
149            Confidence::Low,
150            Some("Common entry point name — verify before deleting".to_string()),
151        );
152    }
153    // Types/interfaces erased at runtime
154    if node.kind == "Type" || node.kind == "Interface" {
155        return (
156            Confidence::Low,
157            Some(
158                "Types erased at runtime — may be used by consuming TypeScript packages"
159                    .to_string(),
160            ),
161        );
162    }
163    // lib/ or dist/ files
164    if node.path.contains("/lib/")
165        || node.path.contains("/dist/")
166        || node.path.starts_with("lib/")
167        || node.path.starts_with("dist/")
168    {
169        return (
170            Confidence::Low,
171            Some("May be consumed externally by npm consumers".to_string()),
172        );
173    }
174
175    match reason {
176        DeadReason::Unreachable => (Confidence::High, None),
177        DeadReason::Disconnected => (Confidence::High, None),
178        DeadReason::UnreferencedExport => {
179            let filename = node.path.split('/').next_back().unwrap_or("");
180            if matches!(filename, "index.ts" | "index.js" | "lib.rs" | "mod.rs") {
181                (
182                    Confidence::Low,
183                    Some("May be consumed externally by npm consumers".to_string()),
184                )
185            } else {
186                (Confidence::High, None)
187            }
188        }
189        DeadReason::UnusedVariable => (Confidence::Medium, None),
190        DeadReason::ZombieFile => (Confidence::Medium, None),
191    }
192}
193
194pub fn detect_dead_code(db: &GraphDb) -> anyhow::Result<DeadCodeReport> {
195    let mut report = DeadCodeReport::default();
196
197    // Query 1: unreferenced exports — exported but no CALLS edges pointing at them.
198    // Uses NOT EXISTS on CALLS rather than in_degree=0 because all parsed nodes
199    // receive an EXPORTS edge from their containing file, making in_degree >= 1.
200    let unreferenced_exports = query_nodes(
201        db,
202        "SELECT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn,
203                n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, 0)
204         FROM nodes n
205         WHERE n.kind IN ('Function','Class','Variable','Type')
206         AND COALESCE(n.exported, 0) = 1
207         AND NOT EXISTS (SELECT 1 FROM edges e WHERE e.dst = n.id AND e.kind = 'CALLS')
208         AND n.path NOT LIKE '%test%' AND n.path NOT LIKE '%spec%'
209         AND n.path NOT LIKE '%.d.ts'",
210    )?;
211
212    for node in unreferenced_exports {
213        let (confidence, fp_risk) =
214            compute_confidence_and_fp(&node, &DeadReason::UnreferencedExport);
215        report.unreferenced_exports.push(DeadNode {
216            node,
217            reason: DeadReason::UnreferencedExport,
218            confidence,
219            false_positive_risk: fp_risk,
220        });
221    }
222
223    // Query 2: unreachable private functions — not exported, no CALLS edges to them.
224    let unreachable = query_nodes(
225        db,
226        "SELECT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn,
227                n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, 0)
228         FROM nodes n
229         WHERE n.kind = 'Function'
230         AND n.name != 'constructor'
231         AND COALESCE(n.exported, 0) = 0
232         AND NOT EXISTS (SELECT 1 FROM edges e WHERE e.dst = n.id AND e.kind = 'CALLS')
233         AND n.path NOT LIKE '%test%'",
234    )?;
235
236    for node in unreachable {
237        let (confidence, fp_risk) = compute_confidence_and_fp(&node, &DeadReason::Unreachable);
238        report.unreachable.push(DeadNode {
239            node,
240            reason: DeadReason::Unreachable,
241            confidence,
242            false_positive_risk: fp_risk,
243        });
244    }
245
246    // Query 3: unused variables — no CALLS edges point to them.
247    let unused_vars = query_nodes(
248        db,
249        "SELECT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn,
250                n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, 0)
251         FROM nodes n
252         WHERE n.kind = 'Variable'
253         AND NOT EXISTS (SELECT 1 FROM edges e WHERE e.dst = n.id AND e.kind = 'CALLS')
254         AND n.path NOT LIKE '%test%'",
255    )?;
256
257    for node in unused_vars {
258        let (confidence, fp_risk) = compute_confidence_and_fp(&node, &DeadReason::UnusedVariable);
259        report.unused_variables.push(DeadNode {
260            node,
261            reason: DeadReason::UnusedVariable,
262            confidence,
263            false_positive_risk: fp_risk,
264        });
265    }
266
267    // Query 4: disconnected nodes — no CALLS edges in either direction.
268    let disconnected = query_nodes(
269        db,
270        "SELECT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn,
271                n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, 0)
272         FROM nodes n
273         WHERE NOT EXISTS (SELECT 1 FROM edges e WHERE e.dst = n.id AND e.kind = 'CALLS')
274         AND NOT EXISTS (SELECT 1 FROM edges e WHERE e.src = n.id AND e.kind = 'CALLS')
275         AND n.kind NOT IN ('File','Module','Author')
276         AND n.path NOT LIKE '%test%'",
277    )?;
278
279    // Collect IDs already in other categories to avoid duplication
280    let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
281    for dn in &report.unreferenced_exports {
282        seen.insert(dn.node.id.clone());
283    }
284    for dn in &report.unreachable {
285        seen.insert(dn.node.id.clone());
286    }
287    for dn in &report.unused_variables {
288        seen.insert(dn.node.id.clone());
289    }
290
291    for node in disconnected {
292        if seen.contains(&node.id) {
293            continue;
294        }
295        let (confidence, fp_risk) = compute_confidence_and_fp(&node, &DeadReason::Disconnected);
296        report.disconnected.push(DeadNode {
297            node,
298            reason: DeadReason::Disconnected,
299            confidence,
300            false_positive_risk: fp_risk,
301        });
302    }
303
304    // Query 5: zombie files — no IMPORTS edges point to them, but they export something.
305    // File in_degree = count of IMPORTS edges from other files, so 0 means never imported.
306    let zombie_files = query_nodes(
307        db,
308        "SELECT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn,
309                n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, 0)
310         FROM nodes n
311         WHERE n.kind = 'File'
312         AND NOT EXISTS (SELECT 1 FROM edges e WHERE e.dst = n.id AND e.kind = 'IMPORTS')
313         AND EXISTS (SELECT 1 FROM edges e WHERE e.src = n.id)
314         AND regexp_extract(n.name, '[^/]+$') NOT IN ('index.ts','index.js','main.ts','main.rs','lib.rs','mod.rs','app.ts','__init__.py')
315         AND n.path NOT LIKE '%test%'",
316    )?;
317
318    for node in zombie_files {
319        let (confidence, fp_risk) = compute_confidence_and_fp(&node, &DeadReason::ZombieFile);
320        report.zombie_files.push(DeadNode {
321            node,
322            reason: DeadReason::ZombieFile,
323            confidence,
324            false_positive_risk: fp_risk,
325        });
326    }
327
328    Ok(report)
329}
330
331pub fn mark_dead_candidates(db: &GraphDb, report: &DeadCodeReport) -> anyhow::Result<()> {
332    let mut items: Vec<(String, String)> = Vec::new();
333    for dn in &report.unreferenced_exports {
334        items.push((dn.node.id.clone(), dn.reason.as_str().to_string()));
335    }
336    for dn in &report.unreachable {
337        items.push((dn.node.id.clone(), dn.reason.as_str().to_string()));
338    }
339    for dn in &report.unused_variables {
340        items.push((dn.node.id.clone(), dn.reason.as_str().to_string()));
341    }
342    for dn in &report.disconnected {
343        items.push((dn.node.id.clone(), dn.reason.as_str().to_string()));
344    }
345    for dn in &report.zombie_files {
346        items.push((dn.node.id.clone(), dn.reason.as_str().to_string()));
347    }
348    db.mark_dead_candidates(&items)
349}