Skip to main content

cgx_engine/
deadcode.rs

1use crate::graph::{GraphDb, Node};
2
/// A graph node flagged as potentially dead, together with the heuristic that
/// flagged it and an assessment of how trustworthy that flag is.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct DeadNode {
    /// The flagged graph node.
    pub node: Node,
    /// The dead-code category this node was reported under.
    pub reason: DeadReason,
    /// How confident the detector is that the node is really dead.
    pub confidence: Confidence,
    /// Human-readable explanation of why the finding may be a false positive;
    /// `None` when no specific risk was identified.
    pub false_positive_risk: Option<String>,
}
10
/// Confidence level attached to a dead-code finding.
///
/// Serialized by serde in snake_case (`high`, `medium`, `low`).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum Confidence {
    /// The finding is very likely real dead code.
    High,
    /// The finding is plausible but should be reviewed.
    Medium,
    /// The finding has a known false-positive risk.
    Low,
}
18
19impl Confidence {
20    pub fn as_str(&self) -> &'static str {
21        match self {
22            Confidence::High => "high",
23            Confidence::Medium => "medium",
24            Confidence::Low => "low",
25        }
26    }
27}
28
/// The heuristic category under which a node was reported as dead.
///
/// Serialized by serde in snake_case (e.g. `unreferenced_export`).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum DeadReason {
    /// Exported symbol with no incoming `CALLS` edge.
    UnreferencedExport,
    /// Non-exported function with no incoming `CALLS` edge.
    Unreachable,
    /// Variable with no incoming `CALLS` edge.
    UnusedVariable,
    /// Node with no `CALLS` edge in either direction.
    Disconnected,
    /// File that no `IMPORTS` edge points to.
    ZombieFile,
}
38
39impl DeadReason {
40    pub fn as_str(&self) -> &'static str {
41        match self {
42            DeadReason::UnreferencedExport => "unreferenced_export",
43            DeadReason::Unreachable => "unreachable",
44            DeadReason::UnusedVariable => "unused_variable",
45            DeadReason::Disconnected => "disconnected",
46            DeadReason::ZombieFile => "zombie_file",
47        }
48    }
49}
50
/// Dead-code findings grouped by the heuristic that produced them.
///
/// `Default` yields a report with every category empty.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct DeadCodeReport {
    /// Findings reported as [`DeadReason::UnreferencedExport`].
    pub unreferenced_exports: Vec<DeadNode>,
    /// Findings reported as [`DeadReason::Unreachable`].
    pub unreachable: Vec<DeadNode>,
    /// Findings reported as [`DeadReason::UnusedVariable`].
    pub unused_variables: Vec<DeadNode>,
    /// Findings reported as [`DeadReason::Disconnected`].
    pub disconnected: Vec<DeadNode>,
    /// Findings reported as [`DeadReason::ZombieFile`].
    pub zombie_files: Vec<DeadNode>,
}
59
60impl DeadCodeReport {
61    pub fn all_items(&self) -> Vec<&DeadNode> {
62        let mut all = Vec::new();
63        all.extend(self.unreferenced_exports.iter());
64        all.extend(self.unreachable.iter());
65        all.extend(self.unused_variables.iter());
66        all.extend(self.disconnected.iter());
67        all.extend(self.zombie_files.iter());
68        all
69    }
70
71    pub fn total(&self) -> usize {
72        self.unreferenced_exports.len()
73            + self.unreachable.len()
74            + self.unused_variables.len()
75            + self.disconnected.len()
76            + self.zombie_files.len()
77    }
78
79    pub fn count_by_confidence(&self) -> (usize, usize, usize) {
80        let mut high = 0;
81        let mut medium = 0;
82        let mut low = 0;
83        for item in self.all_items() {
84            match item.confidence {
85                Confidence::High => high += 1,
86                Confidence::Medium => medium += 1,
87                Confidence::Low => low += 1,
88            }
89        }
90        (high, medium, low)
91    }
92}
93
94fn query_nodes(db: &GraphDb, sql: &str) -> anyhow::Result<Vec<Node>> {
95    let mut stmt = db.conn.prepare(sql)?;
96    let rows = stmt.query_map([], |row| {
97        Ok(Node {
98            id: row.get(0)?,
99            kind: row.get(1)?,
100            name: row.get(2)?,
101            path: row.get(3)?,
102            line_start: row.get::<_, u32>(4)?,
103            line_end: row.get::<_, u32>(5)?,
104            language: row.get::<_, Option<String>>(6)?.unwrap_or_default(),
105            churn: row.get::<_, f64>(7)?,
106            coupling: row.get::<_, f64>(8)?,
107            community: row.get::<_, i64>(9)?,
108            in_degree: row.get::<_, i64>(10)?,
109            out_degree: row.get::<_, i64>(11)?,
110            exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
111            is_dead_candidate: false,
112            dead_reason: None,
113        })
114    })?;
115    let mut result = Vec::new();
116    for row in rows {
117        result.push(row?);
118    }
119    Ok(result)
120}
121
/// Function names treated as framework-invoked hooks. A node whose name is in
/// this list is rated low confidence by `compute_confidence_and_fp`, whatever
/// the reason it was flagged for.
// NOTE(review): `beforeEach` and `afterAll` are listed but `beforeAll` and
// `afterEach` are not — confirm whether that asymmetry is intentional.
const FRAMEWORK_HOOKS: &[&str] = &[
    "getServerSideProps",
    "getStaticProps",
    "loader",
    "action",
    "beforeEach",
    "afterAll",
    "getStaticPaths",
];
131
/// Names commonly used for program entry points. A node with one of these
/// names is rated low confidence by `compute_confidence_and_fp`.
const ENTRY_POINT_NAMES: &[&str] = &["main", "init", "setup", "bootstrap", "start"];
133
134fn compute_confidence_and_fp(node: &Node, reason: &DeadReason) -> (Confidence, Option<String>) {
135    // Framework hooks -> Low
136    if FRAMEWORK_HOOKS.contains(&node.name.as_str()) {
137        return (
138            Confidence::Low,
139            Some("Framework hook — called by framework not by your code".to_string()),
140        );
141    }
142    // Entry point names
143    if ENTRY_POINT_NAMES.contains(&node.name.as_str()) {
144        return (
145            Confidence::Low,
146            Some("Common entry point name — verify before deleting".to_string()),
147        );
148    }
149    // Types/interfaces erased at runtime
150    if node.kind == "Type" || node.kind == "Interface" {
151        return (
152            Confidence::Low,
153            Some(
154                "Types erased at runtime — may be used by consuming TypeScript packages"
155                    .to_string(),
156            ),
157        );
158    }
159    // lib/ or dist/ files
160    if node.path.contains("/lib/")
161        || node.path.contains("/dist/")
162        || node.path.starts_with("lib/")
163        || node.path.starts_with("dist/")
164    {
165        return (
166            Confidence::Low,
167            Some("May be consumed externally by npm consumers".to_string()),
168        );
169    }
170
171    match reason {
172        DeadReason::Unreachable => (Confidence::High, None),
173        DeadReason::Disconnected => (Confidence::High, None),
174        DeadReason::UnreferencedExport => {
175            let filename = node.path.split('/').next_back().unwrap_or("");
176            if matches!(filename, "index.ts" | "index.js" | "lib.rs" | "mod.rs") {
177                (
178                    Confidence::Low,
179                    Some("May be consumed externally by npm consumers".to_string()),
180                )
181            } else {
182                (Confidence::High, None)
183            }
184        }
185        DeadReason::UnusedVariable => (Confidence::Medium, None),
186        DeadReason::ZombieFile => (Confidence::Medium, None),
187    }
188}
189
190pub fn detect_dead_code(db: &GraphDb) -> anyhow::Result<DeadCodeReport> {
191    let mut report = DeadCodeReport::default();
192
193    // Query 1: unreferenced exports — exported but no CALLS edges pointing at them.
194    // Uses NOT EXISTS on CALLS rather than in_degree=0 because all parsed nodes
195    // receive an EXPORTS edge from their containing file, making in_degree >= 1.
196    let unreferenced_exports = query_nodes(
197        db,
198        "SELECT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn,
199                n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, 0)
200         FROM nodes n
201         WHERE n.kind IN ('Function','Class','Variable','Type')
202         AND COALESCE(n.exported, 0) = 1
203         AND NOT EXISTS (SELECT 1 FROM edges e WHERE e.dst = n.id AND e.kind = 'CALLS')
204         AND n.path NOT LIKE '%test%' AND n.path NOT LIKE '%spec%'
205         AND n.path NOT LIKE '%.d.ts'",
206    )?;
207
208    for node in unreferenced_exports {
209        let (confidence, fp_risk) =
210            compute_confidence_and_fp(&node, &DeadReason::UnreferencedExport);
211        report.unreferenced_exports.push(DeadNode {
212            node,
213            reason: DeadReason::UnreferencedExport,
214            confidence,
215            false_positive_risk: fp_risk,
216        });
217    }
218
219    // Query 2: unreachable private functions — not exported, no CALLS edges to them.
220    let unreachable = query_nodes(
221        db,
222        "SELECT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn,
223                n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, 0)
224         FROM nodes n
225         WHERE n.kind = 'Function'
226         AND n.name != 'constructor'
227         AND COALESCE(n.exported, 0) = 0
228         AND NOT EXISTS (SELECT 1 FROM edges e WHERE e.dst = n.id AND e.kind = 'CALLS')
229         AND n.path NOT LIKE '%test%'",
230    )?;
231
232    for node in unreachable {
233        let (confidence, fp_risk) = compute_confidence_and_fp(&node, &DeadReason::Unreachable);
234        report.unreachable.push(DeadNode {
235            node,
236            reason: DeadReason::Unreachable,
237            confidence,
238            false_positive_risk: fp_risk,
239        });
240    }
241
242    // Query 3: unused variables — no CALLS edges point to them.
243    let unused_vars = query_nodes(
244        db,
245        "SELECT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn,
246                n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, 0)
247         FROM nodes n
248         WHERE n.kind = 'Variable'
249         AND NOT EXISTS (SELECT 1 FROM edges e WHERE e.dst = n.id AND e.kind = 'CALLS')
250         AND n.path NOT LIKE '%test%'",
251    )?;
252
253    for node in unused_vars {
254        let (confidence, fp_risk) = compute_confidence_and_fp(&node, &DeadReason::UnusedVariable);
255        report.unused_variables.push(DeadNode {
256            node,
257            reason: DeadReason::UnusedVariable,
258            confidence,
259            false_positive_risk: fp_risk,
260        });
261    }
262
263    // Query 4: disconnected nodes — no CALLS edges in either direction.
264    let disconnected = query_nodes(
265        db,
266        "SELECT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn,
267                n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, 0)
268         FROM nodes n
269         WHERE NOT EXISTS (SELECT 1 FROM edges e WHERE e.dst = n.id AND e.kind = 'CALLS')
270         AND NOT EXISTS (SELECT 1 FROM edges e WHERE e.src = n.id AND e.kind = 'CALLS')
271         AND n.kind NOT IN ('File','Module','Author')
272         AND n.path NOT LIKE '%test%'",
273    )?;
274
275    // Collect IDs already in other categories to avoid duplication
276    let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
277    for dn in &report.unreferenced_exports {
278        seen.insert(dn.node.id.clone());
279    }
280    for dn in &report.unreachable {
281        seen.insert(dn.node.id.clone());
282    }
283    for dn in &report.unused_variables {
284        seen.insert(dn.node.id.clone());
285    }
286
287    for node in disconnected {
288        if seen.contains(&node.id) {
289            continue;
290        }
291        let (confidence, fp_risk) = compute_confidence_and_fp(&node, &DeadReason::Disconnected);
292        report.disconnected.push(DeadNode {
293            node,
294            reason: DeadReason::Disconnected,
295            confidence,
296            false_positive_risk: fp_risk,
297        });
298    }
299
300    // Query 5: zombie files — no IMPORTS edges point to them, but they export something.
301    // File in_degree = count of IMPORTS edges from other files, so 0 means never imported.
302    let zombie_files = query_nodes(
303        db,
304        "SELECT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn,
305                n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, 0)
306         FROM nodes n
307         WHERE n.kind = 'File'
308         AND NOT EXISTS (SELECT 1 FROM edges e WHERE e.dst = n.id AND e.kind = 'IMPORTS')
309         AND EXISTS (SELECT 1 FROM edges e WHERE e.src = n.id)
310         AND regexp_extract(n.name, '[^/]+$') NOT IN ('index.ts','index.js','main.ts','main.rs','lib.rs','mod.rs','app.ts','__init__.py')
311         AND n.path NOT LIKE '%test%'",
312    )?;
313
314    for node in zombie_files {
315        let (confidence, fp_risk) = compute_confidence_and_fp(&node, &DeadReason::ZombieFile);
316        report.zombie_files.push(DeadNode {
317            node,
318            reason: DeadReason::ZombieFile,
319            confidence,
320            false_positive_risk: fp_risk,
321        });
322    }
323
324    Ok(report)
325}
326
327pub fn mark_dead_candidates(db: &GraphDb, report: &DeadCodeReport) -> anyhow::Result<()> {
328    let mut items: Vec<(String, String)> = Vec::new();
329    for dn in &report.unreferenced_exports {
330        items.push((dn.node.id.clone(), dn.reason.as_str().to_string()));
331    }
332    for dn in &report.unreachable {
333        items.push((dn.node.id.clone(), dn.reason.as_str().to_string()));
334    }
335    for dn in &report.unused_variables {
336        items.push((dn.node.id.clone(), dn.reason.as_str().to_string()));
337    }
338    for dn in &report.disconnected {
339        items.push((dn.node.id.clone(), dn.reason.as_str().to_string()));
340    }
341    for dn in &report.zombie_files {
342        items.push((dn.node.id.clone(), dn.reason.as_str().to_string()));
343    }
344    db.mark_dead_candidates(&items)
345}