Skip to main content

lean_ctx/core/
smells.rs

1//! Code smell detection engine.
2//!
3//! Runs structural rules against the Property Graph (SQLite) and tree-sitter
4//! data to identify dead code, high complexity, god files, fan-out skew, etc.
5//! Each rule is a pure function: `&Connection -> Vec<SmellFinding>`.
6
7use rusqlite::Connection;
8use serde::Serialize;
9
10#[derive(Debug, Clone, Serialize)]
11pub struct SmellFinding {
12    pub rule: &'static str,
13    pub severity: Severity,
14    pub file_path: String,
15    pub symbol: Option<String>,
16    pub line: Option<usize>,
17    pub message: String,
18    pub metric: Option<f64>,
19}
20
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
22#[serde(rename_all = "lowercase")]
23pub enum Severity {
24    Info,
25    Warning,
26    Error,
27}
28
29#[derive(Debug, Clone, Serialize)]
30pub struct SmellSummary {
31    pub rule: &'static str,
32    pub description: &'static str,
33    pub findings: usize,
34}
35
/// Thresholds used by the threshold-driven smell rules.
///
/// Every rule flags a node only when its measurement is strictly greater than
/// the corresponding threshold.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SmellConfig {
    /// Line span above which a symbol is reported by `long_function`.
    pub long_function_lines: usize,
    /// Line count above which a file is reported by `long_file`.
    pub long_file_lines: usize,
    /// Number of symbols above which a file is reported by `god_file`.
    pub god_file_symbols: usize,
    /// Number of outgoing `calls` edges above which a symbol is reported by
    /// `fan_out_skew`.
    pub fan_out_threshold: usize,
}
42
43impl Default for SmellConfig {
44    fn default() -> Self {
45        Self {
46            long_function_lines: 100,
47            long_file_lines: 500,
48            god_file_symbols: 30,
49            fan_out_threshold: 15,
50        }
51    }
52}
53
/// Registry of every smell rule as `(identifier, description)` pairs.
///
/// The order here is the order in which [`scan_all`] runs the rules and in
/// which [`summarize`] reports them.
pub static RULES: &[(&str, &str)] = &[
    ("dead_code", "Symbols defined but never referenced"),
    ("long_function", "Functions exceeding line threshold"),
    ("long_file", "Files exceeding line threshold"),
    ("god_file", "Files with excessive symbol count"),
    ("fan_out_skew", "Functions calling too many other symbols"),
    ("duplicate_definitions", "Same symbol name defined in multiple files"),
    ("untested_function", "Exported symbols without test coverage"),
    ("cyclomatic_complexity", "Functions with high branching complexity"),
];
73
74pub fn scan_all(conn: &Connection, cfg: &SmellConfig) -> Vec<SmellFinding> {
75    let mut all = Vec::new();
76    for &(rule, _) in RULES {
77        all.extend(scan_rule(conn, rule, cfg));
78    }
79    all
80}
81
82pub fn scan_rule(conn: &Connection, rule: &str, cfg: &SmellConfig) -> Vec<SmellFinding> {
83    match rule {
84        "dead_code" => detect_dead_code(conn),
85        "long_function" => detect_long_functions(conn, cfg.long_function_lines),
86        "long_file" => detect_long_files(conn, cfg.long_file_lines),
87        "god_file" => detect_god_files(conn, cfg.god_file_symbols),
88        "fan_out_skew" => detect_fan_out(conn, cfg.fan_out_threshold),
89        "duplicate_definitions" => detect_duplicate_definitions(conn),
90        "untested_function" => detect_untested(conn),
91        "cyclomatic_complexity" => detect_cyclomatic_complexity(conn),
92        _ => Vec::new(),
93    }
94}
95
96pub fn summarize(findings: &[SmellFinding]) -> Vec<SmellSummary> {
97    RULES
98        .iter()
99        .map(|&(rule, desc)| SmellSummary {
100            rule,
101            description: desc,
102            findings: findings.iter().filter(|f| f.rule == rule).count(),
103        })
104        .collect()
105}
106
107fn detect_dead_code(conn: &Connection) -> Vec<SmellFinding> {
108    let sql = "
109        SELECT n.name, n.file_path, n.line_start
110        FROM nodes n
111        WHERE n.kind = 'symbol'
112          AND n.file_path NOT LIKE '%test%'
113          AND n.file_path NOT LIKE '%spec%'
114          AND n.name NOT IN ('main', 'new', 'default', 'fmt', 'drop')
115          AND n.id NOT IN (
116              SELECT DISTINCT e.target_id FROM edges e
117              WHERE e.kind IN ('calls', 'type_ref', 'imports')
118          )
119        ORDER BY n.file_path, n.line_start
120        LIMIT 200
121    ";
122    query_findings(
123        conn,
124        sql,
125        "dead_code",
126        Severity::Warning,
127        |name, path, _line| format!("'{name}' defined in {path} but never referenced"),
128    )
129}
130
131fn detect_long_functions(conn: &Connection, threshold: usize) -> Vec<SmellFinding> {
132    let sql = format!(
133        "SELECT n.name, n.file_path, n.line_start,
134                (n.line_end - n.line_start) AS span
135         FROM nodes n
136         WHERE n.kind = 'symbol'
137           AND n.line_start IS NOT NULL
138           AND n.line_end IS NOT NULL
139           AND (n.line_end - n.line_start) > {threshold}
140         ORDER BY span DESC
141         LIMIT 100"
142    );
143    query_findings_with_metric(
144        conn,
145        &sql,
146        "long_function",
147        Severity::Warning,
148        |name, _path, _line, metric| {
149            format!("'{name}' is {metric:.0} lines (threshold: {threshold})")
150        },
151    )
152}
153
154fn detect_long_files(conn: &Connection, threshold: usize) -> Vec<SmellFinding> {
155    let sql = format!(
156        "SELECT n.name, n.file_path, NULL,
157                CAST(n.metadata AS INTEGER) AS line_count
158         FROM nodes n
159         WHERE n.kind = 'file'
160           AND n.metadata IS NOT NULL
161           AND CAST(n.metadata AS INTEGER) > {threshold}
162         ORDER BY line_count DESC
163         LIMIT 100"
164    );
165    query_findings_with_metric(
166        conn,
167        &sql,
168        "long_file",
169        Severity::Info,
170        |_name, path, _line, metric| {
171            format!("{path} has {metric:.0} lines (threshold: {threshold})")
172        },
173    )
174}
175
176fn detect_god_files(conn: &Connection, threshold: usize) -> Vec<SmellFinding> {
177    let sql = format!(
178        "SELECT COUNT(*) AS sym_count, n.file_path
179         FROM nodes n
180         WHERE n.kind = 'symbol'
181         GROUP BY n.file_path
182         HAVING sym_count > {threshold}
183         ORDER BY sym_count DESC
184         LIMIT 50"
185    );
186    let mut findings = Vec::new();
187    let Ok(mut stmt) = conn.prepare(&sql) else {
188        return findings;
189    };
190    let Ok(rows) = stmt.query_map([], |row| {
191        Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
192    }) else {
193        return findings;
194    };
195    for row in rows.flatten() {
196        let (count, path) = row;
197        findings.push(SmellFinding {
198            rule: "god_file",
199            severity: Severity::Warning,
200            file_path: path.clone(),
201            symbol: None,
202            line: None,
203            message: format!("{path} has {count} symbols (threshold: {threshold})"),
204            metric: Some(count as f64),
205        });
206    }
207    findings
208}
209
210fn detect_fan_out(conn: &Connection, threshold: usize) -> Vec<SmellFinding> {
211    let sql = format!(
212        "SELECT n.name, n.file_path, n.line_start, COUNT(e.id) AS call_count
213         FROM nodes n
214         JOIN edges e ON e.source_id = n.id AND e.kind = 'calls'
215         WHERE n.kind = 'symbol'
216         GROUP BY n.id
217         HAVING call_count > {threshold}
218         ORDER BY call_count DESC
219         LIMIT 100"
220    );
221    query_findings_with_metric(
222        conn,
223        &sql,
224        "fan_out_skew",
225        Severity::Warning,
226        |name, _path, _line, metric| {
227            format!("'{name}' calls {metric:.0} symbols (threshold: {threshold})")
228        },
229    )
230}
231
232fn detect_duplicate_definitions(conn: &Connection) -> Vec<SmellFinding> {
233    let sql = "
234        SELECT n.name, GROUP_CONCAT(n.file_path, ', ') AS files, COUNT(*) AS cnt
235        FROM nodes n
236        WHERE n.kind = 'symbol'
237          AND n.name NOT IN ('new', 'default', 'fmt', 'from', 'into', 'drop', 'clone', 'eq')
238        GROUP BY n.name
239        HAVING cnt > 1
240        ORDER BY cnt DESC
241        LIMIT 50
242    ";
243    let mut findings = Vec::new();
244    let Ok(mut stmt) = conn.prepare(sql) else {
245        return findings;
246    };
247    let Ok(rows) = stmt.query_map([], |row| {
248        Ok((
249            row.get::<_, String>(0)?,
250            row.get::<_, String>(1)?,
251            row.get::<_, i64>(2)?,
252        ))
253    }) else {
254        return findings;
255    };
256    for row in rows.flatten() {
257        let (name, files, count) = row;
258        findings.push(SmellFinding {
259            rule: "duplicate_definitions",
260            severity: Severity::Info,
261            file_path: files.clone(),
262            symbol: Some(name.clone()),
263            line: None,
264            message: format!("'{name}' defined in {count} files: {files}"),
265            metric: Some(count as f64),
266        });
267    }
268    findings
269}
270
271fn detect_untested(conn: &Connection) -> Vec<SmellFinding> {
272    let sql = "
273        SELECT n.name, n.file_path, n.line_start
274        FROM nodes n
275        WHERE n.kind = 'symbol'
276          AND n.file_path NOT LIKE '%test%'
277          AND n.file_path NOT LIKE '%spec%'
278          AND n.metadata LIKE '%export%'
279          AND n.id NOT IN (
280              SELECT DISTINCT e.source_id FROM edges e WHERE e.kind = 'tested_by'
281          )
282          AND n.id NOT IN (
283              SELECT DISTINCT e.target_id FROM edges e WHERE e.kind = 'tested_by'
284          )
285        ORDER BY n.file_path, n.line_start
286        LIMIT 100
287    ";
288    query_findings(
289        conn,
290        sql,
291        "untested_function",
292        Severity::Info,
293        |name, path, _line| format!("'{name}' in {path} has no test coverage"),
294    )
295}
296
297fn detect_cyclomatic_complexity(conn: &Connection) -> Vec<SmellFinding> {
298    // Cyclomatic complexity requires source access (tree-sitter AST).
299    // We approximate via the node span + number of outgoing call edges
300    // as a proxy: functions with many calls AND large spans tend to have
301    // high branching complexity.
302    let sql = "
303        SELECT n.name, n.file_path, n.line_start,
304               (n.line_end - n.line_start) AS span,
305               (SELECT COUNT(*) FROM edges e WHERE e.source_id = n.id AND e.kind = 'calls') AS calls
306        FROM nodes n
307        WHERE n.kind = 'symbol'
308          AND n.line_start IS NOT NULL
309          AND n.line_end IS NOT NULL
310          AND (n.line_end - n.line_start) > 20
311        ORDER BY (span * 0.3 + calls * 0.7) DESC
312        LIMIT 100
313    ";
314    let mut findings = Vec::new();
315    let Ok(mut stmt) = conn.prepare(sql) else {
316        return findings;
317    };
318    let Ok(rows) = stmt.query_map([], |row| {
319        Ok((
320            row.get::<_, String>(0)?,
321            row.get::<_, String>(1)?,
322            row.get::<_, Option<i64>>(2)?,
323            row.get::<_, i64>(3)?,
324            row.get::<_, i64>(4)?,
325        ))
326    }) else {
327        return findings;
328    };
329    for row in rows.flatten() {
330        let (name, path, line, span, calls) = row;
331        let complexity_proxy = (span as f64) * 0.3 + (calls as f64) * 0.7;
332        if complexity_proxy < 10.0 {
333            continue;
334        }
335        let severity = if complexity_proxy > 30.0 {
336            Severity::Error
337        } else if complexity_proxy > 20.0 {
338            Severity::Warning
339        } else {
340            Severity::Info
341        };
342        findings.push(SmellFinding {
343            rule: "cyclomatic_complexity",
344            severity,
345            file_path: path,
346            symbol: Some(name.clone()),
347            line: line.map(|l| l as usize),
348            message: format!(
349                "'{name}' complexity proxy {complexity_proxy:.1} (span={span}, calls={calls})"
350            ),
351            metric: Some(complexity_proxy),
352        });
353    }
354    findings
355}
356
357fn query_findings(
358    conn: &Connection,
359    sql: &str,
360    rule: &'static str,
361    severity: Severity,
362    msg_fn: impl Fn(&str, &str, Option<usize>) -> String,
363) -> Vec<SmellFinding> {
364    let mut findings = Vec::new();
365    let Ok(mut stmt) = conn.prepare(sql) else {
366        return findings;
367    };
368    let Ok(rows) = stmt.query_map([], |row| {
369        Ok((
370            row.get::<_, String>(0)?,
371            row.get::<_, String>(1)?,
372            row.get::<_, Option<i64>>(2)?,
373        ))
374    }) else {
375        return findings;
376    };
377    for row in rows.flatten() {
378        let (name, path, line) = row;
379        let line_usize = line.map(|l| l as usize);
380        findings.push(SmellFinding {
381            rule,
382            severity,
383            file_path: path.clone(),
384            symbol: Some(name.clone()),
385            line: line_usize,
386            message: msg_fn(&name, &path, line_usize),
387            metric: None,
388        });
389    }
390    findings
391}
392
393fn query_findings_with_metric(
394    conn: &Connection,
395    sql: &str,
396    rule: &'static str,
397    severity: Severity,
398    msg_fn: impl Fn(&str, &str, Option<usize>, f64) -> String,
399) -> Vec<SmellFinding> {
400    let mut findings = Vec::new();
401    let Ok(mut stmt) = conn.prepare(sql) else {
402        return findings;
403    };
404    let Ok(rows) = stmt.query_map([], |row| {
405        Ok((
406            row.get::<_, String>(0)?,
407            row.get::<_, String>(1)?,
408            row.get::<_, Option<i64>>(2)?,
409            row.get::<_, f64>(3)?,
410        ))
411    }) else {
412        return findings;
413    };
414    for row in rows.flatten() {
415        let (name, path, line, metric) = row;
416        let line_usize = line.map(|l| l as usize);
417        findings.push(SmellFinding {
418            rule,
419            severity,
420            file_path: path.clone(),
421            symbol: Some(name.clone()),
422            line: line_usize,
423            message: msg_fn(&name, &path, line_usize, metric),
424            metric: Some(metric),
425        });
426    }
427    findings
428}
429
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::property_graph::{CodeGraph, Edge, EdgeKind, Node, NodeKind};

    /// Builds an in-memory graph with three files and three symbols:
    /// `process` (called from `src/main.rs`), `unused_helper` (no incoming
    /// edges) and `mega_function` (lines 1-200). `src/utils.rs` carries the
    /// metadata `"600"`.
    fn setup_graph() -> CodeGraph {
        let graph = CodeGraph::open_in_memory().unwrap();

        let main_file = graph.upsert_node(&Node::file("src/main.rs")).unwrap();
        let lib_file = graph.upsert_node(&Node::file("src/lib.rs")).unwrap();
        graph
            .upsert_node(&Node::file("src/utils.rs").with_metadata("600"))
            .unwrap();

        let process = graph
            .upsert_node(
                &Node::symbol("process", "src/lib.rs", NodeKind::Symbol).with_lines(10, 50),
            )
            .unwrap();
        // No incoming edges are ever added for this symbol -> dead code.
        graph
            .upsert_node(
                &Node::symbol("unused_helper", "src/lib.rs", NodeKind::Symbol).with_lines(60, 80),
            )
            .unwrap();
        graph
            .upsert_node(
                &Node::symbol("mega_function", "src/utils.rs", NodeKind::Symbol).with_lines(1, 200),
            )
            .unwrap();

        graph
            .upsert_edge(&Edge::new(main_file, lib_file, EdgeKind::Imports))
            .unwrap();
        graph
            .upsert_edge(&Edge::new(main_file, process, EdgeKind::Calls))
            .unwrap();

        graph
    }

    #[test]
    fn dead_code_detection() {
        let graph = setup_graph();
        let flagged = detect_dead_code(graph.connection())
            .iter()
            .any(|f| f.symbol.as_deref() == Some("unused_helper"));
        assert!(flagged, "Should detect unused_helper as dead code");
    }

    #[test]
    fn long_function_detection() {
        let graph = setup_graph();
        let flagged = detect_long_functions(graph.connection(), 100)
            .iter()
            .any(|f| f.symbol.as_deref() == Some("mega_function"));
        assert!(flagged, "Should detect mega_function as too long");
    }

    #[test]
    fn long_file_detection() {
        let graph = setup_graph();
        let flagged = detect_long_files(graph.connection(), 500)
            .iter()
            .any(|f| f.file_path == "src/utils.rs");
        assert!(
            flagged,
            "Should detect src/utils.rs as long file (600 lines)"
        );
    }

    #[test]
    fn scan_all_returns_findings() {
        let graph = setup_graph();
        let findings = scan_all(graph.connection(), &SmellConfig::default());
        assert!(!findings.is_empty(), "Should find at least one smell");
    }

    #[test]
    fn summarize_groups_by_rule() {
        let graph = setup_graph();
        let findings = scan_all(graph.connection(), &SmellConfig::default());
        let summary = summarize(&findings);
        assert_eq!(summary.len(), RULES.len());
        for entry in &summary {
            assert!(!entry.description.is_empty());
        }
    }
}