Skip to main content

lean_ctx/core/
smells.rs

//! Code smell detection engine.
//!
//! Runs structural rules against the Property Graph (SQLite) and tree-sitter
//! data to identify dead code, high complexity, god files, fan-out skew, etc.
//! Each rule is a function from a `&Connection` (plus a threshold, for the
//! configurable rules) to a `Vec<SmellFinding>`.
6
7use rusqlite::Connection;
8use serde::Serialize;
9
10#[derive(Debug, Clone, Serialize)]
11pub struct SmellFinding {
12    pub rule: &'static str,
13    pub severity: Severity,
14    pub file_path: String,
15    pub symbol: Option<String>,
16    pub line: Option<usize>,
17    pub message: String,
18    pub metric: Option<f64>,
19}
20
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
22#[serde(rename_all = "lowercase")]
23pub enum Severity {
24    Info,
25    Warning,
26    Error,
27}
28
29#[derive(Debug, Clone, Serialize)]
30pub struct SmellSummary {
31    pub rule: &'static str,
32    pub description: &'static str,
33    pub findings: usize,
34}
35
/// Thresholds for the configurable smell rules.
///
/// Each rule reports only values strictly greater than its threshold.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SmellConfig {
    /// Threshold for the `long_function` rule, compared against
    /// `line_end - line_start` of a symbol.
    pub long_function_lines: usize,
    /// Threshold for the `long_file` rule.
    pub long_file_lines: usize,
    /// Threshold for the `god_file` rule: number of symbols in one file.
    pub god_file_symbols: usize,
    /// Threshold for the `fan_out_skew` rule: number of outgoing `calls` edges.
    pub fan_out_threshold: usize,
}

impl Default for SmellConfig {
    /// Returns the stock thresholds: 100 / 500 / 30 / 15.
    fn default() -> Self {
        Self {
            long_function_lines: 100,
            long_file_lines: 500,
            god_file_symbols: 30,
            fan_out_threshold: 15,
        }
    }
}
53
/// Every known rule as `(identifier, description)`, in the order `scan_all`
/// runs them and `summarize` reports them.
pub static RULES: &[(&str, &str)] = &[
    ("dead_code", "Symbols defined but never referenced"),
    ("long_function", "Functions exceeding line threshold"),
    ("long_file", "Files exceeding line threshold"),
    ("god_file", "Files with excessive symbol count"),
    ("fan_out_skew", "Functions calling too many other symbols"),
    ("duplicate_definitions", "Same symbol name defined in multiple files"),
    ("untested_function", "Exported symbols without test coverage"),
    ("cyclomatic_complexity", "Functions with high branching complexity"),
];
73
74pub fn scan_all(conn: &Connection, cfg: &SmellConfig) -> Vec<SmellFinding> {
75    let mut all = Vec::new();
76    for &(rule, _) in RULES {
77        all.extend(scan_rule(conn, rule, cfg));
78    }
79    all
80}
81
82pub fn scan_rule(conn: &Connection, rule: &str, cfg: &SmellConfig) -> Vec<SmellFinding> {
83    match rule {
84        "dead_code" => detect_dead_code(conn),
85        "long_function" => detect_long_functions(conn, cfg.long_function_lines),
86        "long_file" => detect_long_files(conn, cfg.long_file_lines),
87        "god_file" => detect_god_files(conn, cfg.god_file_symbols),
88        "fan_out_skew" => detect_fan_out(conn, cfg.fan_out_threshold),
89        "duplicate_definitions" => detect_duplicate_definitions(conn),
90        "untested_function" => detect_untested(conn),
91        "cyclomatic_complexity" => detect_cyclomatic_complexity(conn),
92        _ => Vec::new(),
93    }
94}
95
96pub fn summarize(findings: &[SmellFinding]) -> Vec<SmellSummary> {
97    RULES
98        .iter()
99        .map(|&(rule, desc)| SmellSummary {
100            rule,
101            description: desc,
102            findings: findings.iter().filter(|f| f.rule == rule).count(),
103        })
104        .collect()
105}
106
107fn detect_dead_code(conn: &Connection) -> Vec<SmellFinding> {
108    let sql = "
109        SELECT n.name, n.file_path, n.line_start
110        FROM nodes n
111        WHERE n.kind = 'symbol'
112          AND n.file_path NOT LIKE '%test%'
113          AND n.file_path NOT LIKE '%spec%'
114          AND n.name NOT IN ('main', 'new', 'default', 'fmt', 'drop')
115          AND n.id NOT IN (
116              SELECT DISTINCT e.target_id FROM edges e
117              WHERE e.kind IN ('calls', 'type_ref', 'imports')
118          )
119        ORDER BY n.file_path, n.line_start
120        LIMIT 200
121    ";
122    query_findings(
123        conn,
124        sql,
125        "dead_code",
126        Severity::Warning,
127        |name, path, _line| format!("'{name}' defined in {path} but never referenced"),
128    )
129}
130
131fn detect_long_functions(conn: &Connection, threshold: usize) -> Vec<SmellFinding> {
132    let sql = format!(
133        "SELECT n.name, n.file_path, n.line_start,
134                (n.line_end - n.line_start) AS span
135         FROM nodes n
136         WHERE n.kind = 'symbol'
137           AND n.line_start IS NOT NULL
138           AND n.line_end IS NOT NULL
139           AND (n.line_end - n.line_start) > {threshold}
140         ORDER BY span DESC
141         LIMIT 100"
142    );
143    query_findings_with_metric(
144        conn,
145        &sql,
146        "long_function",
147        Severity::Warning,
148        |name, _path, _line, metric| {
149            format!("'{name}' is {metric:.0} lines (threshold: {threshold})")
150        },
151    )
152}
153
154fn detect_long_files(conn: &Connection, threshold: usize) -> Vec<SmellFinding> {
155    let sql = format!(
156        "SELECT n.name, n.file_path, NULL,
157                CAST(n.metadata AS INTEGER) AS line_count
158         FROM nodes n
159         WHERE n.kind = 'file'
160           AND n.metadata IS NOT NULL
161           AND CAST(n.metadata AS INTEGER) > {threshold}
162         ORDER BY line_count DESC
163         LIMIT 100"
164    );
165    query_findings_with_metric(
166        conn,
167        &sql,
168        "long_file",
169        Severity::Info,
170        |_name, path, _line, metric| {
171            format!("{path} has {metric:.0} lines (threshold: {threshold})")
172        },
173    )
174}
175
176fn detect_god_files(conn: &Connection, threshold: usize) -> Vec<SmellFinding> {
177    let sql = format!(
178        "SELECT COUNT(*) AS sym_count, n.file_path
179         FROM nodes n
180         WHERE n.kind = 'symbol'
181         GROUP BY n.file_path
182         HAVING sym_count > {threshold}
183         ORDER BY sym_count DESC
184         LIMIT 50"
185    );
186    let mut findings = Vec::new();
187    let Ok(mut stmt) = conn.prepare(&sql) else {
188        return findings;
189    };
190    let Ok(rows) = stmt.query_map([], |row| {
191        Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
192    }) else {
193        return findings;
194    };
195    for row in rows.flatten() {
196        let (count, path) = row;
197        findings.push(SmellFinding {
198            rule: "god_file",
199            severity: Severity::Warning,
200            file_path: path.clone(),
201            symbol: None,
202            line: None,
203            message: format!("{path} has {count} symbols (threshold: {threshold})"),
204            metric: Some(count as f64),
205        });
206    }
207    findings
208}
209
210fn detect_fan_out(conn: &Connection, threshold: usize) -> Vec<SmellFinding> {
211    let sql = format!(
212        "SELECT n.name, n.file_path, n.line_start, COUNT(e.id) AS call_count
213         FROM nodes n
214         JOIN edges e ON e.source_id = n.id AND e.kind = 'calls'
215         WHERE n.kind = 'symbol'
216         GROUP BY n.id
217         HAVING call_count > {threshold}
218         ORDER BY call_count DESC
219         LIMIT 100"
220    );
221    query_findings_with_metric(
222        conn,
223        &sql,
224        "fan_out_skew",
225        Severity::Warning,
226        |name, _path, _line, metric| {
227            format!("'{name}' calls {metric:.0} symbols (threshold: {threshold})")
228        },
229    )
230}
231
232fn detect_duplicate_definitions(conn: &Connection) -> Vec<SmellFinding> {
233    let sql = "
234        SELECT n.name, GROUP_CONCAT(n.file_path, ', ') AS files, COUNT(*) AS cnt
235        FROM nodes n
236        WHERE n.kind = 'symbol'
237          AND n.name NOT IN ('new', 'default', 'fmt', 'from', 'into', 'drop', 'clone', 'eq')
238        GROUP BY n.name
239        HAVING cnt > 1
240        ORDER BY cnt DESC
241        LIMIT 50
242    ";
243    let mut findings = Vec::new();
244    let Ok(mut stmt) = conn.prepare(sql) else {
245        return findings;
246    };
247    let Ok(rows) = stmt.query_map([], |row| {
248        Ok((
249            row.get::<_, String>(0)?,
250            row.get::<_, String>(1)?,
251            row.get::<_, i64>(2)?,
252        ))
253    }) else {
254        return findings;
255    };
256    for row in rows.flatten() {
257        let (name, files, count) = row;
258        findings.push(SmellFinding {
259            rule: "duplicate_definitions",
260            severity: Severity::Info,
261            file_path: files.clone(),
262            symbol: Some(name.clone()),
263            line: None,
264            message: format!("'{name}' defined in {count} files: {files}"),
265            metric: Some(count as f64),
266        });
267    }
268    findings
269}
270
271fn detect_untested(conn: &Connection) -> Vec<SmellFinding> {
272    let sql = "
273        SELECT n.name, n.file_path, n.line_start
274        FROM nodes n
275        WHERE n.kind = 'symbol'
276          AND n.file_path NOT LIKE '%test%'
277          AND n.file_path NOT LIKE '%spec%'
278          AND n.metadata LIKE '%export%'
279          AND n.id NOT IN (
280              SELECT DISTINCT e.source_id FROM edges e WHERE e.kind = 'tested_by'
281          )
282          AND n.id NOT IN (
283              SELECT DISTINCT e.target_id FROM edges e WHERE e.kind = 'tested_by'
284          )
285        ORDER BY n.file_path, n.line_start
286        LIMIT 100
287    ";
288    query_findings(
289        conn,
290        sql,
291        "untested_function",
292        Severity::Info,
293        |name, path, _line| format!("'{name}' in {path} has no test coverage"),
294    )
295}
296
297fn detect_cyclomatic_complexity(conn: &Connection) -> Vec<SmellFinding> {
298    #[cfg(feature = "tree-sitter")]
299    {
300        detect_cyclomatic_tree_sitter(conn)
301    }
302    #[cfg(not(feature = "tree-sitter"))]
303    {
304        detect_cyclomatic_heuristic(conn)
305    }
306}
307
308/// Span × calls proxy when the `tree-sitter` feature is off (no AST available).
309#[cfg(not(feature = "tree-sitter"))]
310fn detect_cyclomatic_heuristic(conn: &Connection) -> Vec<SmellFinding> {
311    let sql = "
312        SELECT n.name, n.file_path, n.line_start,
313               (n.line_end - n.line_start) AS span,
314               (SELECT COUNT(*) FROM edges e WHERE e.source_id = n.id AND e.kind = 'calls') AS calls
315        FROM nodes n
316        WHERE n.kind = 'symbol'
317          AND n.line_start IS NOT NULL
318          AND n.line_end IS NOT NULL
319          AND (n.line_end - n.line_start) > 20
320        ORDER BY (span * 0.3 + calls * 0.7) DESC
321        LIMIT 100
322    ";
323    let mut findings = Vec::new();
324    let Ok(mut stmt) = conn.prepare(sql) else {
325        return findings;
326    };
327    let Ok(rows) = stmt.query_map([], |row| {
328        Ok((
329            row.get::<_, String>(0)?,
330            row.get::<_, String>(1)?,
331            row.get::<_, Option<i64>>(2)?,
332            row.get::<_, i64>(3)?,
333            row.get::<_, i64>(4)?,
334        ))
335    }) else {
336        return findings;
337    };
338    for row in rows.flatten() {
339        let (name, path, line, span, calls) = row;
340        let complexity_proxy = (span as f64) * 0.3 + (calls as f64) * 0.7;
341        if complexity_proxy < 10.0 {
342            continue;
343        }
344        let severity = if complexity_proxy > 30.0 {
345            Severity::Error
346        } else if complexity_proxy > 20.0 {
347            Severity::Warning
348        } else {
349            Severity::Info
350        };
351        findings.push(SmellFinding {
352            rule: "cyclomatic_complexity",
353            severity,
354            file_path: path,
355            symbol: Some(name.clone()),
356            line: line.map(|l| l as usize),
357            message: format!(
358                "'{name}' complexity proxy {complexity_proxy:.1} (span={span}, calls={calls})"
359            ),
360            metric: Some(complexity_proxy),
361        });
362    }
363    findings
364}
365
/// Cyclomatic complexity from source when the `tree-sitter` feature is on.
///
/// Takes up to 400 distinct file paths that own symbol nodes (in path order),
/// reads each file from disk and runs
/// `crate::core::cyclomatic::cyclomatic_per_function` on it. Functions with
/// complexity of at least 11 are warnings, at least 21 errors. Files that
/// cannot be read, have no extension, or yield no metrics are skipped.
///
/// Returns at most 100 findings sorted by complexity (highest first), then by
/// file path, line and symbol, so repeated scans of the same data give the
/// same result. A failure to prepare or run the query yields an empty vector.
#[cfg(feature = "tree-sitter")]
fn detect_cyclomatic_tree_sitter(conn: &Connection) -> Vec<SmellFinding> {
    use std::path::Path;

    const WARN_CC: u32 = 11;
    const ERR_CC: u32 = 21;
    const MAX_FINDINGS: usize = 100;

    // ORDER BY makes the LIMIT pick the same files on every run.
    let sql = "
        SELECT DISTINCT n.file_path
        FROM nodes n
        WHERE n.kind = 'symbol'
          AND n.file_path IS NOT NULL
          AND length(trim(n.file_path)) > 0
        ORDER BY n.file_path
        LIMIT 400
    ";
    let Ok(mut stmt) = conn.prepare(sql) else {
        return Vec::new();
    };
    let Ok(rows) = stmt.query_map([], |row| row.get::<_, String>(0)) else {
        return Vec::new();
    };
    // Drain the cursor before touching the filesystem.
    let paths: Vec<String> = rows.flatten().collect();

    let mut findings = Vec::new();
    for path in paths {
        // Check the extension first so extension-less files are never read.
        let Some(ext) = Path::new(&path).extension().and_then(|e| e.to_str()) else {
            continue;
        };
        let Ok(content) = std::fs::read_to_string(&path) else {
            continue;
        };
        let Some(metrics) = crate::core::cyclomatic::cyclomatic_per_function(&content, ext) else {
            continue;
        };
        for m in metrics {
            if m.cyclomatic < WARN_CC {
                continue;
            }
            let severity = if m.cyclomatic >= ERR_CC {
                Severity::Error
            } else {
                Severity::Warning
            };
            findings.push(SmellFinding {
                rule: "cyclomatic_complexity",
                severity,
                file_path: path.clone(),
                symbol: Some(m.name.clone()),
                line: Some(m.line),
                message: format!(
                    "'{}' cyclomatic complexity {} (thresholds: warning {WARN_CC}, error {ERR_CC})",
                    m.name, m.cyclomatic
                ),
                metric: Some(f64::from(m.cyclomatic)),
            });
        }
    }

    // Total order: equal complexities fall back to location, so the surviving
    // findings after truncation do not depend on collection order.
    findings.sort_by(|a, b| {
        b.metric
            .unwrap_or(0.0)
            .total_cmp(&a.metric.unwrap_or(0.0))
            .then_with(|| a.file_path.cmp(&b.file_path))
            .then_with(|| a.line.cmp(&b.line))
            .then_with(|| a.symbol.cmp(&b.symbol))
    });
    findings.truncate(MAX_FINDINGS);
    findings
}
447
448fn query_findings(
449    conn: &Connection,
450    sql: &str,
451    rule: &'static str,
452    severity: Severity,
453    msg_fn: impl Fn(&str, &str, Option<usize>) -> String,
454) -> Vec<SmellFinding> {
455    let mut findings = Vec::new();
456    let Ok(mut stmt) = conn.prepare(sql) else {
457        return findings;
458    };
459    let Ok(rows) = stmt.query_map([], |row| {
460        Ok((
461            row.get::<_, String>(0)?,
462            row.get::<_, String>(1)?,
463            row.get::<_, Option<i64>>(2)?,
464        ))
465    }) else {
466        return findings;
467    };
468    for row in rows.flatten() {
469        let (name, path, line) = row;
470        let line_usize = line.map(|l| l as usize);
471        findings.push(SmellFinding {
472            rule,
473            severity,
474            file_path: path.clone(),
475            symbol: Some(name.clone()),
476            line: line_usize,
477            message: msg_fn(&name, &path, line_usize),
478            metric: None,
479        });
480    }
481    findings
482}
483
484fn query_findings_with_metric(
485    conn: &Connection,
486    sql: &str,
487    rule: &'static str,
488    severity: Severity,
489    msg_fn: impl Fn(&str, &str, Option<usize>, f64) -> String,
490) -> Vec<SmellFinding> {
491    let mut findings = Vec::new();
492    let Ok(mut stmt) = conn.prepare(sql) else {
493        return findings;
494    };
495    let Ok(rows) = stmt.query_map([], |row| {
496        Ok((
497            row.get::<_, String>(0)?,
498            row.get::<_, String>(1)?,
499            row.get::<_, Option<i64>>(2)?,
500            row.get::<_, f64>(3)?,
501        ))
502    }) else {
503        return findings;
504    };
505    for row in rows.flatten() {
506        let (name, path, line, metric) = row;
507        let line_usize = line.map(|l| l as usize);
508        findings.push(SmellFinding {
509            rule,
510            severity,
511            file_path: path.clone(),
512            symbol: Some(name.clone()),
513            line: line_usize,
514            message: msg_fn(&name, &path, line_usize, metric),
515            metric: Some(metric),
516        });
517    }
518    findings
519}
520
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::property_graph::{CodeGraph, Edge, EdgeKind, Node, NodeKind};

    /// Builds an in-memory graph with three files and three symbols:
    /// `process` (called from `src/main.rs`), `unused_helper` (no incoming
    /// edges) and `mega_function` (lines 1-200).
    fn setup_graph() -> CodeGraph {
        let graph = CodeGraph::open_in_memory().unwrap();

        let main_file = graph.upsert_node(&Node::file("src/main.rs")).unwrap();
        let lib_file = graph.upsert_node(&Node::file("src/lib.rs")).unwrap();
        // Metadata "600" is what the long-file rule casts to an integer.
        let _utils_file = graph
            .upsert_node(&Node::file("src/utils.rs").with_metadata("600"))
            .unwrap();

        let used_symbol = graph
            .upsert_node(
                &Node::symbol("process", "src/lib.rs", NodeKind::Symbol).with_lines(10, 50),
            )
            .unwrap();
        // No incoming edges are ever added for this one -> dead code.
        let _dead_symbol = graph
            .upsert_node(
                &Node::symbol("unused_helper", "src/lib.rs", NodeKind::Symbol).with_lines(60, 80),
            )
            .unwrap();
        let _long_symbol = graph
            .upsert_node(
                &Node::symbol("mega_function", "src/utils.rs", NodeKind::Symbol).with_lines(1, 200),
            )
            .unwrap();

        graph
            .upsert_edge(&Edge::new(main_file, lib_file, EdgeKind::Imports))
            .unwrap();
        graph
            .upsert_edge(&Edge::new(main_file, used_symbol, EdgeKind::Calls))
            .unwrap();

        graph
    }

    #[test]
    fn dead_code_detection() {
        let graph = setup_graph();
        let findings = detect_dead_code(graph.connection());
        let flagged = findings
            .iter()
            .any(|f| f.symbol.as_deref() == Some("unused_helper"));
        assert!(flagged, "Should detect unused_helper as dead code");
    }

    #[test]
    fn long_function_detection() {
        let graph = setup_graph();
        let findings = detect_long_functions(graph.connection(), 100);
        let flagged = findings
            .iter()
            .any(|f| f.symbol.as_deref() == Some("mega_function"));
        assert!(flagged, "Should detect mega_function as too long");
    }

    #[test]
    fn long_file_detection() {
        let graph = setup_graph();
        let findings = detect_long_files(graph.connection(), 500);
        let flagged = findings.iter().any(|f| f.file_path == "src/utils.rs");
        assert!(
            flagged,
            "Should detect src/utils.rs as long file (600 lines)"
        );
    }

    #[test]
    fn scan_all_returns_findings() {
        let graph = setup_graph();
        let findings = scan_all(graph.connection(), &SmellConfig::default());
        assert!(!findings.is_empty(), "Should find at least one smell");
    }

    #[test]
    fn summarize_groups_by_rule() {
        let graph = setup_graph();
        let findings = scan_all(graph.connection(), &SmellConfig::default());
        let summary = summarize(&findings);
        assert_eq!(summary.len(), RULES.len());
        for entry in &summary {
            assert!(!entry.description.is_empty());
        }
    }
}