1use rusqlite::Connection;
8use serde::Serialize;
9
/// A single code-smell hit produced by one of the detection rules.
///
/// Derives `Serialize`, so instances can be emitted through `serde`.
#[derive(Debug, Clone, Serialize)]
pub struct SmellFinding {
    /// Identifier of the rule that produced this finding (one of the ids in `RULES`).
    pub rule: &'static str,
    /// How serious the finding is.
    pub severity: Severity,
    /// Path of the file the finding refers to. The `duplicate_definitions`
    /// rule stores a `", "`-joined list of paths here.
    pub file_path: String,
    /// Name of the graph node the finding is about; `None` for rules that
    /// report on a whole file without naming a node (e.g. `god_file`).
    pub symbol: Option<String>,
    /// Start line of the node, when the rule's query provides one.
    pub line: Option<usize>,
    /// Human-readable description of the finding.
    pub message: String,
    /// Numeric measurement behind the finding (span, count, score), if any.
    pub metric: Option<f64>,
}
20
/// Severity level attached to a [`SmellFinding`].
///
/// Serialized in lowercase (`info`, `warning`, `error`) because of
/// `#[serde(rename_all = "lowercase")]`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
    /// Lowest level; used e.g. by the `long_file` and `untested_function` rules.
    Info,
    /// Middle level; used e.g. by the `dead_code` and `long_function` rules.
    Warning,
    /// Highest level; only the cyclomatic-complexity rule emits it.
    Error,
}
28
/// Per-rule roll-up produced by [`summarize`].
#[derive(Debug, Clone, Serialize)]
pub struct SmellSummary {
    /// Rule identifier, taken from `RULES`.
    pub rule: &'static str,
    /// Short description of the rule, taken from `RULES`.
    pub description: &'static str,
    /// Number of findings that carry this rule id.
    pub findings: usize,
}
35
/// Thresholds for the rules that compare a measurement against a limit.
///
/// Every rule using these values reports an item only when its measurement
/// is strictly greater than the configured threshold.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SmellConfig {
    /// Limit for the `long_function` rule (compared with `line_end - line_start`).
    pub long_function_lines: usize,
    /// Limit for the `long_file` rule (compared with the file node's integer metadata).
    pub long_file_lines: usize,
    /// Limit for the `god_file` rule (symbols per file).
    pub god_file_symbols: usize,
    /// Limit for the `fan_out_skew` rule (outgoing `calls` edges per symbol).
    pub fan_out_threshold: usize,
}

impl Default for SmellConfig {
    /// Returns the stock thresholds: 100-line functions, 500-line files,
    /// 30 symbols per file and 15 outgoing calls per symbol.
    fn default() -> Self {
        Self {
            long_function_lines: 100,
            long_file_lines: 500,
            god_file_symbols: 30,
            fan_out_threshold: 15,
        }
    }
}
53
/// Registry of every smell rule as `(rule id, description)` pairs.
///
/// The slice order is the order in which [`scan_all`] runs the rules and in
/// which [`summarize`] lists them. Each id must have a matching arm in
/// [`scan_rule`]; an id without one yields no findings.
pub static RULES: &[(&str, &str)] = &[
    ("dead_code", "Symbols defined but never referenced"),
    ("long_function", "Functions exceeding line threshold"),
    ("long_file", "Files exceeding line threshold"),
    ("god_file", "Files with excessive symbol count"),
    ("fan_out_skew", "Functions calling too many other symbols"),
    (
        "duplicate_definitions",
        "Same symbol name defined in multiple files",
    ),
    (
        "untested_function",
        "Exported symbols without test coverage",
    ),
    (
        "cyclomatic_complexity",
        "Functions with high branching complexity",
    ),
];
73
74pub fn scan_all(conn: &Connection, cfg: &SmellConfig) -> Vec<SmellFinding> {
75 let mut all = Vec::new();
76 for &(rule, _) in RULES {
77 all.extend(scan_rule(conn, rule, cfg));
78 }
79 all
80}
81
82pub fn scan_rule(conn: &Connection, rule: &str, cfg: &SmellConfig) -> Vec<SmellFinding> {
83 match rule {
84 "dead_code" => detect_dead_code(conn),
85 "long_function" => detect_long_functions(conn, cfg.long_function_lines),
86 "long_file" => detect_long_files(conn, cfg.long_file_lines),
87 "god_file" => detect_god_files(conn, cfg.god_file_symbols),
88 "fan_out_skew" => detect_fan_out(conn, cfg.fan_out_threshold),
89 "duplicate_definitions" => detect_duplicate_definitions(conn),
90 "untested_function" => detect_untested(conn),
91 "cyclomatic_complexity" => detect_cyclomatic_complexity(conn),
92 _ => Vec::new(),
93 }
94}
95
96pub fn summarize(findings: &[SmellFinding]) -> Vec<SmellSummary> {
97 RULES
98 .iter()
99 .map(|&(rule, desc)| SmellSummary {
100 rule,
101 description: desc,
102 findings: findings.iter().filter(|f| f.rule == rule).count(),
103 })
104 .collect()
105}
106
107fn detect_dead_code(conn: &Connection) -> Vec<SmellFinding> {
108 let sql = "
109 SELECT n.name, n.file_path, n.line_start
110 FROM nodes n
111 WHERE n.kind = 'symbol'
112 AND n.file_path NOT LIKE '%test%'
113 AND n.file_path NOT LIKE '%spec%'
114 AND n.name NOT IN ('main', 'new', 'default', 'fmt', 'drop')
115 AND n.id NOT IN (
116 SELECT DISTINCT e.target_id FROM edges e
117 WHERE e.kind IN ('calls', 'type_ref', 'imports')
118 )
119 ORDER BY n.file_path, n.line_start
120 LIMIT 200
121 ";
122 query_findings(
123 conn,
124 sql,
125 "dead_code",
126 Severity::Warning,
127 |name, path, _line| format!("'{name}' defined in {path} but never referenced"),
128 )
129}
130
131fn detect_long_functions(conn: &Connection, threshold: usize) -> Vec<SmellFinding> {
132 let sql = format!(
133 "SELECT n.name, n.file_path, n.line_start,
134 (n.line_end - n.line_start) AS span
135 FROM nodes n
136 WHERE n.kind = 'symbol'
137 AND n.line_start IS NOT NULL
138 AND n.line_end IS NOT NULL
139 AND (n.line_end - n.line_start) > {threshold}
140 ORDER BY span DESC
141 LIMIT 100"
142 );
143 query_findings_with_metric(
144 conn,
145 &sql,
146 "long_function",
147 Severity::Warning,
148 |name, _path, _line, metric| {
149 format!("'{name}' is {metric:.0} lines (threshold: {threshold})")
150 },
151 )
152}
153
154fn detect_long_files(conn: &Connection, threshold: usize) -> Vec<SmellFinding> {
155 let sql = format!(
156 "SELECT n.name, n.file_path, NULL,
157 CAST(n.metadata AS INTEGER) AS line_count
158 FROM nodes n
159 WHERE n.kind = 'file'
160 AND n.metadata IS NOT NULL
161 AND CAST(n.metadata AS INTEGER) > {threshold}
162 ORDER BY line_count DESC
163 LIMIT 100"
164 );
165 query_findings_with_metric(
166 conn,
167 &sql,
168 "long_file",
169 Severity::Info,
170 |_name, path, _line, metric| {
171 format!("{path} has {metric:.0} lines (threshold: {threshold})")
172 },
173 )
174}
175
176fn detect_god_files(conn: &Connection, threshold: usize) -> Vec<SmellFinding> {
177 let sql = format!(
178 "SELECT COUNT(*) AS sym_count, n.file_path
179 FROM nodes n
180 WHERE n.kind = 'symbol'
181 GROUP BY n.file_path
182 HAVING sym_count > {threshold}
183 ORDER BY sym_count DESC
184 LIMIT 50"
185 );
186 let mut findings = Vec::new();
187 let Ok(mut stmt) = conn.prepare(&sql) else {
188 return findings;
189 };
190 let Ok(rows) = stmt.query_map([], |row| {
191 Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
192 }) else {
193 return findings;
194 };
195 for row in rows.flatten() {
196 let (count, path) = row;
197 findings.push(SmellFinding {
198 rule: "god_file",
199 severity: Severity::Warning,
200 file_path: path.clone(),
201 symbol: None,
202 line: None,
203 message: format!("{path} has {count} symbols (threshold: {threshold})"),
204 metric: Some(count as f64),
205 });
206 }
207 findings
208}
209
210fn detect_fan_out(conn: &Connection, threshold: usize) -> Vec<SmellFinding> {
211 let sql = format!(
212 "SELECT n.name, n.file_path, n.line_start, COUNT(e.id) AS call_count
213 FROM nodes n
214 JOIN edges e ON e.source_id = n.id AND e.kind = 'calls'
215 WHERE n.kind = 'symbol'
216 GROUP BY n.id
217 HAVING call_count > {threshold}
218 ORDER BY call_count DESC
219 LIMIT 100"
220 );
221 query_findings_with_metric(
222 conn,
223 &sql,
224 "fan_out_skew",
225 Severity::Warning,
226 |name, _path, _line, metric| {
227 format!("'{name}' calls {metric:.0} symbols (threshold: {threshold})")
228 },
229 )
230}
231
232fn detect_duplicate_definitions(conn: &Connection) -> Vec<SmellFinding> {
233 let sql = "
234 SELECT n.name, GROUP_CONCAT(n.file_path, ', ') AS files, COUNT(*) AS cnt
235 FROM nodes n
236 WHERE n.kind = 'symbol'
237 AND n.name NOT IN ('new', 'default', 'fmt', 'from', 'into', 'drop', 'clone', 'eq')
238 GROUP BY n.name
239 HAVING cnt > 1
240 ORDER BY cnt DESC
241 LIMIT 50
242 ";
243 let mut findings = Vec::new();
244 let Ok(mut stmt) = conn.prepare(sql) else {
245 return findings;
246 };
247 let Ok(rows) = stmt.query_map([], |row| {
248 Ok((
249 row.get::<_, String>(0)?,
250 row.get::<_, String>(1)?,
251 row.get::<_, i64>(2)?,
252 ))
253 }) else {
254 return findings;
255 };
256 for row in rows.flatten() {
257 let (name, files, count) = row;
258 findings.push(SmellFinding {
259 rule: "duplicate_definitions",
260 severity: Severity::Info,
261 file_path: files.clone(),
262 symbol: Some(name.clone()),
263 line: None,
264 message: format!("'{name}' defined in {count} files: {files}"),
265 metric: Some(count as f64),
266 });
267 }
268 findings
269}
270
271fn detect_untested(conn: &Connection) -> Vec<SmellFinding> {
272 let sql = "
273 SELECT n.name, n.file_path, n.line_start
274 FROM nodes n
275 WHERE n.kind = 'symbol'
276 AND n.file_path NOT LIKE '%test%'
277 AND n.file_path NOT LIKE '%spec%'
278 AND n.metadata LIKE '%export%'
279 AND n.id NOT IN (
280 SELECT DISTINCT e.source_id FROM edges e WHERE e.kind = 'tested_by'
281 )
282 AND n.id NOT IN (
283 SELECT DISTINCT e.target_id FROM edges e WHERE e.kind = 'tested_by'
284 )
285 ORDER BY n.file_path, n.line_start
286 LIMIT 100
287 ";
288 query_findings(
289 conn,
290 sql,
291 "untested_function",
292 Severity::Info,
293 |name, path, _line| format!("'{name}' in {path} has no test coverage"),
294 )
295}
296
297fn detect_cyclomatic_complexity(conn: &Connection) -> Vec<SmellFinding> {
298 #[cfg(feature = "tree-sitter")]
299 {
300 detect_cyclomatic_tree_sitter(conn)
301 }
302 #[cfg(not(feature = "tree-sitter"))]
303 {
304 detect_cyclomatic_heuristic(conn)
305 }
306}
307
308#[cfg(not(feature = "tree-sitter"))]
310fn detect_cyclomatic_heuristic(conn: &Connection) -> Vec<SmellFinding> {
311 let sql = "
312 SELECT n.name, n.file_path, n.line_start,
313 (n.line_end - n.line_start) AS span,
314 (SELECT COUNT(*) FROM edges e WHERE e.source_id = n.id AND e.kind = 'calls') AS calls
315 FROM nodes n
316 WHERE n.kind = 'symbol'
317 AND n.line_start IS NOT NULL
318 AND n.line_end IS NOT NULL
319 AND (n.line_end - n.line_start) > 20
320 ORDER BY (span * 0.3 + calls * 0.7) DESC
321 LIMIT 100
322 ";
323 let mut findings = Vec::new();
324 let Ok(mut stmt) = conn.prepare(sql) else {
325 return findings;
326 };
327 let Ok(rows) = stmt.query_map([], |row| {
328 Ok((
329 row.get::<_, String>(0)?,
330 row.get::<_, String>(1)?,
331 row.get::<_, Option<i64>>(2)?,
332 row.get::<_, i64>(3)?,
333 row.get::<_, i64>(4)?,
334 ))
335 }) else {
336 return findings;
337 };
338 for row in rows.flatten() {
339 let (name, path, line, span, calls) = row;
340 let complexity_proxy = (span as f64) * 0.3 + (calls as f64) * 0.7;
341 if complexity_proxy < 10.0 {
342 continue;
343 }
344 let severity = if complexity_proxy > 30.0 {
345 Severity::Error
346 } else if complexity_proxy > 20.0 {
347 Severity::Warning
348 } else {
349 Severity::Info
350 };
351 findings.push(SmellFinding {
352 rule: "cyclomatic_complexity",
353 severity,
354 file_path: path,
355 symbol: Some(name.clone()),
356 line: line.map(|l| l as usize),
357 message: format!(
358 "'{name}' complexity proxy {complexity_proxy:.1} (span={span}, calls={calls})"
359 ),
360 metric: Some(complexity_proxy),
361 });
362 }
363 findings
364}
365
/// Computes per-function cyclomatic complexity from source files, compiled
/// when the `tree-sitter` feature is enabled.
///
/// Up to 400 distinct, non-blank file paths that own at least one symbol are
/// taken from the graph in path order. Each file is read from disk using the
/// stored path as-is and handed to
/// `crate::core::cyclomatic::cyclomatic_per_function` together with its file
/// extension. Files without an extension, unreadable files, and files the
/// analyser returns `None` for are skipped.
///
/// Functions with complexity below 11 are ignored; 11 to 20 yields a
/// `Warning`, 21 and above an `Error`. The result is sorted by complexity
/// (highest first), with file path and line as tie-breakers so the output
/// and the 100-entry truncation are the same on every run.
#[cfg(feature = "tree-sitter")]
fn detect_cyclomatic_tree_sitter(conn: &Connection) -> Vec<SmellFinding> {
    use std::path::Path;

    const WARN_CC: u32 = 11;
    const ERR_CC: u32 = 21;

    // ORDER BY makes the LIMIT pick the same files on every run.
    let sql = "
        SELECT DISTINCT n.file_path
        FROM nodes n
        WHERE n.kind = 'symbol'
          AND n.file_path IS NOT NULL
          AND length(trim(n.file_path)) > 0
        ORDER BY n.file_path
        LIMIT 400
    ";
    let Ok(mut stmt) = conn.prepare(sql) else {
        return Vec::new();
    };
    let Ok(rows) = stmt.query_map([], |row| row.get::<_, String>(0)) else {
        return Vec::new();
    };
    // DISTINCT already guarantees each path appears once.
    let paths: Vec<String> = rows.flatten().collect();

    let mut findings = Vec::new();
    for path in &paths {
        // Check the extension before touching the disk.
        let Some(ext) = Path::new(path).extension().and_then(|e| e.to_str()) else {
            continue;
        };
        let Ok(content) = std::fs::read_to_string(path) else {
            continue;
        };
        let Some(metrics) = crate::core::cyclomatic::cyclomatic_per_function(&content, ext) else {
            continue;
        };

        for m in metrics {
            if m.cyclomatic < WARN_CC {
                continue;
            }
            let severity = if m.cyclomatic >= ERR_CC {
                Severity::Error
            } else {
                Severity::Warning
            };
            findings.push(SmellFinding {
                rule: "cyclomatic_complexity",
                severity,
                file_path: path.clone(),
                symbol: Some(m.name.clone()),
                line: Some(m.line),
                message: format!(
                    "'{}' cyclomatic complexity {} (thresholds: warning {WARN_CC}, error {ERR_CC})",
                    m.name, m.cyclomatic
                ),
                metric: Some(f64::from(m.cyclomatic)),
            });
        }
    }

    // Highest complexity first; ties are broken by location.
    findings.sort_by(|a, b| {
        let (score_a, score_b) = (a.metric.unwrap_or(0.0), b.metric.unwrap_or(0.0));
        score_b
            .total_cmp(&score_a)
            .then_with(|| a.file_path.cmp(&b.file_path))
            .then_with(|| a.line.cmp(&b.line))
    });
    findings.truncate(100);
    findings
}
447
448fn query_findings(
449 conn: &Connection,
450 sql: &str,
451 rule: &'static str,
452 severity: Severity,
453 msg_fn: impl Fn(&str, &str, Option<usize>) -> String,
454) -> Vec<SmellFinding> {
455 let mut findings = Vec::new();
456 let Ok(mut stmt) = conn.prepare(sql) else {
457 return findings;
458 };
459 let Ok(rows) = stmt.query_map([], |row| {
460 Ok((
461 row.get::<_, String>(0)?,
462 row.get::<_, String>(1)?,
463 row.get::<_, Option<i64>>(2)?,
464 ))
465 }) else {
466 return findings;
467 };
468 for row in rows.flatten() {
469 let (name, path, line) = row;
470 let line_usize = line.map(|l| l as usize);
471 findings.push(SmellFinding {
472 rule,
473 severity,
474 file_path: path.clone(),
475 symbol: Some(name.clone()),
476 line: line_usize,
477 message: msg_fn(&name, &path, line_usize),
478 metric: None,
479 });
480 }
481 findings
482}
483
484fn query_findings_with_metric(
485 conn: &Connection,
486 sql: &str,
487 rule: &'static str,
488 severity: Severity,
489 msg_fn: impl Fn(&str, &str, Option<usize>, f64) -> String,
490) -> Vec<SmellFinding> {
491 let mut findings = Vec::new();
492 let Ok(mut stmt) = conn.prepare(sql) else {
493 return findings;
494 };
495 let Ok(rows) = stmt.query_map([], |row| {
496 Ok((
497 row.get::<_, String>(0)?,
498 row.get::<_, String>(1)?,
499 row.get::<_, Option<i64>>(2)?,
500 row.get::<_, f64>(3)?,
501 ))
502 }) else {
503 return findings;
504 };
505 for row in rows.flatten() {
506 let (name, path, line, metric) = row;
507 let line_usize = line.map(|l| l as usize);
508 findings.push(SmellFinding {
509 rule,
510 severity,
511 file_path: path.clone(),
512 symbol: Some(name.clone()),
513 line: line_usize,
514 message: msg_fn(&name, &path, line_usize, metric),
515 metric: Some(metric),
516 });
517 }
518 findings
519}
520
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::property_graph::{CodeGraph, Edge, EdgeKind, Node, NodeKind};

    /// Builds a small in-memory graph shared by all tests:
    /// three files (`src/utils.rs` carries metadata `"600"`), three symbols
    /// (`process`, `unused_helper`, `mega_function`), an `Imports` edge from
    /// `src/main.rs` to `src/lib.rs` and a `Calls` edge from `src/main.rs`
    /// to `process`.
    fn setup_graph() -> CodeGraph {
        let graph = CodeGraph::open_in_memory().unwrap();

        let main_rs = graph.upsert_node(&Node::file("src/main.rs")).unwrap();
        let lib_rs = graph.upsert_node(&Node::file("src/lib.rs")).unwrap();
        // Inserted only for its presence in the graph; the id is not needed.
        let _utils_rs = graph
            .upsert_node(&Node::file("src/utils.rs").with_metadata("600"))
            .unwrap();

        let process = graph
            .upsert_node(
                &Node::symbol("process", "src/lib.rs", NodeKind::Symbol).with_lines(10, 50),
            )
            .unwrap();
        // Never referenced by any edge.
        let _unused_helper = graph
            .upsert_node(
                &Node::symbol("unused_helper", "src/lib.rs", NodeKind::Symbol).with_lines(60, 80),
            )
            .unwrap();
        // Spans lines 1 to 200.
        let _mega_function = graph
            .upsert_node(
                &Node::symbol("mega_function", "src/utils.rs", NodeKind::Symbol).with_lines(1, 200),
            )
            .unwrap();

        graph
            .upsert_edge(&Edge::new(main_rs, lib_rs, EdgeKind::Imports))
            .unwrap();
        graph
            .upsert_edge(&Edge::new(main_rs, process, EdgeKind::Calls))
            .unwrap();

        graph
    }

    #[test]
    fn dead_code_detection() {
        let graph = setup_graph();
        let findings = detect_dead_code(graph.connection());
        let flagged = findings
            .iter()
            .any(|f| f.symbol.as_deref() == Some("unused_helper"));
        assert!(flagged, "Should detect unused_helper as dead code");
    }

    #[test]
    fn long_function_detection() {
        let graph = setup_graph();
        let findings = detect_long_functions(graph.connection(), 100);
        let flagged = findings
            .iter()
            .any(|f| f.symbol.as_deref() == Some("mega_function"));
        assert!(flagged, "Should detect mega_function as too long");
    }

    #[test]
    fn long_file_detection() {
        let graph = setup_graph();
        let findings = detect_long_files(graph.connection(), 500);
        let flagged = findings.iter().any(|f| f.file_path == "src/utils.rs");
        assert!(
            flagged,
            "Should detect src/utils.rs as long file (600 lines)"
        );
    }

    #[test]
    fn scan_all_returns_findings() {
        let graph = setup_graph();
        let findings = scan_all(graph.connection(), &SmellConfig::default());
        assert!(!findings.is_empty(), "Should find at least one smell");
    }

    #[test]
    fn summarize_groups_by_rule() {
        let graph = setup_graph();
        let findings = scan_all(graph.connection(), &SmellConfig::default());
        let summary = summarize(&findings);
        assert_eq!(summary.len(), RULES.len());
        for entry in &summary {
            assert!(!entry.description.is_empty());
        }
    }
}