1use rusqlite::Connection;
8use serde::Serialize;
9
/// A single code smell reported by one of the detection rules.
#[derive(Debug, Clone, Serialize)]
pub struct SmellFinding {
    /// Identifier of the rule that produced the finding (e.g. `"dead_code"`).
    pub rule: &'static str,
    /// How serious the finding is considered to be.
    pub severity: Severity,
    /// Path of the file the finding refers to. For `duplicate_definitions`
    /// this holds the comma-separated list of every file involved.
    pub file_path: String,
    /// Name of the node the finding is about, when there is one.
    pub symbol: Option<String>,
    /// Line on which the symbol starts, when the graph records it.
    pub line: Option<usize>,
    /// Human-readable description of the finding.
    pub message: String,
    /// Numeric measurement behind the finding (line span, symbol count,
    /// call count, ...); `None` for rules that do not measure anything.
    pub metric: Option<f64>,
}
20
/// Severity level attached to a [`SmellFinding`].
///
/// Because of `rename_all = "lowercase"` the variants serialize as
/// `"info"`, `"warning"` and `"error"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
    /// Informational; the mildest level.
    Info,
    /// Worth a look; the level most rules report at.
    Warning,
    /// The most severe level; only the complexity rule emits it.
    Error,
}
28
/// Per-rule roll-up produced by [`summarize`].
#[derive(Debug, Clone, Serialize)]
pub struct SmellSummary {
    /// Rule identifier, taken from [`RULES`].
    pub rule: &'static str,
    /// Short description of the rule, taken from [`RULES`].
    pub description: &'static str,
    /// Number of findings reported for this rule.
    pub findings: usize,
}
35
/// Thresholds for the configurable smell rules.
///
/// Every threshold is exclusive: a rule fires only when the measured value
/// is strictly greater than the configured number.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SmellConfig {
    /// `long_function`: maximum allowed `line_end - line_start` of a symbol.
    pub long_function_lines: usize,
    /// `long_file`: maximum allowed line count of a file.
    pub long_file_lines: usize,
    /// `god_file`: maximum allowed number of symbols defined in one file.
    pub god_file_symbols: usize,
    /// `fan_out_skew`: maximum allowed number of outgoing `calls` edges.
    pub fan_out_threshold: usize,
}
42
43impl Default for SmellConfig {
44 fn default() -> Self {
45 Self {
46 long_function_lines: 100,
47 long_file_lines: 500,
48 god_file_symbols: 30,
49 fan_out_threshold: 15,
50 }
51 }
52}
53
/// Every known rule as an `(identifier, description)` pair.
///
/// The identifiers are the strings accepted by [`scan_rule`]. The order of
/// this table is the order in which [`scan_all`] runs the rules and in which
/// [`summarize`] lists them.
pub static RULES: &[(&str, &str)] = &[
    ("dead_code", "Symbols defined but never referenced"),
    ("long_function", "Functions exceeding line threshold"),
    ("long_file", "Files exceeding line threshold"),
    ("god_file", "Files with excessive symbol count"),
    ("fan_out_skew", "Functions calling too many other symbols"),
    (
        "duplicate_definitions",
        "Same symbol name defined in multiple files",
    ),
    (
        "untested_function",
        "Exported symbols without test coverage",
    ),
    (
        "cyclomatic_complexity",
        "Functions with high branching complexity",
    ),
];
73
74pub fn scan_all(conn: &Connection, cfg: &SmellConfig) -> Vec<SmellFinding> {
75 let mut all = Vec::new();
76 for &(rule, _) in RULES {
77 all.extend(scan_rule(conn, rule, cfg));
78 }
79 all
80}
81
82pub fn scan_rule(conn: &Connection, rule: &str, cfg: &SmellConfig) -> Vec<SmellFinding> {
83 match rule {
84 "dead_code" => detect_dead_code(conn),
85 "long_function" => detect_long_functions(conn, cfg.long_function_lines),
86 "long_file" => detect_long_files(conn, cfg.long_file_lines),
87 "god_file" => detect_god_files(conn, cfg.god_file_symbols),
88 "fan_out_skew" => detect_fan_out(conn, cfg.fan_out_threshold),
89 "duplicate_definitions" => detect_duplicate_definitions(conn),
90 "untested_function" => detect_untested(conn),
91 "cyclomatic_complexity" => detect_cyclomatic_complexity(conn),
92 _ => Vec::new(),
93 }
94}
95
96pub fn summarize(findings: &[SmellFinding]) -> Vec<SmellSummary> {
97 RULES
98 .iter()
99 .map(|&(rule, desc)| SmellSummary {
100 rule,
101 description: desc,
102 findings: findings.iter().filter(|f| f.rule == rule).count(),
103 })
104 .collect()
105}
106
107fn detect_dead_code(conn: &Connection) -> Vec<SmellFinding> {
108 let sql = "
109 SELECT n.name, n.file_path, n.line_start
110 FROM nodes n
111 WHERE n.kind = 'symbol'
112 AND n.file_path NOT LIKE '%test%'
113 AND n.file_path NOT LIKE '%spec%'
114 AND n.name NOT IN ('main', 'new', 'default', 'fmt', 'drop')
115 AND n.id NOT IN (
116 SELECT DISTINCT e.target_id FROM edges e
117 WHERE e.kind IN ('calls', 'type_ref', 'imports')
118 )
119 ORDER BY n.file_path, n.line_start
120 LIMIT 200
121 ";
122 query_findings(
123 conn,
124 sql,
125 "dead_code",
126 Severity::Warning,
127 |name, path, _line| format!("'{name}' defined in {path} but never referenced"),
128 )
129}
130
131fn detect_long_functions(conn: &Connection, threshold: usize) -> Vec<SmellFinding> {
132 let sql = format!(
133 "SELECT n.name, n.file_path, n.line_start,
134 (n.line_end - n.line_start) AS span
135 FROM nodes n
136 WHERE n.kind = 'symbol'
137 AND n.line_start IS NOT NULL
138 AND n.line_end IS NOT NULL
139 AND (n.line_end - n.line_start) > {threshold}
140 ORDER BY span DESC
141 LIMIT 100"
142 );
143 query_findings_with_metric(
144 conn,
145 &sql,
146 "long_function",
147 Severity::Warning,
148 |name, _path, _line, metric| {
149 format!("'{name}' is {metric:.0} lines (threshold: {threshold})")
150 },
151 )
152}
153
154fn detect_long_files(conn: &Connection, threshold: usize) -> Vec<SmellFinding> {
155 let sql = format!(
156 "SELECT n.name, n.file_path, NULL,
157 CAST(n.metadata AS INTEGER) AS line_count
158 FROM nodes n
159 WHERE n.kind = 'file'
160 AND n.metadata IS NOT NULL
161 AND CAST(n.metadata AS INTEGER) > {threshold}
162 ORDER BY line_count DESC
163 LIMIT 100"
164 );
165 query_findings_with_metric(
166 conn,
167 &sql,
168 "long_file",
169 Severity::Info,
170 |_name, path, _line, metric| {
171 format!("{path} has {metric:.0} lines (threshold: {threshold})")
172 },
173 )
174}
175
176fn detect_god_files(conn: &Connection, threshold: usize) -> Vec<SmellFinding> {
177 let sql = format!(
178 "SELECT COUNT(*) AS sym_count, n.file_path
179 FROM nodes n
180 WHERE n.kind = 'symbol'
181 GROUP BY n.file_path
182 HAVING sym_count > {threshold}
183 ORDER BY sym_count DESC
184 LIMIT 50"
185 );
186 let mut findings = Vec::new();
187 let Ok(mut stmt) = conn.prepare(&sql) else {
188 return findings;
189 };
190 let Ok(rows) = stmt.query_map([], |row| {
191 Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
192 }) else {
193 return findings;
194 };
195 for row in rows.flatten() {
196 let (count, path) = row;
197 findings.push(SmellFinding {
198 rule: "god_file",
199 severity: Severity::Warning,
200 file_path: path.clone(),
201 symbol: None,
202 line: None,
203 message: format!("{path} has {count} symbols (threshold: {threshold})"),
204 metric: Some(count as f64),
205 });
206 }
207 findings
208}
209
210fn detect_fan_out(conn: &Connection, threshold: usize) -> Vec<SmellFinding> {
211 let sql = format!(
212 "SELECT n.name, n.file_path, n.line_start, COUNT(e.id) AS call_count
213 FROM nodes n
214 JOIN edges e ON e.source_id = n.id AND e.kind = 'calls'
215 WHERE n.kind = 'symbol'
216 GROUP BY n.id
217 HAVING call_count > {threshold}
218 ORDER BY call_count DESC
219 LIMIT 100"
220 );
221 query_findings_with_metric(
222 conn,
223 &sql,
224 "fan_out_skew",
225 Severity::Warning,
226 |name, _path, _line, metric| {
227 format!("'{name}' calls {metric:.0} symbols (threshold: {threshold})")
228 },
229 )
230}
231
232fn detect_duplicate_definitions(conn: &Connection) -> Vec<SmellFinding> {
233 let sql = "
234 SELECT n.name, GROUP_CONCAT(n.file_path, ', ') AS files, COUNT(*) AS cnt
235 FROM nodes n
236 WHERE n.kind = 'symbol'
237 AND n.name NOT IN ('new', 'default', 'fmt', 'from', 'into', 'drop', 'clone', 'eq')
238 GROUP BY n.name
239 HAVING cnt > 1
240 ORDER BY cnt DESC
241 LIMIT 50
242 ";
243 let mut findings = Vec::new();
244 let Ok(mut stmt) = conn.prepare(sql) else {
245 return findings;
246 };
247 let Ok(rows) = stmt.query_map([], |row| {
248 Ok((
249 row.get::<_, String>(0)?,
250 row.get::<_, String>(1)?,
251 row.get::<_, i64>(2)?,
252 ))
253 }) else {
254 return findings;
255 };
256 for row in rows.flatten() {
257 let (name, files, count) = row;
258 findings.push(SmellFinding {
259 rule: "duplicate_definitions",
260 severity: Severity::Info,
261 file_path: files.clone(),
262 symbol: Some(name.clone()),
263 line: None,
264 message: format!("'{name}' defined in {count} files: {files}"),
265 metric: Some(count as f64),
266 });
267 }
268 findings
269}
270
271fn detect_untested(conn: &Connection) -> Vec<SmellFinding> {
272 let sql = "
273 SELECT n.name, n.file_path, n.line_start
274 FROM nodes n
275 WHERE n.kind = 'symbol'
276 AND n.file_path NOT LIKE '%test%'
277 AND n.file_path NOT LIKE '%spec%'
278 AND n.metadata LIKE '%export%'
279 AND n.id NOT IN (
280 SELECT DISTINCT e.source_id FROM edges e WHERE e.kind = 'tested_by'
281 )
282 AND n.id NOT IN (
283 SELECT DISTINCT e.target_id FROM edges e WHERE e.kind = 'tested_by'
284 )
285 ORDER BY n.file_path, n.line_start
286 LIMIT 100
287 ";
288 query_findings(
289 conn,
290 sql,
291 "untested_function",
292 Severity::Info,
293 |name, path, _line| format!("'{name}' in {path} has no test coverage"),
294 )
295}
296
297fn detect_cyclomatic_complexity(conn: &Connection) -> Vec<SmellFinding> {
298 let sql = "
303 SELECT n.name, n.file_path, n.line_start,
304 (n.line_end - n.line_start) AS span,
305 (SELECT COUNT(*) FROM edges e WHERE e.source_id = n.id AND e.kind = 'calls') AS calls
306 FROM nodes n
307 WHERE n.kind = 'symbol'
308 AND n.line_start IS NOT NULL
309 AND n.line_end IS NOT NULL
310 AND (n.line_end - n.line_start) > 20
311 ORDER BY (span * 0.3 + calls * 0.7) DESC
312 LIMIT 100
313 ";
314 let mut findings = Vec::new();
315 let Ok(mut stmt) = conn.prepare(sql) else {
316 return findings;
317 };
318 let Ok(rows) = stmt.query_map([], |row| {
319 Ok((
320 row.get::<_, String>(0)?,
321 row.get::<_, String>(1)?,
322 row.get::<_, Option<i64>>(2)?,
323 row.get::<_, i64>(3)?,
324 row.get::<_, i64>(4)?,
325 ))
326 }) else {
327 return findings;
328 };
329 for row in rows.flatten() {
330 let (name, path, line, span, calls) = row;
331 let complexity_proxy = (span as f64) * 0.3 + (calls as f64) * 0.7;
332 if complexity_proxy < 10.0 {
333 continue;
334 }
335 let severity = if complexity_proxy > 30.0 {
336 Severity::Error
337 } else if complexity_proxy > 20.0 {
338 Severity::Warning
339 } else {
340 Severity::Info
341 };
342 findings.push(SmellFinding {
343 rule: "cyclomatic_complexity",
344 severity,
345 file_path: path,
346 symbol: Some(name.clone()),
347 line: line.map(|l| l as usize),
348 message: format!(
349 "'{name}' complexity proxy {complexity_proxy:.1} (span={span}, calls={calls})"
350 ),
351 metric: Some(complexity_proxy),
352 });
353 }
354 findings
355}
356
357fn query_findings(
358 conn: &Connection,
359 sql: &str,
360 rule: &'static str,
361 severity: Severity,
362 msg_fn: impl Fn(&str, &str, Option<usize>) -> String,
363) -> Vec<SmellFinding> {
364 let mut findings = Vec::new();
365 let Ok(mut stmt) = conn.prepare(sql) else {
366 return findings;
367 };
368 let Ok(rows) = stmt.query_map([], |row| {
369 Ok((
370 row.get::<_, String>(0)?,
371 row.get::<_, String>(1)?,
372 row.get::<_, Option<i64>>(2)?,
373 ))
374 }) else {
375 return findings;
376 };
377 for row in rows.flatten() {
378 let (name, path, line) = row;
379 let line_usize = line.map(|l| l as usize);
380 findings.push(SmellFinding {
381 rule,
382 severity,
383 file_path: path.clone(),
384 symbol: Some(name.clone()),
385 line: line_usize,
386 message: msg_fn(&name, &path, line_usize),
387 metric: None,
388 });
389 }
390 findings
391}
392
393fn query_findings_with_metric(
394 conn: &Connection,
395 sql: &str,
396 rule: &'static str,
397 severity: Severity,
398 msg_fn: impl Fn(&str, &str, Option<usize>, f64) -> String,
399) -> Vec<SmellFinding> {
400 let mut findings = Vec::new();
401 let Ok(mut stmt) = conn.prepare(sql) else {
402 return findings;
403 };
404 let Ok(rows) = stmt.query_map([], |row| {
405 Ok((
406 row.get::<_, String>(0)?,
407 row.get::<_, String>(1)?,
408 row.get::<_, Option<i64>>(2)?,
409 row.get::<_, f64>(3)?,
410 ))
411 }) else {
412 return findings;
413 };
414 for row in rows.flatten() {
415 let (name, path, line, metric) = row;
416 let line_usize = line.map(|l| l as usize);
417 findings.push(SmellFinding {
418 rule,
419 severity,
420 file_path: path.clone(),
421 symbol: Some(name.clone()),
422 line: line_usize,
423 message: msg_fn(&name, &path, line_usize, metric),
424 metric: Some(metric),
425 });
426 }
427 findings
428}
429
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::property_graph::{CodeGraph, Edge, EdgeKind, Node, NodeKind};

    /// Builds an in-memory graph with three files and three symbols:
    /// `process` (called from `src/main.rs`), `unused_helper` (never
    /// referenced) and `mega_function` (lines 1-200 in `src/utils.rs`, a
    /// file whose metadata is `"600"`).
    fn setup_graph() -> CodeGraph {
        let graph = CodeGraph::open_in_memory().unwrap();

        let main_file = graph.upsert_node(&Node::file("src/main.rs")).unwrap();
        let lib_file = graph.upsert_node(&Node::file("src/lib.rs")).unwrap();
        let _utils_file = graph
            .upsert_node(&Node::file("src/utils.rs").with_metadata("600"))
            .unwrap();

        let process = graph
            .upsert_node(
                &Node::symbol("process", "src/lib.rs", NodeKind::Symbol).with_lines(10, 50),
            )
            .unwrap();
        // The next two symbols are deliberately left without incoming edges.
        let _unused_helper = graph
            .upsert_node(
                &Node::symbol("unused_helper", "src/lib.rs", NodeKind::Symbol).with_lines(60, 80),
            )
            .unwrap();
        let _mega_function = graph
            .upsert_node(
                &Node::symbol("mega_function", "src/utils.rs", NodeKind::Symbol).with_lines(1, 200),
            )
            .unwrap();

        graph
            .upsert_edge(&Edge::new(main_file, lib_file, EdgeKind::Imports))
            .unwrap();
        graph
            .upsert_edge(&Edge::new(main_file, process, EdgeKind::Calls))
            .unwrap();

        graph
    }

    /// True when at least one finding names `symbol`.
    fn has_symbol(findings: &[SmellFinding], symbol: &str) -> bool {
        findings
            .iter()
            .any(|finding| finding.symbol.as_deref() == Some(symbol))
    }

    #[test]
    fn dead_code_detection() {
        let graph = setup_graph();
        let findings = detect_dead_code(graph.connection());
        assert!(
            has_symbol(&findings, "unused_helper"),
            "Should detect unused_helper as dead code"
        );
    }

    #[test]
    fn long_function_detection() {
        let graph = setup_graph();
        let findings = detect_long_functions(graph.connection(), 100);
        assert!(
            has_symbol(&findings, "mega_function"),
            "Should detect mega_function as too long"
        );
    }

    #[test]
    fn long_file_detection() {
        let graph = setup_graph();
        let findings = detect_long_files(graph.connection(), 500);
        let flagged = findings
            .iter()
            .any(|finding| finding.file_path == "src/utils.rs");
        assert!(
            flagged,
            "Should detect src/utils.rs as long file (600 lines)"
        );
    }

    #[test]
    fn scan_all_returns_findings() {
        let graph = setup_graph();
        let findings = scan_all(graph.connection(), &SmellConfig::default());
        assert!(!findings.is_empty(), "Should find at least one smell");
    }

    #[test]
    fn summarize_groups_by_rule() {
        let graph = setup_graph();
        let findings = scan_all(graph.connection(), &SmellConfig::default());
        let summary = summarize(&findings);
        assert_eq!(summary.len(), RULES.len());
        assert!(summary.iter().all(|entry| !entry.description.is_empty()));
    }
}