1use std::collections::{HashMap, HashSet};
4use std::fs;
5use std::path::{Path, PathBuf};
6use std::process::Command;
7
8use anyhow::{bail, Context, Result};
9use regex::Regex;
10use serde::{Deserialize, Serialize};
11use tree_sitter::{Node, Parser};
12
/// Default upper bound, in bytes, on the size of a source file that is collected for
/// scanning (1 MiB). `CodeAuditConfig::default` uses it for `max_file_bytes`.
const MAX_FILE_BYTES: u64 = 1 << 20;
14
15#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
16pub enum BullshitKind {
17 FakeComplexity,
18 CargoCult,
19 OverEngineering,
20 ArcAbuse,
21 RwLockAbuse,
22 SleepAbuse,
23 UnwrapAbuse,
24 DynTraitAbuse,
25 CloneAbuse,
26 MutexAbuse,
27}
28
29impl BullshitKind {
30 fn label(self) -> &'static str {
31 match self {
32 Self::FakeComplexity => "fake complexity",
33 Self::CargoCult => "cargo cult",
34 Self::OverEngineering => "over-engineering",
35 Self::ArcAbuse => "Arc abuse",
36 Self::RwLockAbuse => "RwLock abuse",
37 Self::SleepAbuse => "sleep abuse",
38 Self::UnwrapAbuse => "unwrap abuse",
39 Self::DynTraitAbuse => "dyn trait abuse",
40 Self::CloneAbuse => "clone abuse",
41 Self::MutexAbuse => "mutex abuse",
42 }
43 }
44}
45
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct BullshitAlert {
48 pub kind: BullshitKind,
49 pub confidence: f32,
50 pub severity: f32,
51 pub file: PathBuf,
52 pub line: usize,
53 pub column: usize,
54 pub context_snippet: String,
55 pub why_bs: String,
56 pub suggestion: String,
57}
58
/// Tunable settings for a code audit run.
#[derive(Clone, Debug)]
pub struct CodeAuditConfig {
    /// Minimum confidence an alert needs in order to be kept.
    pub confidence_threshold: f32,
    /// Largest file size, in bytes, that is still collected for scanning.
    pub max_file_bytes: u64,
    /// Substrings; a path containing any of them is not scanned.
    pub ignore_paths: Vec<String>,
    /// `Debug` names of `BullshitKind` variants whose alerts are discarded.
    pub ignore_kinds: HashSet<String>,
}
66
67impl Default for CodeAuditConfig {
68 fn default() -> Self {
69 Self {
70 confidence_threshold: 0.60,
71 max_file_bytes: MAX_FILE_BYTES,
72 ignore_paths: Vec::new(),
73 ignore_kinds: HashSet::new(),
74 }
75 }
76}
77
78#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct CodeAuditReport {
80 pub files_scanned: usize,
81 pub alerts: Vec<BullshitAlert>,
82}
83
84impl CodeAuditReport {
85 pub fn is_clean(&self) -> bool {
86 self.alerts.is_empty()
87 }
88}
89
90pub fn scan_project(
91 manifest_path: Option<&Path>,
92 config: &CodeAuditConfig,
93) -> Result<CodeAuditReport> {
94 scan_project_with_filter(manifest_path, config, None)
95}
96
97pub fn scan_git_diff(
98 manifest_path: Option<&Path>,
99 config: &CodeAuditConfig,
100) -> Result<CodeAuditReport> {
101 let base_dir = project_base_dir(manifest_path);
102 let filter = DiffFilter::from_git_diff(base_dir)?;
103 scan_project_with_filter(manifest_path, config, Some(&filter))
104}
105
106fn scan_project_with_filter(
107 manifest_path: Option<&Path>,
108 config: &CodeAuditConfig,
109 diff_filter: Option<&DiffFilter>,
110) -> Result<CodeAuditReport> {
111 let base_dir = manifest_path
112 .and_then(Path::parent)
113 .filter(|p| !p.as_os_str().is_empty())
114 .unwrap_or_else(|| Path::new("."));
115
116 let mut files = Vec::new();
117 for dir in ["src", "tests", "examples", "benches"] {
118 collect_rust_files(&base_dir.join(dir), config, &mut files)?;
119 }
120
121 let mut alerts = Vec::new();
122 for file in &files {
123 if is_ignored_path(file, config) {
124 continue;
125 }
126 let code = fs::read_to_string(file)
127 .with_context(|| format!("failed to read {}", file.display()))?;
128 let mut file_alerts = scan_code(&code, file, config)?;
129 if let Some(filter) = diff_filter {
130 file_alerts.retain(|alert| filter.includes(alert));
131 }
132 alerts.extend(file_alerts);
133 }
134
135 alerts.sort_by(|a, b| {
136 b.severity
137 .partial_cmp(&a.severity)
138 .unwrap_or(std::cmp::Ordering::Equal)
139 .then_with(|| a.file.cmp(&b.file))
140 .then_with(|| a.line.cmp(&b.line))
141 });
142
143 Ok(CodeAuditReport {
144 files_scanned: files.len(),
145 alerts,
146 })
147}
148
149pub fn scan_code(
150 code: &str,
151 file: impl Into<PathBuf>,
152 config: &CodeAuditConfig,
153) -> Result<Vec<BullshitAlert>> {
154 let file = file.into();
155 if is_ignored_path(&file, config) {
156 return Ok(Vec::new());
157 }
158
159 let ignored_ranges = parse_ignored_ranges(code).unwrap_or_default();
160 let masked = mask_ranges(code, &ignored_ranges);
161 let mut alerts = Vec::new();
162
163 scan_regex_patterns(&masked, &file, &mut alerts)?;
164 scan_line_patterns(&masked, &file, &mut alerts);
165 scan_function_complexity(&masked, &file, &mut alerts);
166
167 alerts.retain(|alert| alert.confidence >= config.confidence_threshold);
168 alerts.retain(|alert| !config.ignore_kinds.contains(&format!("{:?}", alert.kind)));
169 dedupe_alerts(&mut alerts);
170 Ok(alerts)
171}
172
173pub fn config_from_policy(policy: Option<&crate::policy::Policy>) -> CodeAuditConfig {
174 let mut config = CodeAuditConfig::default();
175 if let Some(policy) = policy {
176 config.ignore_paths = policy.code_audit.ignore_paths.clone();
177 config.ignore_kinds = policy.code_audit.ignore_kinds.iter().cloned().collect();
178 }
179 config
180}
181
/// Resolves the directory that contains the manifest.
///
/// Falls back to `"."` when no manifest path is given, when the path has no parent, or
/// when the parent is empty (a bare file name such as `Cargo.toml`).
fn project_base_dir(manifest_path: Option<&Path>) -> &Path {
    match manifest_path.and_then(Path::parent) {
        Some(parent) if !parent.as_os_str().is_empty() => parent,
        _ => Path::new("."),
    }
}
188
189fn is_ignored_path(path: &Path, config: &CodeAuditConfig) -> bool {
190 let path = path.to_string_lossy();
191 config
192 .ignore_paths
193 .iter()
194 .any(|pattern| path.contains(pattern))
195}
196
197fn collect_rust_files(
198 dir: &Path,
199 config: &CodeAuditConfig,
200 files: &mut Vec<PathBuf>,
201) -> Result<()> {
202 if !dir.exists() {
203 return Ok(());
204 }
205
206 for entry in fs::read_dir(dir).with_context(|| format!("failed to read {}", dir.display()))? {
207 let entry = entry?;
208 let path = entry.path();
209 let name = entry.file_name();
210 let name = name.to_string_lossy();
211
212 if path.is_dir() {
213 if should_skip_dir(&name) {
214 continue;
215 }
216 collect_rust_files(&path, config, files)?;
217 continue;
218 }
219
220 if path.extension().and_then(|e| e.to_str()) != Some("rs") {
221 continue;
222 }
223
224 let metadata = entry.metadata()?;
225 if metadata.len() <= config.max_file_bytes {
226 files.push(path);
227 }
228 }
229
230 Ok(())
231}
232
/// Decides whether a directory with the given name is excluded from traversal.
///
/// Hidden directories (names starting with `.`) and a fixed list of build and vendor
/// directory names are skipped.
fn should_skip_dir(name: &str) -> bool {
    const SKIPPED: [&str; 6] = [
        "target",
        "vendor",
        "node_modules",
        "dist",
        "build",
        "third_party",
    ];

    if name.starts_with('.') {
        return true;
    }
    SKIPPED.contains(&name)
}
240
/// Restricts alerts to lines touched by a diff.
#[derive(Debug)]
struct DiffFilter {
    /// Directory the diff was taken in; stripped from alert paths before lookup.
    base_dir: PathBuf,
    /// For each changed file, the inclusive `(first, last)` line ranges on the new side.
    changed_lines: HashMap<PathBuf, Vec<(usize, usize)>>,
}
246
247impl DiffFilter {
248 fn from_git_diff(base_dir: &Path) -> Result<Self> {
249 let output = Command::new("git")
250 .arg("-C")
251 .arg(base_dir)
252 .arg("diff")
253 .arg("HEAD")
254 .arg("--unified=0")
255 .arg("--")
256 .output()
257 .with_context(|| "failed to run git diff HEAD --unified=0")?;
258
259 if !output.status.success() {
260 bail!(
261 "git diff failed: {}",
262 String::from_utf8_lossy(&output.stderr).trim()
263 );
264 }
265
266 Ok(Self {
267 base_dir: base_dir.to_path_buf(),
268 changed_lines: parse_changed_lines(&String::from_utf8_lossy(&output.stdout)),
269 })
270 }
271
272 fn includes(&self, alert: &BullshitAlert) -> bool {
273 let path = alert
274 .file
275 .strip_prefix(&self.base_dir)
276 .map(Path::to_path_buf)
277 .unwrap_or_else(|_| alert.file.clone());
278 let path = normalize_diff_path(&path);
279 self.changed_lines.get(&path).is_some_and(|ranges| {
280 ranges
281 .iter()
282 .any(|(start, end)| alert.line >= *start && alert.line <= *end)
283 })
284 }
285}
286
287fn parse_changed_lines(diff: &str) -> HashMap<PathBuf, Vec<(usize, usize)>> {
288 let mut current_file: Option<PathBuf> = None;
289 let mut changed = HashMap::<PathBuf, Vec<(usize, usize)>>::new();
290
291 for line in diff.lines() {
292 if let Some(path) = line.strip_prefix("+++ b/") {
293 current_file = Some(PathBuf::from(path));
294 continue;
295 }
296 if line.starts_with("+++ /dev/null") {
297 current_file = None;
298 continue;
299 }
300
301 if let (Some(file), Some(range)) = (current_file.as_ref(), parse_hunk_new_range(line)) {
302 changed.entry(file.clone()).or_default().push(range);
303 }
304 }
305
306 changed
307}
308
/// Parses the new-side range of a unified-diff hunk header.
///
/// For `@@ -a,b +c,d @@` this returns the inclusive range `(c, c + d - 1)`; a missing
/// `,d` means a single line. Returns `None` for lines that are not hunk headers, for
/// malformed numbers, and for hunks that add no lines (`d == 0`).
fn parse_hunk_new_range(line: &str) -> Option<(usize, usize)> {
    let header = line.strip_prefix("@@ ")?;
    let new_side = header
        .split_whitespace()
        .find(|field| field.starts_with('+'))?
        .trim_start_matches('+');

    let (first, len) = match new_side.split_once(',') {
        Some((first, len)) => (first.parse::<usize>().ok()?, len.parse::<usize>().ok()?),
        None => (new_side.parse::<usize>().ok()?, 1),
    };

    (len != 0).then(|| (first, first + len - 1))
}
325
/// Rebuilds `path` without its `.` (current directory) components.
fn normalize_diff_path(path: &Path) -> PathBuf {
    path.components()
        .filter(|part| !matches!(part, std::path::Component::CurDir))
        .map(|part| part.as_os_str())
        .collect()
}
336
337fn parse_ignored_ranges(code: &str) -> Result<Vec<(usize, usize)>> {
338 let mut parser = Parser::new();
339 parser
340 .set_language(&tree_sitter_rust::LANGUAGE.into())
341 .map_err(|err| anyhow::anyhow!("failed to load Rust tree-sitter grammar: {err}"))?;
342 let tree = parser
343 .parse(code, None)
344 .ok_or_else(|| anyhow::anyhow!("tree-sitter failed to parse Rust source"))?;
345
346 let mut ranges = Vec::new();
347 collect_ignored_ranges(tree.root_node(), &mut ranges);
348 Ok(ranges)
349}
350
351fn collect_ignored_ranges(node: Node<'_>, ranges: &mut Vec<(usize, usize)>) {
352 if is_ignored_node(node.kind()) {
353 ranges.push((node.start_byte(), node.end_byte()));
354 return;
355 }
356
357 let mut cursor = node.walk();
358 for child in node.children(&mut cursor) {
359 collect_ignored_ranges(child, ranges);
360 }
361}
362
/// Tells whether a syntax node kind is a comment or a string/char literal, i.e. text
/// that is masked out before scanning.
fn is_ignored_node(kind: &str) -> bool {
    const MASKED_KINDS: [&str; 5] = [
        "line_comment",
        "block_comment",
        "string_literal",
        "raw_string_literal",
        "char_literal",
    ];
    MASKED_KINDS.contains(&kind)
}
369
/// Returns a copy of `code` in which every byte inside the given half-open byte ranges
/// is replaced by a space, except newlines, which are kept so line numbers stay intact.
///
/// Parts of a range that lie beyond the end of the text are ignored. If the masking
/// yields invalid UTF-8 (a range cut through a multi-byte character), the original
/// text is returned unmasked.
fn mask_ranges(code: &str, ranges: &[(usize, usize)]) -> String {
    let mut buffer = code.as_bytes().to_vec();
    let len = buffer.len();

    for &(start, end) in ranges {
        // Clamp to the buffer; an inverted or out-of-range span becomes empty.
        let hi = end.min(len);
        let lo = start.min(hi);
        buffer[lo..hi]
            .iter_mut()
            .filter(|byte| **byte != b'\n')
            .for_each(|byte| *byte = b' ');
    }

    match String::from_utf8(buffer) {
        Ok(masked) => masked,
        Err(_) => code.to_string(),
    }
}
383
384fn scan_regex_patterns(code: &str, file: &Path, alerts: &mut Vec<BullshitAlert>) -> Result<()> {
385 let patterns = [
386 (
387 r"Arc\s*<\s*RwLock\s*<",
388 BullshitKind::OverEngineering,
389 0.86,
390 "Arc<RwLock<...>> is often shared mutable state wearing a tuxedo.",
391 "Try explicit ownership, message passing, or a narrower shared state boundary.",
392 ),
393 (
394 r"Arc\s*<\s*Mutex\s*<",
395 BullshitKind::OverEngineering,
396 0.82,
397 "Arc<Mutex<...>> can be valid, but it is also a classic complexity magnet.",
398 "Check whether ownership can stay local or the locked data can be smaller.",
399 ),
400 (
401 r"Mutex\s*<\s*HashMap\s*<",
402 BullshitKind::MutexAbuse,
403 0.76,
404 "A Mutex<HashMap<...>> is a blunt concurrency primitive.",
405 "Consider sharding, DashMap, or reducing shared mutable state.",
406 ),
407 (
408 r"RwLock\s*<",
409 BullshitKind::RwLockAbuse,
410 0.64,
411 "RwLock adds coordination cost and can hide unclear ownership.",
412 "Use it only when read-heavy sharing is real and measured.",
413 ),
414 (
415 r"\b(std::thread::sleep|tokio::time::sleep)\s*\(",
416 BullshitKind::SleepAbuse,
417 0.78,
418 "Sleep calls are often timing bullshit instead of synchronization.",
419 "Replace sleeps with explicit readiness, timeouts, retries, or test clocks.",
420 ),
421 ];
422
423 for (pattern, kind, confidence, why, suggestion) in patterns {
424 let regex = Regex::new(pattern)?;
425 for mat in regex.find_iter(code) {
426 alerts.push(make_alert(
427 kind,
428 confidence,
429 file,
430 code,
431 mat.start(),
432 mat.end(),
433 why,
434 suggestion,
435 ));
436 }
437 }
438
439 Ok(())
440}
441
442fn scan_line_patterns(code: &str, file: &Path, alerts: &mut Vec<BullshitAlert>) {
443 for (line_idx, line) in code.lines().enumerate() {
444 let trimmed = line.trim();
445
446 if let Some(col) = line.find(".unwrap()") {
447 alerts.push(alert_from_line(
448 BullshitKind::UnwrapAbuse,
449 0.72,
450 file,
451 line_idx + 1,
452 col + 1,
453 line,
454 "unwrap() is a runtime trap dressed up as confidence.",
455 "Propagate the error with ?, add context, or handle the failure explicitly.",
456 ));
457 }
458
459 let clone_count = line.matches(".clone()").count();
460 if clone_count >= 2 {
461 alerts.push(alert_from_line(
462 BullshitKind::CloneAbuse,
463 (0.60 + clone_count as f32 * 0.08).min(0.92),
464 file,
465 line_idx + 1,
466 line.find(".clone()").unwrap_or(0) + 1,
467 line,
468 "Multiple clone() calls on one line can hide ownership confusion.",
469 "Check whether borrowing, moving, or restructuring removes the copies.",
470 ));
471 }
472
473 let dyn_count = trimmed.matches("dyn ").count();
474 if dyn_count >= 3 {
475 alerts.push(alert_from_line(
476 BullshitKind::DynTraitAbuse,
477 0.80,
478 file,
479 line_idx + 1,
480 line.find("dyn ").unwrap_or(0) + 1,
481 line,
482 "Heavy dyn usage may be abstraction theater.",
483 "Prefer concrete types or generics unless runtime polymorphism is needed.",
484 ));
485 }
486
487 if trimmed.starts_with("use std::collections::{")
488 && trimmed.contains("HashMap")
489 && trimmed.contains("BTreeMap")
490 {
491 alerts.push(alert_from_line(
492 BullshitKind::CargoCult,
493 0.62,
494 file,
495 line_idx + 1,
496 line.find("HashMap").unwrap_or(0) + 1,
497 line,
498 "Broad collection imports can signal cargo-cult scaffolding.",
499 "Import the collection you actually use, or qualify rare uses inline.",
500 ));
501 }
502 }
503}
504
505fn scan_function_complexity(code: &str, file: &Path, alerts: &mut Vec<BullshitAlert>) {
506 let lines: Vec<&str> = code.lines().collect();
507 let mut idx = 0;
508
509 while idx < lines.len() {
510 let line = lines[idx];
511 if !looks_like_fn_start(line) {
512 idx += 1;
513 continue;
514 }
515
516 let start_line = idx + 1;
517 let mut brace_balance = 0isize;
518 let mut saw_body = false;
519 let mut complexity = 0usize;
520 let mut end_idx = idx;
521
522 while end_idx < lines.len() {
523 let current = lines[end_idx];
524 complexity += line_complexity(current);
525 for ch in current.chars() {
526 if ch == '{' {
527 saw_body = true;
528 brace_balance += 1;
529 } else if ch == '}' {
530 brace_balance -= 1;
531 }
532 }
533 if saw_body && brace_balance <= 0 {
534 break;
535 }
536 end_idx += 1;
537 }
538
539 if saw_body && complexity >= 6 {
540 let confidence = (complexity as f32 / 24.0).clamp(0.66, 0.95);
541 alerts.push(alert_from_line(
542 BullshitKind::FakeComplexity,
543 confidence,
544 file,
545 start_line,
546 line.find("fn").unwrap_or(0) + 1,
547 line,
548 &format!(
549 "Function complexity score is {complexity}; this smells like fake complexity."
550 ),
551 "Split the function around decisions, loops, and side effects.",
552 ));
553 }
554
555 idx = end_idx.saturating_add(1);
556 }
557}
558
/// Tells whether `line` begins a function item.
///
/// After leading whitespace the line may carry a visibility (`pub`, `pub(crate)`,
/// `pub(super)`, `pub(in path)`), any of the qualifiers `default`, `const`, `async`,
/// `unsafe`, and an `extern` specifier with an optional ABI string. It must then
/// continue with `fn ` (keyword plus a space). Every header the previous prefix list
/// accepted is still accepted.
fn looks_like_fn_start(line: &str) -> bool {
    const QUALIFIERS: [&str; 4] = ["default", "const", "async", "unsafe"];

    let mut rest = line.trim_start();

    // Optional visibility.
    if let Some(after_pub) = rest.strip_prefix("pub") {
        if let Some(restricted) = after_pub.strip_prefix('(') {
            match restricted.find(')') {
                Some(close) => rest = restricted[close + 1..].trim_start(),
                None => return false,
            }
        } else if after_pub.starts_with(char::is_whitespace) {
            rest = after_pub.trim_start();
        }
        // Otherwise `pub` was only the start of a longer word; leave `rest` alone.
    }

    // Optional qualifiers, in any number.
    while let Some(next) = QUALIFIERS
        .iter()
        .find_map(|&word| strip_leading_keyword(rest, word))
    {
        rest = next;
    }

    // Optional `extern` with an optional ABI string. The string may already be blanked
    // out by masking, in which case only whitespace follows the keyword.
    if let Some(after_extern) = strip_leading_keyword(rest, "extern") {
        rest = match after_extern.strip_prefix('"') {
            Some(abi) => match abi.find('"') {
                Some(close) => abi[close + 1..].trim_start(),
                None => return false,
            },
            None => after_extern,
        };
    }

    rest.starts_with("fn ")
}

/// Strips `keyword` followed by at least one whitespace character from the start of
/// `text`, returning the remainder with its leading whitespace removed.
fn strip_leading_keyword<'a>(text: &'a str, keyword: &str) -> Option<&'a str> {
    let tail = text.strip_prefix(keyword)?;
    if tail.starts_with(char::is_whitespace) {
        Some(tail.trim_start())
    } else {
        None
    }
}
567
/// Computes the heuristic complexity contribution of one line.
///
/// One point is added for each branching or looping keyword (`if`, `match`, `for`,
/// `while`, `loop`), each `&&` or `||`, and each `?;`. Two points are added for each
/// `.unwrap()`.
///
/// Keywords only count when they are not the tail of a longer identifier, so `notif `
/// or `rematch ` no longer score. A leading `if(` is counted once; it used to be
/// counted twice, once by the token scan and once by an extra prefix check.
fn line_complexity(line: &str) -> usize {
    const KEYWORDS: [&str; 6] = ["if ", "if(", "match ", "for ", "while ", "loop "];
    const OPERATORS: [&str; 2] = ["&&", "||"];

    let keyword_hits: usize = KEYWORDS
        .iter()
        .map(|&keyword| count_keyword(line, keyword))
        .sum();
    let operator_hits: usize = OPERATORS
        .iter()
        .map(|&operator| line.matches(operator).count())
        .sum();

    keyword_hits
        + operator_hits
        + line.matches("?;").count()
        + line.matches(".unwrap()").count() * 2
}

/// Counts occurrences of `keyword` in `line` that are not directly preceded by an
/// identifier character (alphanumeric or `_`).
fn count_keyword(line: &str, keyword: &str) -> usize {
    line.match_indices(keyword)
        .filter(|&(at, _)| {
            !line[..at]
                .chars()
                .next_back()
                .is_some_and(|prev| prev.is_alphanumeric() || prev == '_')
        })
        .count()
}
583
584#[allow(clippy::too_many_arguments)]
585fn make_alert(
586 kind: BullshitKind,
587 confidence: f32,
588 file: &Path,
589 code: &str,
590 start: usize,
591 end: usize,
592 why_bs: &str,
593 suggestion: &str,
594) -> BullshitAlert {
595 let (line, column) = line_column(code, start);
596 BullshitAlert {
597 kind,
598 confidence,
599 severity: confidence,
600 file: file.to_path_buf(),
601 line,
602 column,
603 context_snippet: snippet(code, start, end),
604 why_bs: why_bs.to_string(),
605 suggestion: suggestion.to_string(),
606 }
607}
608
609#[allow(clippy::too_many_arguments)]
610fn alert_from_line(
611 kind: BullshitKind,
612 confidence: f32,
613 file: &Path,
614 line: usize,
615 column: usize,
616 context: &str,
617 why_bs: &str,
618 suggestion: &str,
619) -> BullshitAlert {
620 BullshitAlert {
621 kind,
622 confidence,
623 severity: confidence,
624 file: file.to_path_buf(),
625 line,
626 column,
627 context_snippet: context.trim().to_string(),
628 why_bs: why_bs.to_string(),
629 suggestion: suggestion.to_string(),
630 }
631}
632
/// Converts a byte offset into a 1-based `(line, column)` pair.
///
/// Columns count characters, not bytes. Offsets past the end of `code` resolve to the
/// position just after the last character.
fn line_column(code: &str, byte_pos: usize) -> (usize, usize) {
    code.char_indices()
        .take_while(|&(offset, _)| offset < byte_pos)
        .fold((1, 1), |(line, col), (_, ch)| {
            if ch == '\n' {
                (line + 1, 1)
            } else {
                (line, col + 1)
            }
        })
}
651
/// Returns the trimmed text of the line(s) touched by the byte span `start..end`: from
/// the start of the line containing `start` to the end of the line containing `end`.
fn snippet(code: &str, start: usize, end: usize) -> String {
    let (before, after) = (&code[..start], &code[end..]);
    let from = match before.rfind('\n') {
        Some(newline) => newline + 1,
        None => 0,
    };
    let to = match after.find('\n') {
        Some(newline) => end + newline,
        None => code.len(),
    };
    code[from..to].trim().to_owned()
}
657
658fn dedupe_alerts(alerts: &mut Vec<BullshitAlert>) {
659 alerts.sort_by(|a, b| {
660 a.file
661 .cmp(&b.file)
662 .then_with(|| a.line.cmp(&b.line))
663 .then_with(|| a.column.cmp(&b.column))
664 .then_with(|| format!("{:?}", a.kind).cmp(&format!("{:?}", b.kind)))
665 });
666 alerts.dedup_by(|a, b| {
667 a.file == b.file && a.line == b.line && a.column == b.column && a.kind == b.kind
668 });
669}
670
671pub fn kind_label(kind: BullshitKind) -> &'static str {
672 kind.label()
673}
674
#[cfg(test)]
mod tests {
    use super::*;

    /// Default configuration shared by the tests.
    fn config() -> CodeAuditConfig {
        CodeAuditConfig::default()
    }

    /// Tells whether any alert in `alerts` has the given kind.
    fn has_kind(alerts: &[BullshitAlert], kind: BullshitKind) -> bool {
        alerts.iter().any(|alert| alert.kind == kind)
    }

    #[test]
    fn detects_unwrap_and_sleep() {
        let code = r#"
fn main() {
    let value = thing().unwrap();
    std::thread::sleep(std::time::Duration::from_millis(10));
}
"#;
        let alerts = scan_code(code, "src/main.rs", &config()).unwrap();
        assert!(has_kind(&alerts, BullshitKind::UnwrapAbuse));
        assert!(has_kind(&alerts, BullshitKind::SleepAbuse));
    }

    #[test]
    fn detects_shared_mutable_state() {
        let code = "type Store = Arc<RwLock<HashMap<String, String>>>;";
        let alerts = scan_code(code, "src/lib.rs", &config()).unwrap();
        assert!(has_kind(&alerts, BullshitKind::OverEngineering));
    }

    #[test]
    fn detects_fake_complexity() {
        let code = r#"
fn tangled(x: usize) -> usize {
    if x > 1 { if x > 2 { if x > 3 { if x > 4 { if x > 5 { return x; }}}}}
    match x { 0 => 1, 1 => 2, _ => 3 }
}
"#;
        let alerts = scan_code(code, "src/lib.rs", &config()).unwrap();
        assert!(has_kind(&alerts, BullshitKind::FakeComplexity));
    }

    #[test]
    fn ignores_patterns_in_strings_and_comments() {
        let code = r#"
fn main() {
    let text = "Arc<RwLock<HashMap<String, String>>> and thing().unwrap()";
    // std::thread::sleep(std::time::Duration::from_millis(10));
}
"#;
        let alerts = scan_code(code, "src/main.rs", &config()).unwrap();
        assert!(
            alerts.is_empty(),
            "strings/comments should not produce bullshit alerts: {alerts:?}"
        );
    }

    #[test]
    fn policy_suppresses_kind_and_path() {
        let source = "fn main() { thing().unwrap(); }";

        // Suppression by kind name.
        let mut by_kind = config();
        by_kind.ignore_kinds.insert("UnwrapAbuse".to_string());
        let alerts = scan_code(source, "src/main.rs", &by_kind).unwrap();
        assert!(alerts.is_empty());

        // Suppression by path substring.
        let mut by_path = config();
        by_path.ignore_paths.push("generated".to_string());
        let alerts = scan_code(source, "src/generated/main.rs", &by_path).unwrap();
        assert!(alerts.is_empty());
    }

    #[test]
    fn parses_diff_changed_ranges() {
        let diff = r#"diff --git a/src/main.rs b/src/main.rs
index 111..222 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,0 +2,3 @@
+fn main() {
+    thing().unwrap();
+}
"#;
        let changed = parse_changed_lines(diff);
        let expected = vec![(2, 4)];
        assert_eq!(changed.get(Path::new("src/main.rs")), Some(&expected));
    }
}