Skip to main content

cargo_bless/
code_audit.rs

1//! Static Rust code audit for suspicious complexity and brittle patterns.
2
3use std::collections::{HashMap, HashSet};
4use std::fs;
5use std::path::{Path, PathBuf};
6use std::process::Command;
7
8use anyhow::{bail, Context, Result};
9use regex::Regex;
10use serde::{Deserialize, Serialize};
11use tree_sitter::{Node, Parser};
12
/// Default upper bound, in bytes, on the size of a source file that will be scanned (1 MiB).
const MAX_FILE_BYTES: u64 = 1 << 20;
14
15#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
16pub enum BullshitKind {
17    FakeComplexity,
18    CargoCult,
19    OverEngineering,
20    ArcAbuse,
21    RwLockAbuse,
22    SleepAbuse,
23    UnwrapAbuse,
24    DynTraitAbuse,
25    CloneAbuse,
26    MutexAbuse,
27}
28
29impl BullshitKind {
30    fn label(self) -> &'static str {
31        match self {
32            Self::FakeComplexity => "fake complexity",
33            Self::CargoCult => "cargo cult",
34            Self::OverEngineering => "over-engineering",
35            Self::ArcAbuse => "Arc abuse",
36            Self::RwLockAbuse => "RwLock abuse",
37            Self::SleepAbuse => "sleep abuse",
38            Self::UnwrapAbuse => "unwrap abuse",
39            Self::DynTraitAbuse => "dyn trait abuse",
40            Self::CloneAbuse => "clone abuse",
41            Self::MutexAbuse => "mutex abuse",
42        }
43    }
44}
45
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct BullshitAlert {
48    pub kind: BullshitKind,
49    pub confidence: f32,
50    pub severity: f32,
51    pub file: PathBuf,
52    pub line: usize,
53    pub column: usize,
54    pub context_snippet: String,
55    pub why_bs: String,
56    pub suggestion: String,
57}
58
59#[derive(Debug, Clone)]
60pub struct CodeAuditConfig {
61    pub confidence_threshold: f32,
62    pub max_file_bytes: u64,
63    pub ignore_paths: Vec<String>,
64    pub ignore_kinds: HashSet<String>,
65}
66
67impl Default for CodeAuditConfig {
68    fn default() -> Self {
69        Self {
70            confidence_threshold: 0.60,
71            max_file_bytes: MAX_FILE_BYTES,
72            ignore_paths: Vec::new(),
73            ignore_kinds: HashSet::new(),
74        }
75    }
76}
77
78#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct CodeAuditReport {
80    pub files_scanned: usize,
81    pub alerts: Vec<BullshitAlert>,
82}
83
84impl CodeAuditReport {
85    pub fn is_clean(&self) -> bool {
86        self.alerts.is_empty()
87    }
88}
89
90pub fn scan_project(
91    manifest_path: Option<&Path>,
92    config: &CodeAuditConfig,
93) -> Result<CodeAuditReport> {
94    scan_project_with_filter(manifest_path, config, None)
95}
96
97pub fn scan_git_diff(
98    manifest_path: Option<&Path>,
99    config: &CodeAuditConfig,
100) -> Result<CodeAuditReport> {
101    let base_dir = project_base_dir(manifest_path);
102    let filter = DiffFilter::from_git_diff(base_dir)?;
103    scan_project_with_filter(manifest_path, config, Some(&filter))
104}
105
106fn scan_project_with_filter(
107    manifest_path: Option<&Path>,
108    config: &CodeAuditConfig,
109    diff_filter: Option<&DiffFilter>,
110) -> Result<CodeAuditReport> {
111    let base_dir = manifest_path
112        .and_then(Path::parent)
113        .filter(|p| !p.as_os_str().is_empty())
114        .unwrap_or_else(|| Path::new("."));
115
116    let mut files = Vec::new();
117    for dir in ["src", "tests", "examples", "benches"] {
118        collect_rust_files(&base_dir.join(dir), config, &mut files)?;
119    }
120
121    let mut alerts = Vec::new();
122    for file in &files {
123        if is_ignored_path(file, config) {
124            continue;
125        }
126        let code = fs::read_to_string(file)
127            .with_context(|| format!("failed to read {}", file.display()))?;
128        let mut file_alerts = scan_code(&code, file, config)?;
129        if let Some(filter) = diff_filter {
130            file_alerts.retain(|alert| filter.includes(alert));
131        }
132        alerts.extend(file_alerts);
133    }
134
135    alerts.sort_by(|a, b| {
136        b.severity
137            .partial_cmp(&a.severity)
138            .unwrap_or(std::cmp::Ordering::Equal)
139            .then_with(|| a.file.cmp(&b.file))
140            .then_with(|| a.line.cmp(&b.line))
141    });
142
143    Ok(CodeAuditReport {
144        files_scanned: files.len(),
145        alerts,
146    })
147}
148
149pub fn scan_code(
150    code: &str,
151    file: impl Into<PathBuf>,
152    config: &CodeAuditConfig,
153) -> Result<Vec<BullshitAlert>> {
154    let file = file.into();
155    if is_ignored_path(&file, config) {
156        return Ok(Vec::new());
157    }
158
159    let ignored_ranges = parse_ignored_ranges(code).unwrap_or_default();
160    let masked = mask_ranges(code, &ignored_ranges);
161    let mut alerts = Vec::new();
162
163    scan_regex_patterns(&masked, &file, &mut alerts)?;
164    scan_line_patterns(&masked, &file, &mut alerts);
165    scan_function_complexity(&masked, &file, &mut alerts);
166
167    alerts.retain(|alert| alert.confidence >= config.confidence_threshold);
168    alerts.retain(|alert| !config.ignore_kinds.contains(&format!("{:?}", alert.kind)));
169    dedupe_alerts(&mut alerts);
170    Ok(alerts)
171}
172
173pub fn config_from_policy(policy: Option<&crate::policy::Policy>) -> CodeAuditConfig {
174    let mut config = CodeAuditConfig::default();
175    if let Some(policy) = policy {
176        config.ignore_paths = policy.code_audit.ignore_paths.clone();
177        config.ignore_kinds = policy.code_audit.ignore_kinds.iter().cloned().collect();
178    }
179    config
180}
181
/// Returns the directory containing the manifest, or `"."` when there is no manifest
/// or its parent is missing or empty (as for a bare `Cargo.toml`).
fn project_base_dir(manifest_path: Option<&Path>) -> &Path {
    match manifest_path.and_then(Path::parent) {
        Some(parent) if !parent.as_os_str().is_empty() => parent,
        _ => Path::new("."),
    }
}
188
189fn is_ignored_path(path: &Path, config: &CodeAuditConfig) -> bool {
190    let path = path.to_string_lossy();
191    config
192        .ignore_paths
193        .iter()
194        .any(|pattern| path.contains(pattern))
195}
196
197fn collect_rust_files(
198    dir: &Path,
199    config: &CodeAuditConfig,
200    files: &mut Vec<PathBuf>,
201) -> Result<()> {
202    if !dir.exists() {
203        return Ok(());
204    }
205
206    for entry in fs::read_dir(dir).with_context(|| format!("failed to read {}", dir.display()))? {
207        let entry = entry?;
208        let path = entry.path();
209        let name = entry.file_name();
210        let name = name.to_string_lossy();
211
212        if path.is_dir() {
213            if should_skip_dir(&name) {
214                continue;
215            }
216            collect_rust_files(&path, config, files)?;
217            continue;
218        }
219
220        if path.extension().and_then(|e| e.to_str()) != Some("rs") {
221            continue;
222        }
223
224        let metadata = entry.metadata()?;
225        if metadata.len() <= config.max_file_bytes {
226            files.push(path);
227        }
228    }
229
230    Ok(())
231}
232
/// Returns `true` for directories that should not be descended into: hidden
/// directories (leading `.`) and a fixed set of build/vendor directory names.
fn should_skip_dir(name: &str) -> bool {
    const SKIPPED: [&str; 6] = [
        "target",
        "vendor",
        "node_modules",
        "dist",
        "build",
        "third_party",
    ];
    if name.starts_with('.') {
        return true;
    }
    SKIPPED.contains(&name)
}
240
241#[derive(Debug)]
242struct DiffFilter {
243    base_dir: PathBuf,
244    changed_lines: HashMap<PathBuf, Vec<(usize, usize)>>,
245}
246
247impl DiffFilter {
248    fn from_git_diff(base_dir: &Path) -> Result<Self> {
249        let output = Command::new("git")
250            .arg("-C")
251            .arg(base_dir)
252            .arg("diff")
253            .arg("HEAD")
254            .arg("--unified=0")
255            .arg("--")
256            .output()
257            .with_context(|| "failed to run git diff HEAD --unified=0")?;
258
259        if !output.status.success() {
260            bail!(
261                "git diff failed: {}",
262                String::from_utf8_lossy(&output.stderr).trim()
263            );
264        }
265
266        Ok(Self {
267            base_dir: base_dir.to_path_buf(),
268            changed_lines: parse_changed_lines(&String::from_utf8_lossy(&output.stdout)),
269        })
270    }
271
272    fn includes(&self, alert: &BullshitAlert) -> bool {
273        let path = alert
274            .file
275            .strip_prefix(&self.base_dir)
276            .map(Path::to_path_buf)
277            .unwrap_or_else(|_| alert.file.clone());
278        let path = normalize_diff_path(&path);
279        self.changed_lines.get(&path).is_some_and(|ranges| {
280            ranges
281                .iter()
282                .any(|(start, end)| alert.line >= *start && alert.line <= *end)
283        })
284    }
285}
286
287fn parse_changed_lines(diff: &str) -> HashMap<PathBuf, Vec<(usize, usize)>> {
288    let mut current_file: Option<PathBuf> = None;
289    let mut changed = HashMap::<PathBuf, Vec<(usize, usize)>>::new();
290
291    for line in diff.lines() {
292        if let Some(path) = line.strip_prefix("+++ b/") {
293            current_file = Some(PathBuf::from(path));
294            continue;
295        }
296        if line.starts_with("+++ /dev/null") {
297            current_file = None;
298            continue;
299        }
300
301        if let (Some(file), Some(range)) = (current_file.as_ref(), parse_hunk_new_range(line)) {
302            changed.entry(file.clone()).or_default().push(range);
303        }
304    }
305
306    changed
307}
308
/// Parses the new-file side of a hunk header such as `@@ -1,0 +2,3 @@`.
///
/// Returns the inclusive line range `(start, end)` covered by the hunk, or `None`
/// when the line is not a hunk header, cannot be parsed, or adds no lines (count `0`).
/// A missing count means a single line.
fn parse_hunk_new_range(line: &str) -> Option<(usize, usize)> {
    let header = line.strip_prefix("@@ ")?;
    let new_side = header
        .split_whitespace()
        .find(|field| field.starts_with('+'))?
        .trim_start_matches('+');

    let (start_text, count) = match new_side.split_once(',') {
        Some((start_text, count_text)) => (start_text, count_text.parse::<usize>().ok()),
        None => (new_side, Some(1)),
    };

    let start: usize = start_text.parse().ok()?;
    match count? {
        0 => None,
        lines => Some((start, start + lines - 1)),
    }
}
325
/// Rebuilds `path` without any `.` (current-directory) components, so that
/// `./src/main.rs` and `src/main.rs` produce the same key.
fn normalize_diff_path(path: &Path) -> PathBuf {
    path.components()
        .filter(|part| !matches!(part, std::path::Component::CurDir))
        .map(|part| part.as_os_str())
        .fold(PathBuf::new(), |mut rebuilt, part| {
            rebuilt.push(part);
            rebuilt
        })
}
336
337fn parse_ignored_ranges(code: &str) -> Result<Vec<(usize, usize)>> {
338    let mut parser = Parser::new();
339    parser
340        .set_language(&tree_sitter_rust::LANGUAGE.into())
341        .map_err(|err| anyhow::anyhow!("failed to load Rust tree-sitter grammar: {err}"))?;
342    let tree = parser
343        .parse(code, None)
344        .ok_or_else(|| anyhow::anyhow!("tree-sitter failed to parse Rust source"))?;
345
346    let mut ranges = Vec::new();
347    collect_ignored_ranges(tree.root_node(), &mut ranges);
348    Ok(ranges)
349}
350
351fn collect_ignored_ranges(node: Node<'_>, ranges: &mut Vec<(usize, usize)>) {
352    if is_ignored_node(node.kind()) {
353        ranges.push((node.start_byte(), node.end_byte()));
354        return;
355    }
356
357    let mut cursor = node.walk();
358    for child in node.children(&mut cursor) {
359        collect_ignored_ranges(child, ranges);
360    }
361}
362
/// Returns `true` for syntax node kinds whose text must not be scanned: comments and
/// string, raw-string and char literals.
fn is_ignored_node(kind: &str) -> bool {
    const IGNORED_KINDS: [&str; 5] = [
        "line_comment",
        "block_comment",
        "string_literal",
        "raw_string_literal",
        "char_literal",
    ];
    IGNORED_KINDS.iter().any(|ignored| *ignored == kind)
}
369
/// Returns a copy of `code` in which every byte inside the given `start..end` ranges is
/// replaced by a space, except newlines, so line numbers and byte offsets are preserved.
///
/// Ranges reaching past the end of the text are clipped. If masking would leave
/// invalid UTF-8 (a range cutting through a multi-byte character), the original text
/// is returned unchanged.
fn mask_ranges(code: &str, ranges: &[(usize, usize)]) -> String {
    let mut bytes = code.as_bytes().to_vec();
    let len = bytes.len();

    for &(start, end) in ranges {
        let upper = end.min(len);
        let lower = start.min(upper);
        bytes[lower..upper]
            .iter_mut()
            .filter(|byte| **byte != b'\n')
            .for_each(|byte| *byte = b' ');
    }

    match String::from_utf8(bytes) {
        Ok(masked) => masked,
        Err(_) => code.to_string(),
    }
}
383
384fn scan_regex_patterns(code: &str, file: &Path, alerts: &mut Vec<BullshitAlert>) -> Result<()> {
385    let patterns = [
386        (
387            r"Arc\s*<\s*RwLock\s*<",
388            BullshitKind::OverEngineering,
389            0.86,
390            "Arc<RwLock<...>> is often shared mutable state wearing a tuxedo.",
391            "Try explicit ownership, message passing, or a narrower shared state boundary.",
392        ),
393        (
394            r"Arc\s*<\s*Mutex\s*<",
395            BullshitKind::OverEngineering,
396            0.82,
397            "Arc<Mutex<...>> can be valid, but it is also a classic complexity magnet.",
398            "Check whether ownership can stay local or the locked data can be smaller.",
399        ),
400        (
401            r"Mutex\s*<\s*HashMap\s*<",
402            BullshitKind::MutexAbuse,
403            0.76,
404            "A Mutex<HashMap<...>> is a blunt concurrency primitive.",
405            "Consider sharding, DashMap, or reducing shared mutable state.",
406        ),
407        (
408            r"RwLock\s*<",
409            BullshitKind::RwLockAbuse,
410            0.64,
411            "RwLock adds coordination cost and can hide unclear ownership.",
412            "Use it only when read-heavy sharing is real and measured.",
413        ),
414        (
415            r"\b(std::thread::sleep|tokio::time::sleep)\s*\(",
416            BullshitKind::SleepAbuse,
417            0.78,
418            "Sleep calls are often timing bullshit instead of synchronization.",
419            "Replace sleeps with explicit readiness, timeouts, retries, or test clocks.",
420        ),
421    ];
422
423    for (pattern, kind, confidence, why, suggestion) in patterns {
424        let regex = Regex::new(pattern)?;
425        for mat in regex.find_iter(code) {
426            alerts.push(make_alert(
427                kind,
428                confidence,
429                file,
430                code,
431                mat.start(),
432                mat.end(),
433                why,
434                suggestion,
435            ));
436        }
437    }
438
439    Ok(())
440}
441
442fn scan_line_patterns(code: &str, file: &Path, alerts: &mut Vec<BullshitAlert>) {
443    for (line_idx, line) in code.lines().enumerate() {
444        let trimmed = line.trim();
445
446        if let Some(col) = line.find(".unwrap()") {
447            alerts.push(alert_from_line(
448                BullshitKind::UnwrapAbuse,
449                0.72,
450                file,
451                line_idx + 1,
452                col + 1,
453                line,
454                "unwrap() is a runtime trap dressed up as confidence.",
455                "Propagate the error with ?, add context, or handle the failure explicitly.",
456            ));
457        }
458
459        let clone_count = line.matches(".clone()").count();
460        if clone_count >= 2 {
461            alerts.push(alert_from_line(
462                BullshitKind::CloneAbuse,
463                (0.60 + clone_count as f32 * 0.08).min(0.92),
464                file,
465                line_idx + 1,
466                line.find(".clone()").unwrap_or(0) + 1,
467                line,
468                "Multiple clone() calls on one line can hide ownership confusion.",
469                "Check whether borrowing, moving, or restructuring removes the copies.",
470            ));
471        }
472
473        let dyn_count = trimmed.matches("dyn ").count();
474        if dyn_count >= 3 {
475            alerts.push(alert_from_line(
476                BullshitKind::DynTraitAbuse,
477                0.80,
478                file,
479                line_idx + 1,
480                line.find("dyn ").unwrap_or(0) + 1,
481                line,
482                "Heavy dyn usage may be abstraction theater.",
483                "Prefer concrete types or generics unless runtime polymorphism is needed.",
484            ));
485        }
486
487        if trimmed.starts_with("use std::collections::{")
488            && trimmed.contains("HashMap")
489            && trimmed.contains("BTreeMap")
490        {
491            alerts.push(alert_from_line(
492                BullshitKind::CargoCult,
493                0.62,
494                file,
495                line_idx + 1,
496                line.find("HashMap").unwrap_or(0) + 1,
497                line,
498                "Broad collection imports can signal cargo-cult scaffolding.",
499                "Import the collection you actually use, or qualify rare uses inline.",
500            ));
501        }
502    }
503}
504
505fn scan_function_complexity(code: &str, file: &Path, alerts: &mut Vec<BullshitAlert>) {
506    let lines: Vec<&str> = code.lines().collect();
507    let mut idx = 0;
508
509    while idx < lines.len() {
510        let line = lines[idx];
511        if !looks_like_fn_start(line) {
512            idx += 1;
513            continue;
514        }
515
516        let start_line = idx + 1;
517        let mut brace_balance = 0isize;
518        let mut saw_body = false;
519        let mut complexity = 0usize;
520        let mut end_idx = idx;
521
522        while end_idx < lines.len() {
523            let current = lines[end_idx];
524            complexity += line_complexity(current);
525            for ch in current.chars() {
526                if ch == '{' {
527                    saw_body = true;
528                    brace_balance += 1;
529                } else if ch == '}' {
530                    brace_balance -= 1;
531                }
532            }
533            if saw_body && brace_balance <= 0 {
534                break;
535            }
536            end_idx += 1;
537        }
538
539        if saw_body && complexity >= 6 {
540            let confidence = (complexity as f32 / 24.0).clamp(0.66, 0.95);
541            alerts.push(alert_from_line(
542                BullshitKind::FakeComplexity,
543                confidence,
544                file,
545                start_line,
546                line.find("fn").unwrap_or(0) + 1,
547                line,
548                &format!(
549                    "Function complexity score is {complexity}; this smells like fake complexity."
550                ),
551                "Split the function around decisions, loops, and side effects.",
552            ));
553        }
554
555        idx = end_idx.saturating_add(1);
556    }
557}
558
/// Returns `true` when `line` begins a function item.
///
/// Accepts an optional visibility (`pub`, `pub(crate)`, `pub(super)`, `pub(in ..)`),
/// followed by any sequence of the qualifiers `default`, `const`, `async`, `unsafe`
/// and `extern` (with an optional ABI string), followed by the `fn` keyword and
/// whitespace. Leading indentation is ignored.
fn looks_like_fn_start(line: &str) -> bool {
    let mut rest = line.trim_start();

    if let Some(after_pub) = rest.strip_prefix("pub") {
        // Skip a restriction such as `(crate)` when present.
        let after_vis = match after_pub.strip_prefix('(') {
            Some(restricted) => match restricted.split_once(')') {
                Some((_, tail)) => tail,
                None => return false,
            },
            None => after_pub,
        };
        // `pub` must be a whole word, e.g. not the start of `public_thing`.
        if !after_vis.starts_with(char::is_whitespace) {
            return false;
        }
        rest = after_vis.trim_start();
    }

    loop {
        let Some((word, tail)) = rest.split_once(char::is_whitespace) else {
            return false;
        };
        match word {
            "fn" => return true,
            "default" | "const" | "async" | "unsafe" => rest = tail.trim_start(),
            "extern" => {
                rest = tail.trim_start();
                // Optional ABI string, e.g. `extern "C" fn`.
                if let Some(after_quote) = rest.strip_prefix('"') {
                    match after_quote.split_once('"') {
                        Some((_, after_abi)) => rest = after_abi.trim_start(),
                        None => return false,
                    }
                }
            }
            _ => return false,
        }
    }
}
567
/// Scores a single line of code for the complexity heuristic.
///
/// Each occurrence of a branching token (`if `, `if(`, `match `, `for `, `while `,
/// `loop `, `&&`, `||`) and each `?;` adds one; each `.unwrap()` adds two. A line that
/// begins with `if(` receives one extra point on top of the token count.
fn line_complexity(line: &str) -> usize {
    const BRANCH_TOKENS: [&str; 8] = [
        "if ", "if(", "match ", "for ", "while ", "loop ", "&&", "||",
    ];

    let branch_hits: usize = BRANCH_TOKENS
        .iter()
        .map(|token| line.matches(*token).count())
        .sum();
    let leading_if_paren = usize::from(line.trim_start().starts_with("if("));
    let propagations = line.matches("?;").count();
    let unwraps = line.matches(".unwrap()").count();

    branch_hits + leading_if_paren + propagations + 2 * unwraps
}
583
584#[allow(clippy::too_many_arguments)]
585fn make_alert(
586    kind: BullshitKind,
587    confidence: f32,
588    file: &Path,
589    code: &str,
590    start: usize,
591    end: usize,
592    why_bs: &str,
593    suggestion: &str,
594) -> BullshitAlert {
595    let (line, column) = line_column(code, start);
596    BullshitAlert {
597        kind,
598        confidence,
599        severity: confidence,
600        file: file.to_path_buf(),
601        line,
602        column,
603        context_snippet: snippet(code, start, end),
604        why_bs: why_bs.to_string(),
605        suggestion: suggestion.to_string(),
606    }
607}
608
609#[allow(clippy::too_many_arguments)]
610fn alert_from_line(
611    kind: BullshitKind,
612    confidence: f32,
613    file: &Path,
614    line: usize,
615    column: usize,
616    context: &str,
617    why_bs: &str,
618    suggestion: &str,
619) -> BullshitAlert {
620    BullshitAlert {
621        kind,
622        confidence,
623        severity: confidence,
624        file: file.to_path_buf(),
625        line,
626        column,
627        context_snippet: context.trim().to_string(),
628        why_bs: why_bs.to_string(),
629        suggestion: suggestion.to_string(),
630    }
631}
632
/// Converts a byte offset into a 1-based `(line, column)` pair.
///
/// Columns count characters, not bytes. An offset past the end of `code` yields the
/// position just after the last character.
fn line_column(code: &str, byte_pos: usize) -> (usize, usize) {
    code.char_indices()
        .take_while(|(offset, _)| *offset < byte_pos)
        .fold((1, 1), |(line, col), (_, ch)| match ch {
            '\n' => (line + 1, 1),
            _ => (line, col + 1),
        })
}
651
/// Returns the trimmed text of the line(s) spanned by the byte range `start..end`:
/// from the beginning of the line containing `start` to the end of the line
/// containing `end`.
fn snippet(code: &str, start: usize, end: usize) -> String {
    let line_start = match code[..start].rfind('\n') {
        Some(newline) => newline + 1,
        None => 0,
    };
    let line_end = match code[end..].find('\n') {
        Some(offset) => end + offset,
        None => code.len(),
    };
    code[line_start..line_end].trim().to_string()
}
657
658fn dedupe_alerts(alerts: &mut Vec<BullshitAlert>) {
659    alerts.sort_by(|a, b| {
660        a.file
661            .cmp(&b.file)
662            .then_with(|| a.line.cmp(&b.line))
663            .then_with(|| a.column.cmp(&b.column))
664            .then_with(|| format!("{:?}", a.kind).cmp(&format!("{:?}", b.kind)))
665    });
666    alerts.dedup_by(|a, b| {
667        a.file == b.file && a.line == b.line && a.column == b.column && a.kind == b.kind
668    });
669}
670
671pub fn kind_label(kind: BullshitKind) -> &'static str {
672    kind.label()
673}
674
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans `code` as if it lived at `path`, using the given configuration.
    fn scan_with(code: &str, path: &str, cfg: &CodeAuditConfig) -> Vec<BullshitAlert> {
        scan_code(code, path, cfg).expect("scan_code should succeed")
    }

    /// Scans `code` as if it lived at `path`, using the default configuration.
    fn scan_default(code: &str, path: &str) -> Vec<BullshitAlert> {
        scan_with(code, path, &CodeAuditConfig::default())
    }

    /// Reports whether any alert has the given kind.
    fn has_kind(alerts: &[BullshitAlert], kind: BullshitKind) -> bool {
        alerts.iter().any(|alert| alert.kind == kind)
    }

    #[test]
    fn detects_unwrap_and_sleep() {
        let code = r#"
fn main() {
    let value = thing().unwrap();
    std::thread::sleep(std::time::Duration::from_millis(10));
}
"#;
        let alerts = scan_default(code, "src/main.rs");
        assert!(has_kind(&alerts, BullshitKind::UnwrapAbuse));
        assert!(has_kind(&alerts, BullshitKind::SleepAbuse));
    }

    #[test]
    fn detects_shared_mutable_state() {
        let code = "type Store = Arc<RwLock<HashMap<String, String>>>;";
        let alerts = scan_default(code, "src/lib.rs");
        assert!(has_kind(&alerts, BullshitKind::OverEngineering));
    }

    #[test]
    fn detects_fake_complexity() {
        let code = r#"
fn tangled(x: usize) -> usize {
    if x > 1 { if x > 2 { if x > 3 { if x > 4 { if x > 5 { return x; }}}}}
    match x { 0 => 1, 1 => 2, _ => 3 }
}
"#;
        let alerts = scan_default(code, "src/lib.rs");
        assert!(has_kind(&alerts, BullshitKind::FakeComplexity));
    }

    #[test]
    fn ignores_patterns_in_strings_and_comments() {
        let code = r#"
fn main() {
    let text = "Arc<RwLock<HashMap<String, String>>> and thing().unwrap()";
    // std::thread::sleep(std::time::Duration::from_millis(10));
}
"#;
        let alerts = scan_default(code, "src/main.rs");
        assert!(
            alerts.is_empty(),
            "strings/comments should not produce bullshit alerts: {alerts:?}"
        );
    }

    #[test]
    fn policy_suppresses_kind_and_path() {
        let source = "fn main() { thing().unwrap(); }";

        // Suppressed by kind.
        let mut by_kind = CodeAuditConfig::default();
        by_kind.ignore_kinds.insert("UnwrapAbuse".to_string());
        assert!(scan_with(source, "src/main.rs", &by_kind).is_empty());

        // Suppressed by path.
        let mut by_path = CodeAuditConfig::default();
        by_path.ignore_paths.push("generated".to_string());
        assert!(scan_with(source, "src/generated/main.rs", &by_path).is_empty());
    }

    #[test]
    fn parses_diff_changed_ranges() {
        let diff = r#"diff --git a/src/main.rs b/src/main.rs
index 111..222 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,0 +2,3 @@
+fn main() {
+    thing().unwrap();
+}
"#;
        let changed = parse_changed_lines(diff);
        let expected = vec![(2, 4)];
        assert_eq!(changed.get(Path::new("src/main.rs")), Some(&expected));
    }
}