Skip to main content

cargo_bless/
code_audit.rs

1//! Static Rust code audit for suspicious complexity and brittle patterns.
2
3use std::collections::{HashMap, HashSet};
4use std::fs;
5use std::path::{Path, PathBuf};
6use std::process::Command;
7
8use anyhow::{bail, Context, Result};
9use regex::Regex;
10use serde::{Deserialize, Serialize};
11use tree_sitter::{Node, Parser};
12
/// Default per-file size cap (1 MiB); used as the default for `CodeAuditConfig::max_file_bytes`.
const MAX_FILE_BYTES: u64 = 1024 * 1024;
14
/// Category of suspicious pattern that an alert reports.
///
/// The `Debug` name of a variant (e.g. `UnwrapAbuse`) is the string that
/// `CodeAuditConfig::ignore_kinds` entries are compared against.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
pub enum BullshitKind {
    FakeComplexity,
    CargoCult,
    OverEngineering,
    ArcAbuse,
    RwLockAbuse,
    SleepAbuse,
    UnwrapAbuse,
    DynTraitAbuse,
    CloneAbuse,
    MutexAbuse,
    TodoUnimplemented,
    RefCellAbuse,
    BoolComparison,
    StringAntiPattern,
    DiscardedError,
    LossyUtf8,
}
34
35impl BullshitKind {
36    fn label(self) -> &'static str {
37        match self {
38            Self::FakeComplexity => "fake complexity",
39            Self::CargoCult => "cargo cult",
40            Self::OverEngineering => "over-engineering",
41            Self::ArcAbuse => "Arc abuse",
42            Self::RwLockAbuse => "RwLock abuse",
43            Self::SleepAbuse => "sleep abuse",
44            Self::UnwrapAbuse => "unwrap abuse",
45            Self::DynTraitAbuse => "dyn trait abuse",
46            Self::CloneAbuse => "clone abuse",
47            Self::MutexAbuse => "mutex abuse",
48            Self::TodoUnimplemented => "todo/unimplemented",
49            Self::RefCellAbuse => "RefCell abuse",
50            Self::BoolComparison => "redundant bool comparison",
51            Self::StringAntiPattern => "string anti-pattern",
52            Self::DiscardedError => "discarded error",
53            Self::LossyUtf8 => "lossy UTF-8 conversion",
54        }
55    }
56}
57
/// One finding produced by the audit.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BullshitAlert {
    /// Category of the finding.
    pub kind: BullshitKind,
    /// Heuristic confidence; alerts below `CodeAuditConfig::confidence_threshold` are dropped.
    pub confidence: f32,
    /// Ranking key for the project report (highest first); the constructors in
    /// this file set it equal to `confidence`.
    pub severity: f32,
    /// File the finding was reported for.
    pub file: PathBuf,
    /// 1-based line number of the finding.
    pub line: usize,
    /// 1-based column of the finding.
    pub column: usize,
    /// Trimmed text of the offending line, taken from the masked source.
    pub context_snippet: String,
    /// Explanation of why the pattern is suspicious.
    pub why_bs: String,
    /// Suggested remedy.
    pub suggestion: String,
}
70
/// Tunables for a code audit run.
#[derive(Debug, Clone)]
pub struct CodeAuditConfig {
    /// Alerts with a confidence below this value are discarded.
    pub confidence_threshold: f32,
    /// Files larger than this many bytes are not collected for scanning.
    pub max_file_bytes: u64,
    /// Substrings; a file whose path contains any of them is not scanned.
    pub ignore_paths: Vec<String>,
    /// `Debug` names of `BullshitKind` variants whose alerts are suppressed.
    pub ignore_kinds: HashSet<String>,
    /// Scan tests/, examples/, and benches/ in addition to src/. Default: false.
    pub include_tests: bool,
}
80
81impl Default for CodeAuditConfig {
82    fn default() -> Self {
83        Self {
84            confidence_threshold: 0.60,
85            max_file_bytes: MAX_FILE_BYTES,
86            ignore_paths: Vec::new(),
87            ignore_kinds: HashSet::new(),
88            include_tests: false,
89        }
90    }
91}
92
/// Result of auditing one project (or several merged projects).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeAuditReport {
    /// Number of files the scan counted.
    pub files_scanned: usize,
    /// All alerts that survived filtering.
    pub alerts: Vec<BullshitAlert>,
}
98
99impl CodeAuditReport {
100    pub fn is_clean(&self) -> bool {
101        self.alerts.is_empty()
102    }
103}
104
105/// Concatenate workspace member audits into one report (sums `files_scanned`, merges alerts).
106pub fn merge_reports(reports: Vec<CodeAuditReport>) -> CodeAuditReport {
107    let mut files_scanned = 0usize;
108    let mut alerts = Vec::new();
109    for r in reports {
110        files_scanned += r.files_scanned;
111        alerts.extend(r.alerts);
112    }
113    CodeAuditReport {
114        files_scanned,
115        alerts,
116    }
117}
118
/// Audit every collected Rust file of the project whose manifest is at
/// `manifest_path` (the current directory when `None`), without any diff filter.
///
/// # Errors
/// Propagates whatever `scan_project_with_filter` returns.
pub fn scan_project(
    manifest_path: Option<&Path>,
    config: &CodeAuditConfig,
) -> Result<CodeAuditReport> {
    scan_project_with_filter(manifest_path, config, None)
}
125
126pub fn scan_git_diff(
127    manifest_path: Option<&Path>,
128    config: &CodeAuditConfig,
129) -> Result<CodeAuditReport> {
130    let base_dir = project_base_dir(manifest_path);
131    let filter = DiffFilter::from_git_diff(base_dir)?;
132    scan_project_with_filter(manifest_path, config, Some(&filter))
133}
134
135fn scan_project_with_filter(
136    manifest_path: Option<&Path>,
137    config: &CodeAuditConfig,
138    diff_filter: Option<&DiffFilter>,
139) -> Result<CodeAuditReport> {
140    let base_dir = manifest_path
141        .and_then(Path::parent)
142        .filter(|p| !p.as_os_str().is_empty())
143        .unwrap_or_else(|| Path::new("."));
144
145    let mut files = Vec::new();
146    let src_dir = base_dir.join("src");
147    if src_dir.is_dir() {
148        collect_rust_files(&src_dir, config, &mut files)?;
149        if config.include_tests {
150            for dir in &["tests", "examples", "benches"] {
151                collect_rust_files(&base_dir.join(dir), config, &mut files)?;
152            }
153        }
154    } else {
155        // Non-standard layout: scan the manifest dir itself for .rs files
156        collect_rust_files(base_dir, config, &mut files)?;
157    }
158
159    let mut alerts = Vec::new();
160    for file in &files {
161        if is_ignored_path(file, config) {
162            continue;
163        }
164        let code = fs::read_to_string(file)
165            .with_context(|| format!("failed to read {}", file.display()))?;
166        let mut file_alerts = scan_code(&code, file, config)?;
167        if let Some(filter) = diff_filter {
168            file_alerts.retain(|alert| filter.includes(alert));
169        }
170        alerts.extend(file_alerts);
171    }
172
173    alerts.sort_by(|a, b| {
174        b.severity
175            .partial_cmp(&a.severity)
176            .unwrap_or(std::cmp::Ordering::Equal)
177            .then_with(|| a.file.cmp(&b.file))
178            .then_with(|| a.line.cmp(&b.line))
179    });
180
181    Ok(CodeAuditReport {
182        files_scanned: files.len(),
183        alerts,
184    })
185}
186
187pub fn scan_code(
188    code: &str,
189    file: impl Into<PathBuf>,
190    config: &CodeAuditConfig,
191) -> Result<Vec<BullshitAlert>> {
192    let file = file.into();
193    if is_ignored_path(&file, config) {
194        return Ok(Vec::new());
195    }
196
197    let ignored_ranges = parse_ignored_ranges(code).unwrap_or_default();
198    let masked = mask_ranges(code, &ignored_ranges);
199    let mut alerts = Vec::new();
200
201    scan_regex_patterns(&masked, &file, &mut alerts)?;
202    scan_line_patterns(&masked, &file, &mut alerts);
203    scan_function_complexity(&masked, &file, &mut alerts);
204
205    alerts.retain(|alert| alert.confidence >= config.confidence_threshold);
206    alerts.retain(|alert| !config.ignore_kinds.contains(&format!("{:?}", alert.kind)));
207    dedupe_alerts(&mut alerts);
208    Ok(alerts)
209}
210
211pub fn config_from_policy(policy: Option<&crate::policy::Policy>) -> CodeAuditConfig {
212    let mut config = CodeAuditConfig::default();
213    if let Some(policy) = policy {
214        config.ignore_paths = policy.code_audit.ignore_paths.clone();
215        config.ignore_kinds = policy.code_audit.ignore_kinds.iter().cloned().collect();
216        if policy.settings.min_confidence > 0.0 {
217            config.confidence_threshold = policy.settings.min_confidence as f32;
218        }
219        if policy.code_audit.include_tests {
220            config.include_tests = true;
221        }
222    }
223    config
224}
225
/// Directory containing the manifest, or `.` when no manifest path is given
/// or the path has no (non-empty) parent component.
fn project_base_dir(manifest_path: Option<&Path>) -> &Path {
    match manifest_path.and_then(Path::parent) {
        Some(parent) if !parent.as_os_str().is_empty() => parent,
        _ => Path::new("."),
    }
}
232
233fn is_ignored_path(path: &Path, config: &CodeAuditConfig) -> bool {
234    let path = path.to_string_lossy();
235    config
236        .ignore_paths
237        .iter()
238        .any(|pattern| path.contains(pattern))
239}
240
241fn collect_rust_files(
242    dir: &Path,
243    config: &CodeAuditConfig,
244    files: &mut Vec<PathBuf>,
245) -> Result<()> {
246    if !dir.exists() {
247        return Ok(());
248    }
249
250    for entry in fs::read_dir(dir).with_context(|| format!("failed to read {}", dir.display()))? {
251        let entry = entry?;
252        let path = entry.path();
253        let name = entry.file_name();
254        let name = name.to_string_lossy();
255
256        if path.is_dir() {
257            if should_skip_dir(&name) {
258                continue;
259            }
260            collect_rust_files(&path, config, files)?;
261            continue;
262        }
263
264        if path.extension().and_then(|e| e.to_str()) != Some("rs") {
265            continue;
266        }
267
268        let metadata = entry.metadata()?;
269        if metadata.len() <= config.max_file_bytes {
270            files.push(path);
271        }
272    }
273
274    Ok(())
275}
276
/// Whether a directory with this name is excluded from traversal: hidden
/// directories (leading `.`) and a fixed list of build/vendor directories.
fn should_skip_dir(name: &str) -> bool {
    const SKIPPED: [&str; 6] = [
        "target",
        "vendor",
        "node_modules",
        "dist",
        "build",
        "third_party",
    ];
    name.starts_with('.') || SKIPPED.contains(&name)
}
284
285#[derive(Debug)]
286struct DiffFilter {
287    base_dir: PathBuf,
288    changed_lines: HashMap<PathBuf, Vec<(usize, usize)>>,
289}
290
291impl DiffFilter {
292    fn from_git_diff(base_dir: &Path) -> Result<Self> {
293        let output = Command::new("git")
294            .arg("-C")
295            .arg(base_dir)
296            .arg("diff")
297            .arg("HEAD")
298            .arg("--unified=0")
299            .arg("--")
300            .output()
301            .with_context(|| "failed to run git diff HEAD --unified=0")?;
302
303        if !output.status.success() {
304            bail!(
305                "git diff failed: {}",
306                String::from_utf8_lossy(&output.stderr).trim()
307            );
308        }
309
310        Ok(Self {
311            base_dir: base_dir.to_path_buf(),
312            changed_lines: parse_changed_lines(&String::from_utf8_lossy(&output.stdout)),
313        })
314    }
315
316    fn includes(&self, alert: &BullshitAlert) -> bool {
317        let path = alert
318            .file
319            .strip_prefix(&self.base_dir)
320            .map(Path::to_path_buf)
321            .unwrap_or_else(|_| alert.file.clone());
322        let path = normalize_diff_path(&path);
323        self.changed_lines.get(&path).is_some_and(|ranges| {
324            ranges
325                .iter()
326                .any(|(start, end)| alert.line >= *start && alert.line <= *end)
327        })
328    }
329}
330
/// Extract, per file, the inclusive new-side line ranges of every hunk in a
/// unified diff.
///
/// The current file is taken from `+++ b/<path>` lines. It is cleared at every
/// `diff --git` header and on `+++ /dev/null`, so hunks belonging to a file
/// whose `+++` line is not recognised (e.g. a quoted path) are dropped rather
/// than being attributed to the previously seen file.
fn parse_changed_lines(diff: &str) -> HashMap<PathBuf, Vec<(usize, usize)>> {
    let mut current_file: Option<PathBuf> = None;
    let mut changed = HashMap::<PathBuf, Vec<(usize, usize)>>::new();

    for line in diff.lines() {
        if line.starts_with("diff --git ") {
            // New file section: forget the previous file until its own header is seen.
            current_file = None;
            continue;
        }
        if let Some(path) = line.strip_prefix("+++ b/") {
            current_file = Some(PathBuf::from(path));
            continue;
        }
        if line.starts_with("+++ /dev/null") {
            current_file = None;
            continue;
        }

        if let (Some(file), Some(range)) = (current_file.as_ref(), parse_hunk_new_range(line)) {
            changed.entry(file.clone()).or_default().push(range);
        }
    }

    changed
}

/// Parse the `+start[,count]` part of a hunk header (`@@ -a,b +c,d @@`).
///
/// Returns the inclusive `(first, last)` new-side line range, or `None` when
/// the line is not a hunk header, cannot be parsed, or adds no lines
/// (`count == 0`). A missing count means one line.
fn parse_hunk_new_range(line: &str) -> Option<(usize, usize)> {
    let hunk = line.strip_prefix("@@ ")?;
    let plus = hunk.split_whitespace().find(|part| part.starts_with('+'))?;
    let plus = plus.trim_start_matches('+');
    let (start, count) = plus
        .split_once(',')
        .map(|(start, count)| (start, count.parse::<usize>().ok()))
        .unwrap_or((plus, Some(1)));
    let start = start.parse::<usize>().ok()?;
    let count = count?;
    if count == 0 {
        None
    } else {
        Some((start, start + count - 1))
    }
}
369
/// Rebuild `path` from its components with every `.` component removed.
fn normalize_diff_path(path: &Path) -> PathBuf {
    path.components()
        .filter(|part| !matches!(part, std::path::Component::CurDir))
        .map(|part| part.as_os_str())
        .collect()
}
380
381fn parse_ignored_ranges(code: &str) -> Result<Vec<(usize, usize)>> {
382    let mut parser = Parser::new();
383    parser
384        .set_language(&tree_sitter_rust::LANGUAGE.into())
385        .map_err(|err| anyhow::anyhow!("failed to load Rust tree-sitter grammar: {err}"))?;
386    let tree = parser
387        .parse(code, None)
388        .ok_or_else(|| anyhow::anyhow!("tree-sitter failed to parse Rust source"))?;
389
390    let mut ranges = Vec::new();
391    collect_ignored_ranges(tree.root_node(), code.as_bytes(), &mut ranges);
392    Ok(ranges)
393}
394
395fn collect_ignored_ranges(node: Node<'_>, code: &[u8], ranges: &mut Vec<(usize, usize)>) {
396    let kind = node.kind();
397
398    // Mask string/comment literals
399    if matches!(
400        kind,
401        "line_comment" | "block_comment" | "string_literal" | "raw_string_literal" | "char_literal"
402    ) {
403        ranges.push((node.start_byte(), node.end_byte()));
404        return;
405    }
406
407    // At container levels, scan for attribute+item pairs where the attribute is #[test]
408    // or #[cfg(test)]. The attribute is a SIBLING of the item, not a child.
409    if matches!(kind, "source_file" | "declaration_list") {
410        let children: Vec<Node<'_>> = {
411            let mut cursor = node.walk();
412            node.children(&mut cursor).collect()
413        };
414        let mut i = 0;
415        while i < children.len() {
416            let child = children[i];
417            if child.kind() == "attribute_item" && is_test_attr(child, code) {
418                // Mask this attribute and the item that follows it (skip over any other
419                // attribute_item siblings until we hit the actual item)
420                ranges.push((child.start_byte(), child.end_byte()));
421                let mut j = i + 1;
422                while j < children.len() {
423                    let next = children[j];
424                    if next.kind() == "attribute_item" {
425                        ranges.push((next.start_byte(), next.end_byte()));
426                    } else {
427                        // This is the actual item (function, mod, etc.) — mask it
428                        ranges.push((next.start_byte(), next.end_byte()));
429                        i = j;
430                        break;
431                    }
432                    j += 1;
433                }
434            } else {
435                collect_ignored_ranges(child, code, ranges);
436            }
437            i += 1;
438        }
439        return;
440    }
441
442    let mut cursor = node.walk();
443    for child in node.children(&mut cursor) {
444        collect_ignored_ranges(child, code, ranges);
445    }
446}
447
448fn is_test_attr(node: Node<'_>, code: &[u8]) -> bool {
449    if let Ok(text) = std::str::from_utf8(&code[node.start_byte()..node.end_byte()]) {
450        let t: String = text.chars().filter(|c| !c.is_whitespace()).collect();
451        t == "#[test]" || t.contains("#[cfg(test)]") || t.contains("#[cfg(any(test")
452    } else {
453        false
454    }
455}
456
/// Return a copy of `code` in which every byte inside one of the half-open
/// `ranges` is replaced by a space, except newlines (so line numbers stay
/// intact). Out-of-bounds parts of a range are ignored. If the masked bytes
/// are no longer valid UTF-8, the original text is returned unchanged.
fn mask_ranges(code: &str, ranges: &[(usize, usize)]) -> String {
    let mut buf = code.as_bytes().to_vec();
    let len = buf.len();

    for &(lo, hi) in ranges {
        // Clamp to the buffer; an inverted range becomes empty.
        let hi = hi.min(len);
        let lo = lo.min(hi);
        buf[lo..hi]
            .iter_mut()
            .filter(|byte| **byte != b'\n')
            .for_each(|byte| *byte = b' ');
    }

    match String::from_utf8(buf) {
        Ok(masked) => masked,
        Err(_) => code.to_string(),
    }
}
470
471fn scan_regex_patterns(code: &str, file: &Path, alerts: &mut Vec<BullshitAlert>) -> Result<()> {
472    let patterns = [
473        (
474            r"Arc\s*<\s*RwLock\s*<",
475            BullshitKind::OverEngineering,
476            0.86,
477            "Arc<RwLock<...>> is often shared mutable state wearing a tuxedo.",
478            "Try explicit ownership, message passing, or a narrower shared state boundary.",
479        ),
480        (
481            r"Arc\s*<\s*Mutex\s*<",
482            BullshitKind::OverEngineering,
483            0.82,
484            "Arc<Mutex<...>> can be valid, but it is also a classic complexity magnet.",
485            "Check whether ownership can stay local or the locked data can be smaller.",
486        ),
487        (
488            r"Mutex\s*<\s*HashMap\s*<",
489            BullshitKind::MutexAbuse,
490            0.76,
491            "A Mutex<HashMap<...>> is a blunt concurrency primitive.",
492            "Consider sharding, DashMap, or reducing shared mutable state.",
493        ),
494        (
495            r"RwLock\s*<",
496            BullshitKind::RwLockAbuse,
497            0.64,
498            "RwLock adds coordination cost and can hide unclear ownership.",
499            "Use it only when read-heavy sharing is real and measured.",
500        ),
501        (
502            r"\b(std::thread::sleep|tokio::time::sleep)\s*\(",
503            BullshitKind::SleepAbuse,
504            0.78,
505            "Sleep calls are often timing bullshit instead of synchronization.",
506            "Replace sleeps with explicit readiness, timeouts, retries, or test clocks.",
507        ),
508        (
509            r"Arc\s*<\s*(String|Vec\s*<|Box\s*<)",
510            BullshitKind::ArcAbuse,
511            0.62,
512            "Arc<String>, Arc<Vec<...>>, or Arc<Box<...>> wraps a value type in shared ownership — often unnecessary.",
513            "Use Arc<str> instead of Arc<String>, or reconsider whether sharing is needed at all.",
514        ),
515        (
516            r"\b(todo|unimplemented)\s*!\s*\(",
517            BullshitKind::TodoUnimplemented,
518            0.75,
519            "todo!() or unimplemented!() will panic at runtime if reached in production.",
520            "Return a Result or Option instead; replace the placeholder with a real implementation or a meaningful error.",
521        ),
522        (
523            r"RefCell\s*<",
524            BullshitKind::RefCellAbuse,
525            0.60,
526            "RefCell<T> defers borrow checking to runtime — a panic will occur if borrow rules are violated.",
527            "Consider restructuring to use compile-time borrows, or Cell<T> for Copy types.",
528        ),
529    ];
530
531    for (pattern, kind, confidence, why, suggestion) in patterns {
532        let regex = Regex::new(pattern)?;
533        for mat in regex.find_iter(code) {
534            alerts.push(make_alert(
535                kind,
536                confidence,
537                file,
538                code,
539                mat.start(),
540                mat.end(),
541                why,
542                suggestion,
543            ));
544        }
545    }
546
547    Ok(())
548}
549
550fn scan_line_patterns(code: &str, file: &Path, alerts: &mut Vec<BullshitAlert>) {
551    for (line_idx, line) in code.lines().enumerate() {
552        let trimmed = line.trim();
553
554        if let Some(col) = line.find(".unwrap()") {
555            alerts.push(alert_from_line(
556                BullshitKind::UnwrapAbuse,
557                0.72,
558                file,
559                line_idx + 1,
560                col + 1,
561                line,
562                "unwrap() is a runtime trap dressed up as confidence.",
563                "Replace with .expect(\"reason it can't fail\") for a panic with context, propagate with ?, or handle the None/Err explicitly.",
564            ));
565        }
566
567        let clone_count = line.matches(".clone()").count();
568        if clone_count >= 2 {
569            alerts.push(alert_from_line(
570                BullshitKind::CloneAbuse,
571                (0.60 + clone_count as f32 * 0.08).min(0.92),
572                file,
573                line_idx + 1,
574                line.find(".clone()").unwrap_or(0) + 1,
575                line,
576                "Multiple clone() calls on one line can hide ownership confusion.",
577                "Check whether borrowing, moving, or restructuring removes the copies.",
578            ));
579        }
580
581        let dyn_count = trimmed.matches("dyn ").count();
582        if dyn_count >= 3 {
583            alerts.push(alert_from_line(
584                BullshitKind::DynTraitAbuse,
585                0.80,
586                file,
587                line_idx + 1,
588                line.find("dyn ").unwrap_or(0) + 1,
589                line,
590                "Heavy dyn usage may be abstraction theater.",
591                "Prefer concrete types or generics unless runtime polymorphism is needed.",
592            ));
593        }
594
595        if trimmed.starts_with("use std::collections::{")
596            && trimmed.contains("HashMap")
597            && trimmed.contains("BTreeMap")
598        {
599            alerts.push(alert_from_line(
600                BullshitKind::CargoCult,
601                0.62,
602                file,
603                line_idx + 1,
604                line.find("HashMap").unwrap_or(0) + 1,
605                line,
606                "Broad collection imports can signal cargo-cult scaffolding.",
607                "Import the collection you actually use, or qualify rare uses inline.",
608            ));
609        }
610
611        if line.contains("== true") || line.contains("== false")
612            || line.contains("!= true") || line.contains("!= false")
613        {
614            let col = line.find("== true")
615                .or_else(|| line.find("== false"))
616                .or_else(|| line.find("!= true"))
617                .or_else(|| line.find("!= false"))
618                .unwrap_or(0) + 1;
619            alerts.push(alert_from_line(
620                BullshitKind::BoolComparison,
621                0.68,
622                file,
623                line_idx + 1,
624                col,
625                line,
626                "Comparing a boolean expression to `true` or `false` is redundant.",
627                "Use the expression directly (`if x`) or its negation (`if !x`) instead of `== true` / `== false`.",
628            ));
629        }
630
631        if line.contains(".to_string().as_str()") || line.contains(".to_owned().as_str()") {
632            let col = line.find(".to_string().as_str()")
633                .or_else(|| line.find(".to_owned().as_str()"))
634                .unwrap_or(0) + 1;
635            alerts.push(alert_from_line(
636                BullshitKind::StringAntiPattern,
637                0.74,
638                file,
639                line_idx + 1,
640                col,
641                line,
642                "Converting to String then immediately borrowing as &str creates an unnecessary temporary.",
643                "Use `.as_str()` on an existing String, or pass a `&str` directly without allocating.",
644            ));
645        }
646
647        // .ok() used as a statement silently discards an error
648        if line.trim_end().ends_with(".ok();") || line.contains(").ok();") {
649            let col = line.find(".ok()").unwrap_or(0) + 1;
650            alerts.push(alert_from_line(
651                BullshitKind::DiscardedError,
652                0.76,
653                file,
654                line_idx + 1,
655                col,
656                line,
657                "Calling `.ok()` as a statement silently discards the error variant.",
658                "Propagate with `?`, handle the `Err`, or at minimum log before discarding.",
659            ));
660        }
661
662        // let _ = discards the return value (often a Result)
663        let trimmed_start = line.trim_start();
664        if trimmed_start.starts_with("let _ =")
665            && !trimmed_start.starts_with("let _ = ()")
666            && trimmed_start.contains('(')
667        {
668            let col = line.find("let _ =").unwrap_or(0) + 1;
669            alerts.push(alert_from_line(
670                BullshitKind::DiscardedError,
671                0.65,
672                file,
673                line_idx + 1,
674                col,
675                line,
676                "`let _ = expr` silently ignores the return value — likely a discarded Result or error.",
677                "Handle the value explicitly, use `drop()` with a comment explaining why, or propagate.",
678            ));
679        }
680
681        if line.contains("from_utf8_lossy(") {
682            let col = line.find("from_utf8_lossy(").unwrap_or(0) + 1;
683            alerts.push(alert_from_line(
684                BullshitKind::LossyUtf8,
685                0.70,
686                file,
687                line_idx + 1,
688                col,
689                line,
690                "`from_utf8_lossy` silently replaces invalid UTF-8 bytes with U+FFFD, corrupting binary data.",
691                "Use `from_utf8` and handle the error, or work with raw bytes via `OsStr` / `io::Write::write_all`.",
692            ));
693        }
694    }
695}
696
697fn scan_function_complexity(code: &str, file: &Path, alerts: &mut Vec<BullshitAlert>) {
698    let lines: Vec<&str> = code.lines().collect();
699    let mut idx = 0;
700
701    while idx < lines.len() {
702        let line = lines[idx];
703        if !looks_like_fn_start(line) {
704            idx += 1;
705            continue;
706        }
707
708        let start_line = idx + 1;
709        let mut brace_balance = 0isize;
710        let mut saw_body = false;
711        let mut complexity = 0usize;
712        let mut end_idx = idx;
713
714        while end_idx < lines.len() {
715            let current = lines[end_idx];
716            complexity += line_complexity(current);
717            for ch in current.chars() {
718                if ch == '{' {
719                    saw_body = true;
720                    brace_balance += 1;
721                } else if ch == '}' {
722                    brace_balance -= 1;
723                }
724            }
725            if saw_body && brace_balance <= 0 {
726                break;
727            }
728            end_idx += 1;
729        }
730
731        if saw_body && complexity >= 6 {
732            let confidence = (complexity as f32 / 24.0).clamp(0.66, 0.95);
733            alerts.push(alert_from_line(
734                BullshitKind::FakeComplexity,
735                confidence,
736                file,
737                start_line,
738                line.find("fn").unwrap_or(0) + 1,
739                line,
740                &format!(
741                    "Function complexity score is {complexity}; this smells like fake complexity."
742                ),
743                "Split the function around decisions, loops, and side effects.",
744            ));
745        }
746
747        idx = end_idx.saturating_add(1);
748    }
749}
750
/// Whether `line` begins a function item.
///
/// After leading whitespace the line may carry a visibility (`pub`,
/// `pub(crate)`, `pub(super)`, `pub(in path)`), then any sequence of the
/// qualifiers `default`, `const`, `async`, `unsafe`, `extern` (optionally
/// followed by a quoted ABI string), and must then continue with `fn `.
fn looks_like_fn_start(line: &str) -> bool {
    let mut rest = line.trim_start();

    // Optional visibility.
    if let Some(after_pub) = rest.strip_prefix("pub") {
        let after_ws = after_pub.trim_start();
        if let Some(restriction) = after_ws.strip_prefix('(') {
            match restriction.split_once(')') {
                Some((_, tail)) => rest = tail.trim_start(),
                None => return false,
            }
        } else if after_pub.starts_with(char::is_whitespace) {
            rest = after_ws;
        } else {
            // Something like `publish…`, not the `pub` keyword.
            return false;
        }
    }

    // Optional qualifiers, then the `fn` keyword itself.
    loop {
        if rest.starts_with("fn ") {
            return true;
        }
        let Some(tail) = ["default", "const", "async", "unsafe", "extern"]
            .iter()
            .find_map(|&qualifier| rest.strip_prefix(qualifier))
        else {
            return false;
        };
        // The qualifier must be a whole word.
        if !tail.starts_with(char::is_whitespace) {
            return false;
        }
        rest = tail.trim_start();
        // Skip an ABI string such as `"C"` if it is present.
        if let Some(abi) = rest.strip_prefix('"') {
            match abi.split_once('"') {
                Some((_, after_abi)) => rest = after_abi.trim_start(),
                None => return false,
            }
        }
    }
}
759
/// Complexity contribution of one line: one point per occurrence of a
/// branching/looping token or `?;`, one extra point when the line starts
/// with `if(`, and two points per `.unwrap()`.
fn line_complexity(line: &str) -> usize {
    const BRANCH_TOKENS: [&str; 8] = [
        "if ", "if(", "match ", "for ", "while ", "loop ", "&&", "||",
    ];

    let branches: usize = BRANCH_TOKENS
        .iter()
        .map(|&token| line.matches(token).count())
        .sum();
    let leading_paren_if = usize::from(line.trim_start().starts_with("if("));
    let propagations = line.matches("?;").count();
    let unwraps = line.matches(".unwrap()").count();

    branches + leading_paren_if + propagations + unwraps * 2
}
775
776#[allow(clippy::too_many_arguments)]
777fn make_alert(
778    kind: BullshitKind,
779    confidence: f32,
780    file: &Path,
781    code: &str,
782    start: usize,
783    end: usize,
784    why_bs: &str,
785    suggestion: &str,
786) -> BullshitAlert {
787    let (line, column) = line_column(code, start);
788    BullshitAlert {
789        kind,
790        confidence,
791        severity: confidence,
792        file: file.to_path_buf(),
793        line,
794        column,
795        context_snippet: snippet(code, start, end),
796        why_bs: why_bs.to_string(),
797        suggestion: suggestion.to_string(),
798    }
799}
800
801#[allow(clippy::too_many_arguments)]
802fn alert_from_line(
803    kind: BullshitKind,
804    confidence: f32,
805    file: &Path,
806    line: usize,
807    column: usize,
808    context: &str,
809    why_bs: &str,
810    suggestion: &str,
811) -> BullshitAlert {
812    BullshitAlert {
813        kind,
814        confidence,
815        severity: confidence,
816        file: file.to_path_buf(),
817        line,
818        column,
819        context_snippet: context.trim().to_string(),
820        why_bs: why_bs.to_string(),
821        suggestion: suggestion.to_string(),
822    }
823}
824
/// Convert a byte offset into a 1-based `(line, column)` pair.
///
/// Columns count characters, not bytes. An offset past the end of `code`
/// yields the position just after the last character.
fn line_column(code: &str, byte_pos: usize) -> (usize, usize) {
    code.char_indices()
        .take_while(|&(offset, _)| offset < byte_pos)
        .fold((1, 1), |(line, col), (_, ch)| match ch {
            '\n' => (line + 1, 1),
            _ => (line, col + 1),
        })
}
843
/// Trimmed text of the full line(s) that contain the byte span `start..end`.
///
/// `start` and `end` must be valid character-boundary offsets into `code`.
fn snippet(code: &str, start: usize, end: usize) -> String {
    let (before, after) = (&code[..start], &code[end..]);
    let line_start = match before.rfind('\n') {
        Some(newline) => newline + 1,
        None => 0,
    };
    let line_end = match after.find('\n') {
        Some(offset) => end + offset,
        None => code.len(),
    };
    code[line_start..line_end].trim().to_string()
}
849
850fn dedupe_alerts(alerts: &mut Vec<BullshitAlert>) {
851    alerts.sort_by(|a, b| {
852        a.file
853            .cmp(&b.file)
854            .then_with(|| a.line.cmp(&b.line))
855            .then_with(|| a.column.cmp(&b.column))
856            .then_with(|| format!("{:?}", a.kind).cmp(&format!("{:?}", b.kind)))
857    });
858    alerts.dedup_by(|a, b| {
859        a.file == b.file && a.line == b.line && a.column == b.column && a.kind == b.kind
860    });
861}
862
863pub fn kind_label(kind: BullshitKind) -> &'static str {
864    kind.label()
865}
866
// Unit tests for the scanners and the diff parser.
#[cfg(test)]
mod tests {
    use super::*;

    // Default configuration shared by the tests.
    fn config() -> CodeAuditConfig {
        CodeAuditConfig::default()
    }

    // A plain `.unwrap()` and a fully-qualified `std::thread::sleep` call are both reported.
    #[test]
    fn detects_unwrap_and_sleep() {
        let code = r#"
fn main() {
    let value = thing().unwrap();
    std::thread::sleep(std::time::Duration::from_millis(10));
}
"#;
        let alerts = scan_code(code, "src/main.rs", &config()).unwrap();
        assert!(alerts.iter().any(|a| a.kind == BullshitKind::UnwrapAbuse));
        assert!(alerts.iter().any(|a| a.kind == BullshitKind::SleepAbuse));
    }

    // `Arc<RwLock<...>>` is classified as over-engineering.
    #[test]
    fn detects_shared_mutable_state() {
        let code = "type Store = Arc<RwLock<HashMap<String, String>>>;";
        let alerts = scan_code(code, "src/lib.rs", &config()).unwrap();
        assert!(alerts
            .iter()
            .any(|a| a.kind == BullshitKind::OverEngineering));
    }

    // Deeply nested conditionals push the complexity score over the reporting limit.
    #[test]
    fn detects_fake_complexity() {
        let code = r#"
fn tangled(x: usize) -> usize {
    if x > 1 { if x > 2 { if x > 3 { if x > 4 { if x > 5 { return x; }}}}}
    match x { 0 => 1, 1 => 2, _ => 3 }
}
"#;
        let alerts = scan_code(code, "src/lib.rs", &config()).unwrap();
        assert!(alerts
            .iter()
            .any(|a| a.kind == BullshitKind::FakeComplexity));
    }

    // Patterns that only occur inside string literals or comments are masked out.
    #[test]
    fn ignores_patterns_in_strings_and_comments() {
        let code = r#"
fn main() {
    let text = "Arc<RwLock<HashMap<String, String>>> and thing().unwrap()";
    // std::thread::sleep(std::time::Duration::from_millis(10));
}
"#;
        let alerts = scan_code(code, "src/main.rs", &config()).unwrap();
        assert!(
            alerts.is_empty(),
            "strings/comments should not produce bullshit alerts: {alerts:?}"
        );
    }

    // Both `ignore_kinds` and `ignore_paths` suppress alerts.
    #[test]
    fn policy_suppresses_kind_and_path() {
        let mut cfg = config();
        cfg.ignore_kinds.insert("UnwrapAbuse".to_string());
        let alerts = scan_code("fn main() { thing().unwrap(); }", "src/main.rs", &cfg).unwrap();
        assert!(alerts.is_empty());

        let mut cfg = config();
        cfg.ignore_paths.push("generated".to_string());
        let alerts = scan_code(
            "fn main() { thing().unwrap(); }",
            "src/generated/main.rs",
            &cfg,
        )
        .unwrap();
        assert!(alerts.is_empty());
    }

    // A `+2,3` hunk header maps to the inclusive new-side range 2..=4.
    #[test]
    fn parses_diff_changed_ranges() {
        let diff = r#"diff --git a/src/main.rs b/src/main.rs
index 111..222 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,0 +2,3 @@
+fn main() {
+    thing().unwrap();
+}
"#;
        let changed = parse_changed_lines(diff);
        assert_eq!(changed.get(Path::new("src/main.rs")), Some(&vec![(2, 4)]));
    }
}
958}