1use std::collections::{HashMap, HashSet};
4use std::fs;
5use std::path::{Path, PathBuf};
6use std::process::Command;
7
8use anyhow::{bail, Context, Result};
9use regex::Regex;
10use serde::{Deserialize, Serialize};
11use tree_sitter::{Node, Parser};
12
/// Default upper bound (1 MiB) on the size of a source file that is collected for scanning.
const MAX_FILE_BYTES: u64 = 1 << 20;
14
/// Categories of suspicious patterns the audit can report.
///
/// The `Debug` name of a variant (e.g. `"UnwrapAbuse"`) is what
/// `CodeAuditConfig::ignore_kinds` is matched against.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
pub enum BullshitKind {
    /// A function whose heuristic complexity score reaches the reporting threshold.
    FakeComplexity,
    /// A single `use std::collections::{...}` line importing both `HashMap` and `BTreeMap`.
    CargoCult,
    /// `Arc<RwLock<...>>` or `Arc<Mutex<...>>`.
    OverEngineering,
    /// `Arc<String>`, `Arc<Vec<...>>` or `Arc<Box<...>>`.
    ArcAbuse,
    /// Any `RwLock<...>`.
    RwLockAbuse,
    /// `std::thread::sleep(...)` or `tokio::time::sleep(...)`.
    SleepAbuse,
    /// A line containing `.unwrap()`.
    UnwrapAbuse,
    /// Three or more `dyn ` occurrences on one line.
    DynTraitAbuse,
    /// Two or more `.clone()` calls on one line.
    CloneAbuse,
    /// `Mutex<HashMap<...>>`.
    MutexAbuse,
    /// `todo!(...)` or `unimplemented!(...)`.
    TodoUnimplemented,
    /// Any `RefCell<...>`.
    RefCellAbuse,
    /// Comparison against the literals `true` / `false`.
    BoolComparison,
    /// `.to_string().as_str()` or `.to_owned().as_str()`.
    StringAntiPattern,
    /// `.ok();` used as a statement, or `let _ = call(...)`.
    DiscardedError,
    /// A call to `from_utf8_lossy(`.
    LossyUtf8,
}
34
35impl BullshitKind {
36 fn label(self) -> &'static str {
37 match self {
38 Self::FakeComplexity => "fake complexity",
39 Self::CargoCult => "cargo cult",
40 Self::OverEngineering => "over-engineering",
41 Self::ArcAbuse => "Arc abuse",
42 Self::RwLockAbuse => "RwLock abuse",
43 Self::SleepAbuse => "sleep abuse",
44 Self::UnwrapAbuse => "unwrap abuse",
45 Self::DynTraitAbuse => "dyn trait abuse",
46 Self::CloneAbuse => "clone abuse",
47 Self::MutexAbuse => "mutex abuse",
48 Self::TodoUnimplemented => "todo/unimplemented",
49 Self::RefCellAbuse => "RefCell abuse",
50 Self::BoolComparison => "redundant bool comparison",
51 Self::StringAntiPattern => "string anti-pattern",
52 Self::DiscardedError => "discarded error",
53 Self::LossyUtf8 => "lossy UTF-8 conversion",
54 }
55 }
56}
57
/// A single finding produced by the audit.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BullshitAlert {
    /// Category of the finding.
    pub kind: BullshitKind,
    /// Heuristic confidence; alerts below the configured threshold are dropped.
    pub confidence: f32,
    /// Ranking value used to order project reports; the alert constructors in
    /// this file set it equal to `confidence`.
    pub severity: f32,
    /// File the finding was reported for.
    pub file: PathBuf,
    /// 1-based line number of the finding.
    pub line: usize,
    /// 1-based column of the finding.
    pub column: usize,
    /// Trimmed source text around the finding.
    pub context_snippet: String,
    /// Explanation of why the pattern is suspicious.
    pub why_bs: String,
    /// Suggested remedy.
    pub suggestion: String,
}
70
/// Tunable settings for a code audit run.
#[derive(Debug, Clone)]
pub struct CodeAuditConfig {
    /// Minimum confidence an alert needs in order to be kept.
    pub confidence_threshold: f32,
    /// Files larger than this many bytes are not collected for scanning.
    pub max_file_bytes: u64,
    /// Substrings; a path containing any of them is ignored.
    pub ignore_paths: Vec<String>,
    /// `Debug` names of `BullshitKind` variants to suppress (e.g. `"UnwrapAbuse"`).
    pub ignore_kinds: HashSet<String>,
    /// When a `src` directory exists, also scan `tests`, `examples` and `benches`.
    pub include_tests: bool,
}
80
81impl Default for CodeAuditConfig {
82 fn default() -> Self {
83 Self {
84 confidence_threshold: 0.60,
85 max_file_bytes: MAX_FILE_BYTES,
86 ignore_paths: Vec::new(),
87 ignore_kinds: HashSet::new(),
88 include_tests: false,
89 }
90 }
91}
92
/// Result of an audit run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeAuditReport {
    /// Number of Rust source files counted for this run.
    pub files_scanned: usize,
    /// Findings that survived filtering.
    pub alerts: Vec<BullshitAlert>,
}
98
99impl CodeAuditReport {
100 pub fn is_clean(&self) -> bool {
101 self.alerts.is_empty()
102 }
103}
104
105pub fn merge_reports(reports: Vec<CodeAuditReport>) -> CodeAuditReport {
107 let mut files_scanned = 0usize;
108 let mut alerts = Vec::new();
109 for r in reports {
110 files_scanned += r.files_scanned;
111 alerts.extend(r.alerts);
112 }
113 CodeAuditReport {
114 files_scanned,
115 alerts,
116 }
117}
118
119pub fn scan_project(
120 manifest_path: Option<&Path>,
121 config: &CodeAuditConfig,
122) -> Result<CodeAuditReport> {
123 scan_project_with_filter(manifest_path, config, None)
124}
125
126pub fn scan_git_diff(
127 manifest_path: Option<&Path>,
128 config: &CodeAuditConfig,
129) -> Result<CodeAuditReport> {
130 let base_dir = project_base_dir(manifest_path);
131 let filter = DiffFilter::from_git_diff(base_dir)?;
132 scan_project_with_filter(manifest_path, config, Some(&filter))
133}
134
135fn scan_project_with_filter(
136 manifest_path: Option<&Path>,
137 config: &CodeAuditConfig,
138 diff_filter: Option<&DiffFilter>,
139) -> Result<CodeAuditReport> {
140 let base_dir = manifest_path
141 .and_then(Path::parent)
142 .filter(|p| !p.as_os_str().is_empty())
143 .unwrap_or_else(|| Path::new("."));
144
145 let mut files = Vec::new();
146 let src_dir = base_dir.join("src");
147 if src_dir.is_dir() {
148 collect_rust_files(&src_dir, config, &mut files)?;
149 if config.include_tests {
150 for dir in &["tests", "examples", "benches"] {
151 collect_rust_files(&base_dir.join(dir), config, &mut files)?;
152 }
153 }
154 } else {
155 collect_rust_files(base_dir, config, &mut files)?;
157 }
158
159 let mut alerts = Vec::new();
160 for file in &files {
161 if is_ignored_path(file, config) {
162 continue;
163 }
164 let code = fs::read_to_string(file)
165 .with_context(|| format!("failed to read {}", file.display()))?;
166 let mut file_alerts = scan_code(&code, file, config)?;
167 if let Some(filter) = diff_filter {
168 file_alerts.retain(|alert| filter.includes(alert));
169 }
170 alerts.extend(file_alerts);
171 }
172
173 alerts.sort_by(|a, b| {
174 b.severity
175 .partial_cmp(&a.severity)
176 .unwrap_or(std::cmp::Ordering::Equal)
177 .then_with(|| a.file.cmp(&b.file))
178 .then_with(|| a.line.cmp(&b.line))
179 });
180
181 Ok(CodeAuditReport {
182 files_scanned: files.len(),
183 alerts,
184 })
185}
186
187pub fn scan_code(
188 code: &str,
189 file: impl Into<PathBuf>,
190 config: &CodeAuditConfig,
191) -> Result<Vec<BullshitAlert>> {
192 let file = file.into();
193 if is_ignored_path(&file, config) {
194 return Ok(Vec::new());
195 }
196
197 let ignored_ranges = parse_ignored_ranges(code).unwrap_or_default();
198 let masked = mask_ranges(code, &ignored_ranges);
199 let mut alerts = Vec::new();
200
201 scan_regex_patterns(&masked, &file, &mut alerts)?;
202 scan_line_patterns(&masked, &file, &mut alerts);
203 scan_function_complexity(&masked, &file, &mut alerts);
204
205 alerts.retain(|alert| alert.confidence >= config.confidence_threshold);
206 alerts.retain(|alert| !config.ignore_kinds.contains(&format!("{:?}", alert.kind)));
207 dedupe_alerts(&mut alerts);
208 Ok(alerts)
209}
210
211pub fn config_from_policy(policy: Option<&crate::policy::Policy>) -> CodeAuditConfig {
212 let mut config = CodeAuditConfig::default();
213 if let Some(policy) = policy {
214 config.ignore_paths = policy.code_audit.ignore_paths.clone();
215 config.ignore_kinds = policy.code_audit.ignore_kinds.iter().cloned().collect();
216 if policy.settings.min_confidence > 0.0 {
217 config.confidence_threshold = policy.settings.min_confidence as f32;
218 }
219 if policy.code_audit.include_tests {
220 config.include_tests = true;
221 }
222 }
223 config
224}
225
/// Resolves the project root: the non-empty parent directory of the manifest,
/// or `.` when there is no manifest path or it has no usable parent.
fn project_base_dir(manifest_path: Option<&Path>) -> &Path {
    match manifest_path.and_then(Path::parent) {
        Some(parent) if !parent.as_os_str().is_empty() => parent,
        _ => Path::new("."),
    }
}
232
233fn is_ignored_path(path: &Path, config: &CodeAuditConfig) -> bool {
234 let path = path.to_string_lossy();
235 config
236 .ignore_paths
237 .iter()
238 .any(|pattern| path.contains(pattern))
239}
240
241fn collect_rust_files(
242 dir: &Path,
243 config: &CodeAuditConfig,
244 files: &mut Vec<PathBuf>,
245) -> Result<()> {
246 if !dir.exists() {
247 return Ok(());
248 }
249
250 for entry in fs::read_dir(dir).with_context(|| format!("failed to read {}", dir.display()))? {
251 let entry = entry?;
252 let path = entry.path();
253 let name = entry.file_name();
254 let name = name.to_string_lossy();
255
256 if path.is_dir() {
257 if should_skip_dir(&name) {
258 continue;
259 }
260 collect_rust_files(&path, config, files)?;
261 continue;
262 }
263
264 if path.extension().and_then(|e| e.to_str()) != Some("rs") {
265 continue;
266 }
267
268 let metadata = entry.metadata()?;
269 if metadata.len() <= config.max_file_bytes {
270 files.push(path);
271 }
272 }
273
274 Ok(())
275}
276
/// Decides whether a directory with this name is excluded from traversal:
/// hidden directories (leading `.`) and a fixed set of build/vendor folders.
fn should_skip_dir(name: &str) -> bool {
    const SKIPPED: [&str; 6] = [
        "target",
        "vendor",
        "node_modules",
        "dist",
        "build",
        "third_party",
    ];
    name.starts_with('.') || SKIPPED.contains(&name)
}
284
/// Restricts alerts to lines reported as changed by `git diff`.
#[derive(Debug)]
struct DiffFilter {
    /// Directory the diff was taken in; stripped from alert paths before lookup.
    base_dir: PathBuf,
    /// Per-file inclusive `(start, end)` line ranges on the new side of the diff.
    changed_lines: HashMap<PathBuf, Vec<(usize, usize)>>,
}
290
291impl DiffFilter {
292 fn from_git_diff(base_dir: &Path) -> Result<Self> {
293 let output = Command::new("git")
294 .arg("-C")
295 .arg(base_dir)
296 .arg("diff")
297 .arg("HEAD")
298 .arg("--unified=0")
299 .arg("--")
300 .output()
301 .with_context(|| "failed to run git diff HEAD --unified=0")?;
302
303 if !output.status.success() {
304 bail!(
305 "git diff failed: {}",
306 String::from_utf8_lossy(&output.stderr).trim()
307 );
308 }
309
310 Ok(Self {
311 base_dir: base_dir.to_path_buf(),
312 changed_lines: parse_changed_lines(&String::from_utf8_lossy(&output.stdout)),
313 })
314 }
315
316 fn includes(&self, alert: &BullshitAlert) -> bool {
317 let path = alert
318 .file
319 .strip_prefix(&self.base_dir)
320 .map(Path::to_path_buf)
321 .unwrap_or_else(|_| alert.file.clone());
322 let path = normalize_diff_path(&path);
323 self.changed_lines.get(&path).is_some_and(|ranges| {
324 ranges
325 .iter()
326 .any(|(start, end)| alert.line >= *start && alert.line <= *end)
327 })
328 }
329}
330
331fn parse_changed_lines(diff: &str) -> HashMap<PathBuf, Vec<(usize, usize)>> {
332 let mut current_file: Option<PathBuf> = None;
333 let mut changed = HashMap::<PathBuf, Vec<(usize, usize)>>::new();
334
335 for line in diff.lines() {
336 if let Some(path) = line.strip_prefix("+++ b/") {
337 current_file = Some(PathBuf::from(path));
338 continue;
339 }
340 if line.starts_with("+++ /dev/null") {
341 current_file = None;
342 continue;
343 }
344
345 if let (Some(file), Some(range)) = (current_file.as_ref(), parse_hunk_new_range(line)) {
346 changed.entry(file.clone()).or_default().push(range);
347 }
348 }
349
350 changed
351}
352
/// Parses the new-file side of a unified-diff hunk header (`@@ -a,b +c,d @@`).
///
/// Returns the inclusive `(first, last)` line range, or `None` when the line
/// is not a hunk header, cannot be parsed, or adds zero lines. A missing
/// count means a single line.
fn parse_hunk_new_range(line: &str) -> Option<(usize, usize)> {
    let header = line.strip_prefix("@@ ")?;
    let new_side = header
        .split_whitespace()
        .find(|token| token.starts_with('+'))?
        .trim_start_matches('+');

    let (first_text, line_count) = match new_side.split_once(',') {
        Some((first, count)) => (first, count.parse::<usize>().ok()),
        None => (new_side, Some(1)),
    };

    let first: usize = first_text.parse().ok()?;
    match line_count? {
        0 => None,
        n => Some((first, first + n - 1)),
    }
}
369
/// Rebuilds `path` without any `.` (current-directory) components so it can
/// be compared with the paths printed by `git diff`.
fn normalize_diff_path(path: &Path) -> PathBuf {
    path.components()
        .filter(|component| !matches!(component, std::path::Component::CurDir))
        .map(|component| component.as_os_str())
        .collect()
}
380
381fn parse_ignored_ranges(code: &str) -> Result<Vec<(usize, usize)>> {
382 let mut parser = Parser::new();
383 parser
384 .set_language(&tree_sitter_rust::LANGUAGE.into())
385 .map_err(|err| anyhow::anyhow!("failed to load Rust tree-sitter grammar: {err}"))?;
386 let tree = parser
387 .parse(code, None)
388 .ok_or_else(|| anyhow::anyhow!("tree-sitter failed to parse Rust source"))?;
389
390 let mut ranges = Vec::new();
391 collect_ignored_ranges(tree.root_node(), code.as_bytes(), &mut ranges);
392 Ok(ranges)
393}
394
/// Walks the syntax tree below `node` and appends to `ranges` the byte spans
/// that should be masked: comments, string/char literals, and items marked as
/// test-only.
///
/// * `node` – subtree to inspect.
/// * `code` – the source bytes the tree was parsed from (used to read
///   attribute text).
/// * `ranges` – output list of `(start_byte, end_byte)` spans.
fn collect_ignored_ranges(node: Node<'_>, code: &[u8], ranges: &mut Vec<(usize, usize)>) {
    let kind = node.kind();

    // Comments and literals are masked whole; nothing inside them is visited.
    if matches!(
        kind,
        "line_comment" | "block_comment" | "string_literal" | "raw_string_literal" | "char_literal"
    ) {
        ranges.push((node.start_byte(), node.end_byte()));
        return;
    }

    // In item lists, attributes are siblings of the item they decorate, so a
    // test attribute has to be paired with the sibling that follows it.
    if matches!(kind, "source_file" | "declaration_list") {
        let children: Vec<Node<'_>> = {
            let mut cursor = node.walk();
            node.children(&mut cursor).collect()
        };
        let mut i = 0;
        while i < children.len() {
            let child = children[i];
            if child.kind() == "attribute_item" && is_test_attr(child, code) {
                ranges.push((child.start_byte(), child.end_byte()));
                // Mask any further attributes, then the first non-attribute
                // sibling (the decorated item), and resume after it.
                let mut j = i + 1;
                while j < children.len() {
                    let next = children[j];
                    if next.kind() == "attribute_item" {
                        ranges.push((next.start_byte(), next.end_byte()));
                    } else {
                        ranges.push((next.start_byte(), next.end_byte()));
                        // The outer `i += 1` below steps past the masked item.
                        i = j;
                        break;
                    }
                    j += 1;
                }
            } else {
                collect_ignored_ranges(child, code, ranges);
            }
            i += 1;
        }
        return;
    }

    // Any other node: just recurse into its children.
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        collect_ignored_ranges(child, code, ranges);
    }
}
447
448fn is_test_attr(node: Node<'_>, code: &[u8]) -> bool {
449 if let Ok(text) = std::str::from_utf8(&code[node.start_byte()..node.end_byte()]) {
450 let t: String = text.chars().filter(|c| !c.is_whitespace()).collect();
451 t == "#[test]" || t.contains("#[cfg(test)]") || t.contains("#[cfg(any(test")
452 } else {
453 false
454 }
455}
456
/// Returns a copy of `code` in which every byte inside the given half-open
/// byte ranges is replaced by a space, except newlines, so line numbers and
/// byte offsets stay the same.
///
/// Ranges reaching past the end are clipped; inverted ranges are ignored. If
/// masking would produce invalid UTF-8, the original text is returned
/// unchanged.
fn mask_ranges(code: &str, ranges: &[(usize, usize)]) -> String {
    let mut masked = code.as_bytes().to_vec();
    for &(start, end) in ranges {
        let span = end.saturating_sub(start);
        masked
            .iter_mut()
            .skip(start)
            .take(span)
            .filter(|byte| **byte != b'\n')
            .for_each(|byte| *byte = b' ');
    }
    match String::from_utf8(masked) {
        Ok(text) => text,
        Err(_) => code.to_owned(),
    }
}
470
471fn scan_regex_patterns(code: &str, file: &Path, alerts: &mut Vec<BullshitAlert>) -> Result<()> {
472 let patterns = [
473 (
474 r"Arc\s*<\s*RwLock\s*<",
475 BullshitKind::OverEngineering,
476 0.86,
477 "Arc<RwLock<...>> is often shared mutable state wearing a tuxedo.",
478 "Try explicit ownership, message passing, or a narrower shared state boundary.",
479 ),
480 (
481 r"Arc\s*<\s*Mutex\s*<",
482 BullshitKind::OverEngineering,
483 0.82,
484 "Arc<Mutex<...>> can be valid, but it is also a classic complexity magnet.",
485 "Check whether ownership can stay local or the locked data can be smaller.",
486 ),
487 (
488 r"Mutex\s*<\s*HashMap\s*<",
489 BullshitKind::MutexAbuse,
490 0.76,
491 "A Mutex<HashMap<...>> is a blunt concurrency primitive.",
492 "Consider sharding, DashMap, or reducing shared mutable state.",
493 ),
494 (
495 r"RwLock\s*<",
496 BullshitKind::RwLockAbuse,
497 0.64,
498 "RwLock adds coordination cost and can hide unclear ownership.",
499 "Use it only when read-heavy sharing is real and measured.",
500 ),
501 (
502 r"\b(std::thread::sleep|tokio::time::sleep)\s*\(",
503 BullshitKind::SleepAbuse,
504 0.78,
505 "Sleep calls are often timing bullshit instead of synchronization.",
506 "Replace sleeps with explicit readiness, timeouts, retries, or test clocks.",
507 ),
508 (
509 r"Arc\s*<\s*(String|Vec\s*<|Box\s*<)",
510 BullshitKind::ArcAbuse,
511 0.62,
512 "Arc<String>, Arc<Vec<...>>, or Arc<Box<...>> wraps a value type in shared ownership — often unnecessary.",
513 "Use Arc<str> instead of Arc<String>, or reconsider whether sharing is needed at all.",
514 ),
515 (
516 r"\b(todo|unimplemented)\s*!\s*\(",
517 BullshitKind::TodoUnimplemented,
518 0.75,
519 "todo!() or unimplemented!() will panic at runtime if reached in production.",
520 "Return a Result or Option instead; replace the placeholder with a real implementation or a meaningful error.",
521 ),
522 (
523 r"RefCell\s*<",
524 BullshitKind::RefCellAbuse,
525 0.60,
526 "RefCell<T> defers borrow checking to runtime — a panic will occur if borrow rules are violated.",
527 "Consider restructuring to use compile-time borrows, or Cell<T> for Copy types.",
528 ),
529 ];
530
531 for (pattern, kind, confidence, why, suggestion) in patterns {
532 let regex = Regex::new(pattern)?;
533 for mat in regex.find_iter(code) {
534 alerts.push(make_alert(
535 kind,
536 confidence,
537 file,
538 code,
539 mat.start(),
540 mat.end(),
541 why,
542 suggestion,
543 ));
544 }
545 }
546
547 Ok(())
548}
549
550fn scan_line_patterns(code: &str, file: &Path, alerts: &mut Vec<BullshitAlert>) {
551 for (line_idx, line) in code.lines().enumerate() {
552 let trimmed = line.trim();
553
554 if let Some(col) = line.find(".unwrap()") {
555 alerts.push(alert_from_line(
556 BullshitKind::UnwrapAbuse,
557 0.72,
558 file,
559 line_idx + 1,
560 col + 1,
561 line,
562 "unwrap() is a runtime trap dressed up as confidence.",
563 "Replace with .expect(\"reason it can't fail\") for a panic with context, propagate with ?, or handle the None/Err explicitly.",
564 ));
565 }
566
567 let clone_count = line.matches(".clone()").count();
568 if clone_count >= 2 {
569 alerts.push(alert_from_line(
570 BullshitKind::CloneAbuse,
571 (0.60 + clone_count as f32 * 0.08).min(0.92),
572 file,
573 line_idx + 1,
574 line.find(".clone()").unwrap_or(0) + 1,
575 line,
576 "Multiple clone() calls on one line can hide ownership confusion.",
577 "Check whether borrowing, moving, or restructuring removes the copies.",
578 ));
579 }
580
581 let dyn_count = trimmed.matches("dyn ").count();
582 if dyn_count >= 3 {
583 alerts.push(alert_from_line(
584 BullshitKind::DynTraitAbuse,
585 0.80,
586 file,
587 line_idx + 1,
588 line.find("dyn ").unwrap_or(0) + 1,
589 line,
590 "Heavy dyn usage may be abstraction theater.",
591 "Prefer concrete types or generics unless runtime polymorphism is needed.",
592 ));
593 }
594
595 if trimmed.starts_with("use std::collections::{")
596 && trimmed.contains("HashMap")
597 && trimmed.contains("BTreeMap")
598 {
599 alerts.push(alert_from_line(
600 BullshitKind::CargoCult,
601 0.62,
602 file,
603 line_idx + 1,
604 line.find("HashMap").unwrap_or(0) + 1,
605 line,
606 "Broad collection imports can signal cargo-cult scaffolding.",
607 "Import the collection you actually use, or qualify rare uses inline.",
608 ));
609 }
610
611 if line.contains("== true") || line.contains("== false")
612 || line.contains("!= true") || line.contains("!= false")
613 {
614 let col = line.find("== true")
615 .or_else(|| line.find("== false"))
616 .or_else(|| line.find("!= true"))
617 .or_else(|| line.find("!= false"))
618 .unwrap_or(0) + 1;
619 alerts.push(alert_from_line(
620 BullshitKind::BoolComparison,
621 0.68,
622 file,
623 line_idx + 1,
624 col,
625 line,
626 "Comparing a boolean expression to `true` or `false` is redundant.",
627 "Use the expression directly (`if x`) or its negation (`if !x`) instead of `== true` / `== false`.",
628 ));
629 }
630
631 if line.contains(".to_string().as_str()") || line.contains(".to_owned().as_str()") {
632 let col = line.find(".to_string().as_str()")
633 .or_else(|| line.find(".to_owned().as_str()"))
634 .unwrap_or(0) + 1;
635 alerts.push(alert_from_line(
636 BullshitKind::StringAntiPattern,
637 0.74,
638 file,
639 line_idx + 1,
640 col,
641 line,
642 "Converting to String then immediately borrowing as &str creates an unnecessary temporary.",
643 "Use `.as_str()` on an existing String, or pass a `&str` directly without allocating.",
644 ));
645 }
646
647 if line.trim_end().ends_with(".ok();") || line.contains(").ok();") {
649 let col = line.find(".ok()").unwrap_or(0) + 1;
650 alerts.push(alert_from_line(
651 BullshitKind::DiscardedError,
652 0.76,
653 file,
654 line_idx + 1,
655 col,
656 line,
657 "Calling `.ok()` as a statement silently discards the error variant.",
658 "Propagate with `?`, handle the `Err`, or at minimum log before discarding.",
659 ));
660 }
661
662 let trimmed_start = line.trim_start();
664 if trimmed_start.starts_with("let _ =")
665 && !trimmed_start.starts_with("let _ = ()")
666 && trimmed_start.contains('(')
667 {
668 let col = line.find("let _ =").unwrap_or(0) + 1;
669 alerts.push(alert_from_line(
670 BullshitKind::DiscardedError,
671 0.65,
672 file,
673 line_idx + 1,
674 col,
675 line,
676 "`let _ = expr` silently ignores the return value — likely a discarded Result or error.",
677 "Handle the value explicitly, use `drop()` with a comment explaining why, or propagate.",
678 ));
679 }
680
681 if line.contains("from_utf8_lossy(") {
682 let col = line.find("from_utf8_lossy(").unwrap_or(0) + 1;
683 alerts.push(alert_from_line(
684 BullshitKind::LossyUtf8,
685 0.70,
686 file,
687 line_idx + 1,
688 col,
689 line,
690 "`from_utf8_lossy` silently replaces invalid UTF-8 bytes with U+FFFD, corrupting binary data.",
691 "Use `from_utf8` and handle the error, or work with raw bytes via `OsStr` / `io::Write::write_all`.",
692 ));
693 }
694 }
695}
696
697fn scan_function_complexity(code: &str, file: &Path, alerts: &mut Vec<BullshitAlert>) {
698 let lines: Vec<&str> = code.lines().collect();
699 let mut idx = 0;
700
701 while idx < lines.len() {
702 let line = lines[idx];
703 if !looks_like_fn_start(line) {
704 idx += 1;
705 continue;
706 }
707
708 let start_line = idx + 1;
709 let mut brace_balance = 0isize;
710 let mut saw_body = false;
711 let mut complexity = 0usize;
712 let mut end_idx = idx;
713
714 while end_idx < lines.len() {
715 let current = lines[end_idx];
716 complexity += line_complexity(current);
717 for ch in current.chars() {
718 if ch == '{' {
719 saw_body = true;
720 brace_balance += 1;
721 } else if ch == '}' {
722 brace_balance -= 1;
723 }
724 }
725 if saw_body && brace_balance <= 0 {
726 break;
727 }
728 end_idx += 1;
729 }
730
731 if saw_body && complexity >= 6 {
732 let confidence = (complexity as f32 / 24.0).clamp(0.66, 0.95);
733 alerts.push(alert_from_line(
734 BullshitKind::FakeComplexity,
735 confidence,
736 file,
737 start_line,
738 line.find("fn").unwrap_or(0) + 1,
739 line,
740 &format!(
741 "Function complexity score is {complexity}; this smells like fake complexity."
742 ),
743 "Split the function around decisions, loops, and side effects.",
744 ));
745 }
746
747 idx = end_idx.saturating_add(1);
748 }
749}
750
/// Heuristically decides whether `line` begins a function item.
///
/// Accepts an optional visibility (`pub`, `pub(crate)`, `pub(super)`,
/// `pub(in path)`), followed by any sequence of the qualifiers `default`,
/// `const`, `async`, `unsafe`, `extern`, followed by `fn `. Every form the
/// earlier fixed-prefix check accepted is still accepted; restricted
/// visibilities and qualified functions are now recognised too.
fn looks_like_fn_start(line: &str) -> bool {
    const QUALIFIERS: [&str; 5] = ["default", "const", "async", "unsafe", "extern"];

    let mut rest = line.trim_start();

    // Optional visibility. `pub` only counts when followed by whitespace or a
    // parenthesised restriction, so identifiers such as `publish` are left alone.
    if let Some(after_pub) = rest.strip_prefix("pub") {
        if let Some(restriction) = after_pub.trim_start().strip_prefix('(') {
            match restriction.split_once(')') {
                Some((_, tail)) => rest = tail.trim_start(),
                None => return false,
            }
        } else if after_pub.starts_with(char::is_whitespace) {
            rest = after_pub.trim_start();
        }
    }

    // Any number of qualifiers, each followed by whitespace.
    loop {
        let stripped = QUALIFIERS.iter().find_map(|qualifier| {
            rest.strip_prefix(*qualifier)
                .filter(|tail| tail.starts_with(char::is_whitespace))
        });
        match stripped {
            Some(tail) => rest = tail.trim_start(),
            None => break,
        }
    }

    rest.starts_with("fn ")
}
759
/// Computes the complexity contribution of a single line.
///
/// Each occurrence of a branching token counts 1, a line that starts with
/// `if(` gets one extra point, each `?;` counts 1 and each `.unwrap()`
/// counts 2.
fn line_complexity(line: &str) -> usize {
    const BRANCH_TOKENS: [&str; 8] = [
        "if ", "if(", "match ", "for ", "while ", "loop ", "&&", "||",
    ];

    let branches: usize = BRANCH_TOKENS
        .iter()
        .map(|token| line.matches(*token).count())
        .sum();
    let leading_paren_if = usize::from(line.trim_start().starts_with("if("));
    let early_returns = line.matches("?;").count();
    let unwraps = line.matches(".unwrap()").count();

    branches + leading_paren_if + early_returns + unwraps * 2
}
775
776#[allow(clippy::too_many_arguments)]
777fn make_alert(
778 kind: BullshitKind,
779 confidence: f32,
780 file: &Path,
781 code: &str,
782 start: usize,
783 end: usize,
784 why_bs: &str,
785 suggestion: &str,
786) -> BullshitAlert {
787 let (line, column) = line_column(code, start);
788 BullshitAlert {
789 kind,
790 confidence,
791 severity: confidence,
792 file: file.to_path_buf(),
793 line,
794 column,
795 context_snippet: snippet(code, start, end),
796 why_bs: why_bs.to_string(),
797 suggestion: suggestion.to_string(),
798 }
799}
800
801#[allow(clippy::too_many_arguments)]
802fn alert_from_line(
803 kind: BullshitKind,
804 confidence: f32,
805 file: &Path,
806 line: usize,
807 column: usize,
808 context: &str,
809 why_bs: &str,
810 suggestion: &str,
811) -> BullshitAlert {
812 BullshitAlert {
813 kind,
814 confidence,
815 severity: confidence,
816 file: file.to_path_buf(),
817 line,
818 column,
819 context_snippet: context.trim().to_string(),
820 why_bs: why_bs.to_string(),
821 suggestion: suggestion.to_string(),
822 }
823}
824
/// Converts a byte offset into a 1-based `(line, column)` pair, counting
/// columns in characters. Offsets past the end map to the position just
/// after the last character.
fn line_column(code: &str, byte_pos: usize) -> (usize, usize) {
    code.char_indices()
        .take_while(|(offset, _)| *offset < byte_pos)
        .fold((1, 1), |(line, col), (_, ch)| {
            if ch == '\n' {
                (line + 1, 1)
            } else {
                (line, col + 1)
            }
        })
}
843
/// Returns the trimmed text of the full line(s) that contain the byte span
/// `start..end` of `code`.
fn snippet(code: &str, start: usize, end: usize) -> String {
    let line_start = match code[..start].rfind('\n') {
        Some(newline) => newline + 1,
        None => 0,
    };
    let line_end = match code[end..].find('\n') {
        Some(offset) => end + offset,
        None => code.len(),
    };
    String::from(code[line_start..line_end].trim())
}
849
850fn dedupe_alerts(alerts: &mut Vec<BullshitAlert>) {
851 alerts.sort_by(|a, b| {
852 a.file
853 .cmp(&b.file)
854 .then_with(|| a.line.cmp(&b.line))
855 .then_with(|| a.column.cmp(&b.column))
856 .then_with(|| format!("{:?}", a.kind).cmp(&format!("{:?}", b.kind)))
857 });
858 alerts.dedup_by(|a, b| {
859 a.file == b.file && a.line == b.line && a.column == b.column && a.kind == b.kind
860 });
861}
862
863pub fn kind_label(kind: BullshitKind) -> &'static str {
864 kind.label()
865}
866
// Unit tests for the scanners, the policy filters and the diff parser.
#[cfg(test)]
mod tests {
    use super::*;

    // Default configuration shared by the tests.
    fn config() -> CodeAuditConfig {
        CodeAuditConfig::default()
    }

    // A line-based rule (unwrap) and a regex rule (sleep) both fire.
    #[test]
    fn detects_unwrap_and_sleep() {
        let code = r#"
fn main() {
 let value = thing().unwrap();
 std::thread::sleep(std::time::Duration::from_millis(10));
}
"#;
        let alerts = scan_code(code, "src/main.rs", &config()).unwrap();
        assert!(alerts.iter().any(|a| a.kind == BullshitKind::UnwrapAbuse));
        assert!(alerts.iter().any(|a| a.kind == BullshitKind::SleepAbuse));
    }

    // `Arc<RwLock<...>>` is reported as over-engineering.
    #[test]
    fn detects_shared_mutable_state() {
        let code = "type Store = Arc<RwLock<HashMap<String, String>>>;";
        let alerts = scan_code(code, "src/lib.rs", &config()).unwrap();
        assert!(alerts
            .iter()
            .any(|a| a.kind == BullshitKind::OverEngineering));
    }

    // Five nested `if`s plus a `match` reach the complexity threshold.
    #[test]
    fn detects_fake_complexity() {
        let code = r#"
fn tangled(x: usize) -> usize {
 if x > 1 { if x > 2 { if x > 3 { if x > 4 { if x > 5 { return x; }}}}}
 match x { 0 => 1, 1 => 2, _ => 3 }
}
"#;
        let alerts = scan_code(code, "src/lib.rs", &config()).unwrap();
        assert!(alerts
            .iter()
            .any(|a| a.kind == BullshitKind::FakeComplexity));
    }

    // Patterns that only appear inside literals or comments are masked out.
    #[test]
    fn ignores_patterns_in_strings_and_comments() {
        let code = r#"
fn main() {
 let text = "Arc<RwLock<HashMap<String, String>>> and thing().unwrap()";
 // std::thread::sleep(std::time::Duration::from_millis(10));
}
"#;
        let alerts = scan_code(code, "src/main.rs", &config()).unwrap();
        assert!(
            alerts.is_empty(),
            "strings/comments should not produce bullshit alerts: {alerts:?}"
        );
    }

    // Both suppression mechanisms (by kind name and by path substring) work.
    #[test]
    fn policy_suppresses_kind_and_path() {
        let mut cfg = config();
        cfg.ignore_kinds.insert("UnwrapAbuse".to_string());
        let alerts = scan_code("fn main() { thing().unwrap(); }", "src/main.rs", &cfg).unwrap();
        assert!(alerts.is_empty());

        let mut cfg = config();
        cfg.ignore_paths.push("generated".to_string());
        let alerts = scan_code(
            "fn main() { thing().unwrap(); }",
            "src/generated/main.rs",
            &cfg,
        )
        .unwrap();
        assert!(alerts.is_empty());
    }

    // A `+2,3` hunk header yields the inclusive new-side range 2..=4.
    #[test]
    fn parses_diff_changed_ranges() {
        let diff = r#"diff --git a/src/main.rs b/src/main.rs
index 111..222 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,0 +2,3 @@
+fn main() {
+ thing().unwrap();
+}
"#;
        let changed = parse_changed_lines(diff);
        assert_eq!(changed.get(Path::new("src/main.rs")), Some(&vec![(2, 4)]));
    }
}