Skip to main content

batuta/bug_hunter/
patterns.rs

//! Pattern detection utilities for bug-hunter.
//!
//! This module contains functions for detecting code patterns and determining
//! whether they represent real technical debt or false positives.
//!
//! # Safety:
//!
//! This module contains the string literals "unsafe {" and "transmute" as
//! pattern matchers for detecting unsafe code in scanned files. These are
//! string constants used for pattern matching, not actual unsafe code.
11
12use super::types::Finding;
13use std::collections::HashSet;
14
15/// Check if a finding should be suppressed (BH-15).
16/// Wired into analyze_common_patterns per issue #17.
17pub fn should_suppress_finding(finding: &Finding, line_content: &str) -> bool {
18    // Issue #17: Suppress identical-blocks warnings for mapper functions
19    if finding.title.contains("identical blocks") || finding.title.contains("if_same_then_else") {
20        // Check if this looks like a mapper function (returns enum variants)
21        if line_content.contains("=>") || line_content.contains("PartitionSpec::") {
22            return true;
23        }
24        // Check for intentional comment
25        if line_content.contains("INTENTIONAL") || line_content.contains("intentional") {
26            return true;
27        }
28    }
29
30    // Suppress warnings about code that detects patterns (meta-level)
31    if (line_content.contains("PATTERN_MARKERS") || line_content.contains("pattern"))
32        && (finding.title.contains("FIXME") || finding.title.contains("TODO"))
33    {
34        return true;
35    }
36
37    false
38}
39
/// Determine which lines are inside test code (after `#[cfg(test)]` or `#[test]`).
///
/// Returns the set of 1-based line numbers that belong to test code:
/// * the attribute lines themselves,
/// * every line from the attributed item's opening brace through the line on
///   which the brace depth drops back to its level before that brace,
/// * a brace-less item directly gated by the attribute (e.g.
///   `#[cfg(test)] use foo;` split over two lines).
///
/// The scan is purely textual: braces are counted per line with no awareness
/// of string literals or comments, and `#[cfg(test)]` is only recognised when
/// it is the whole (trimmed) line.
pub fn compute_test_lines(content: &str) -> HashSet<usize> {
    let mut test_lines = HashSet::new();
    let mut in_test_region = false;
    let mut region_start_depth: i32 = 0;
    let mut brace_depth: i32 = 0;
    let mut waiting_for_brace = false;

    for (idx, line) in content.lines().enumerate() {
        let line_num = idx + 1;
        let trimmed = line.trim();

        // Brace delta contributed by this line.
        let open_braces = line.matches('{').count() as i32;
        let close_braces = line.matches('}').count() as i32;

        // A test attribute: the item that follows is test code.
        if trimmed == "#[cfg(test)]" || trimmed.starts_with("#[test]") {
            waiting_for_brace = true;
            test_lines.insert(line_num); // The attribute itself is test code
        }

        if waiting_for_brace {
            if open_braces > 0 {
                // Only the outermost test item defines the region. A `#[test]`
                // nested inside an already-open test module must not overwrite
                // the module's start depth, otherwise the region would end
                // when the first test function closes.
                if !in_test_region {
                    in_test_region = true;
                    region_start_depth = brace_depth; // depth BEFORE this line's braces
                }
                waiting_for_brace = false;
            } else if trimmed.ends_with(';') {
                // The attribute gated a brace-less item (`use ...;`, `mod x;`).
                // Mark it and stop waiting so the next unrelated block is not
                // mistaken for test code.
                test_lines.insert(line_num);
                waiting_for_brace = false;
            }
        }

        brace_depth += open_braces - close_braces;

        if in_test_region {
            test_lines.insert(line_num);
            // Region ends once the depth is back at (or below) its start level.
            if brace_depth <= region_start_depth {
                in_test_region = false;
            }
        }
    }

    test_lines
}
90
/// Decide whether a TODO/FIXME/HACK/XXX hit is genuine debt or a false positive.
///
/// * `line` - the full source line.
/// * `before` - the text of `line` preceding the matched marker.
/// * `trimmed` - `line` with surrounding whitespace removed.
///
/// A hit is real only when it sits in a non-doc comment, outside a string
/// literal, and starts a word. Lines naming two or more different markers are
/// treated as text *about* markers and rejected.
fn check_tech_debt_real(line: &str, before: &str, trimmed: &str) -> bool {
    const MARKERS: [&str; 4] = ["TODO", "FIXME", "HACK", "XXX"];

    // Doc comments describe behaviour; they are not debt notes.
    if trimmed.starts_with("///") || trimmed.starts_with("//!") {
        return false;
    }

    // Several distinct markers on one line: the line is listing them.
    let distinct_markers = MARKERS.iter().filter(|marker| line.contains(**marker)).count();
    if distinct_markers >= 2 {
        return false;
    }

    // A comment opener must appear before the marker.
    let inside_comment = ["//", "/*"].iter().any(|opener| before.contains(*opener));
    // An odd number of quotes before the marker means a string is still open.
    let inside_string = before.matches('"').count() % 2 != 0;
    // The marker must not be glued to a preceding identifier character.
    let at_word_start = match before.chars().last() {
        None => true,
        Some(prev) => matches!(prev, ' ' | '\t' | '/' | '*'),
    };

    inside_comment && !inside_string && at_word_start
}
109
/// Decide whether a comment-based pattern (test debt, GPU errors) is a real hit.
///
/// * `line` - the full source line.
/// * `before` - the text of `line` preceding the matched pattern.
/// * `trimmed` - `line` with surrounding whitespace removed.
///
/// Real hits live in a plain `//` comment (not `///` or `//!`), outside a
/// string literal, on a line that contains none of the benign phrases below.
fn check_comment_pattern_real(line: &str, before: &str, trimmed: &str) -> bool {
    // Debugging / diagnostic notes.
    const DEBUG_PHRASES: [&str; 3] = ["debug:", "for debugging", "diagnostic"];
    // Explanatory or architectural notes.
    const EXPLANATORY_PHRASES: [&str; 5] =
        ["returns cuda_error", "fix:", "via ", "sentinel", "recreates"];

    if trimmed.starts_with("///") || trimmed.starts_with("//!") {
        return false;
    }

    let lowered = line.to_lowercase();
    let is_benign = DEBUG_PHRASES
        .iter()
        .chain(EXPLANATORY_PHRASES.iter())
        .any(|phrase| lowered.contains(*phrase));
    if is_benign {
        return false;
    }

    // An odd quote count before the match means we are inside a string.
    let inside_string = before.matches('"').count() % 2 == 1;
    trimmed.starts_with("//") && !inside_string
}
136
/// Report whether an "unimplemented" hit should be excluded as intentional.
///
/// * `line` - the full source line.
/// * `trimmed` - `line` with surrounding whitespace removed.
///
/// Returns `true` (exclude) when the line states a deliberate limitation,
/// refers to test scaffolding, or is an `unimplemented!(` call whose closing
/// parenthesis is not on the same line.
fn check_unimplemented_exclusions(line: &str, trimmed: &str) -> bool {
    // Design statements followed by test-context hints, matched case-insensitively.
    const EXCLUDING_PHRASES: [&str; 7] = [
        "does not support",
        "not supported",
        "use minimize",
        "by design",
        "_unimplemented",
        "should_panic",
        "// test unimplemented",
    ];

    let lowered_line = line.to_lowercase();
    if EXCLUDING_PHRASES.iter().any(|phrase| lowered_line.contains(*phrase)) {
        return true;
    }

    // Macro call left open on this line: no ')' anywhere after the opener.
    let lowered_trimmed = trimmed.to_lowercase();
    lowered_trimmed.starts_with("unimplemented!(") && !lowered_trimmed.contains(')')
}
161
/// Report whether a lower-cased line places "not implemented" in a
/// test-assertion context.
///
/// `line_lower` is used as given; the visible caller lower-cases the line
/// first. The check is a plain substring search for any of the hints below.
fn is_not_implemented_test_context(line_lower: &str) -> bool {
    const TEST_HINTS: [&str; 8] = [
        "assert",
        "expect",
        "returns error",
        "should fail",
        "should panic",
        "test_",
        "_test",
        "is_err",
    ];
    TEST_HINTS.iter().any(|hint| line_lower.contains(*hint))
}
173
/// Report whether "not implemented" appears to sit inside a string literal or
/// format string.
///
/// Heuristic: the trimmed line ends with `"` or `",`, or the line contains a
/// `{}` / `{:` format placeholder.
fn is_not_implemented_in_string(line: &str, trimmed: &str) -> bool {
    let tail = trimmed.trim_end();
    let closes_string_literal = tail.ends_with('"') || tail.ends_with("\",");
    let has_format_placeholder = ["{}", "{:"].iter().any(|marker| line.contains(*marker));
    closes_string_literal || has_format_placeholder
}
182
/// Report whether a "not implemented" comment is benign.
///
/// * `line_lower` - the line, used as given for the phrase search (the
///   visible caller lower-cases it first).
/// * `trimmed` - the line with surrounding whitespace removed.
///
/// Only `//` comments qualify. Such a comment is benign when it is shorter
/// than 50 bytes or mentions a failure ("fails", "error",
/// "but not implemented").
fn is_not_implemented_benign_comment(line_lower: &str, trimmed: &str) -> bool {
    const FAILURE_PHRASES: [&str; 3] = ["fails", "error", "but not implemented"];

    let is_comment = trimmed.starts_with("//");
    let is_short = trimmed.len() < 50;
    let describes_failure = FAILURE_PHRASES.iter().any(|phrase| line_lower.contains(*phrase));

    is_comment && (describes_failure || is_short)
}
193
194/// Check "not implemented" pattern for test context vs real debt.
195fn check_not_implemented_exclusions(line: &str, trimmed: &str) -> bool {
196    let line_lower = line.to_lowercase();
197    is_not_implemented_test_context(&line_lower)
198        || is_not_implemented_in_string(line, trimmed)
199        || is_not_implemented_benign_comment(&line_lower, trimmed)
200}
201
/// Report whether a single-word euphemism is the tail of a longer identifier.
///
/// * `pattern` - the euphemism that matched.
/// * `before` - the text of the line preceding the match.
///
/// Only the single-word euphemisms listed below are considered. For those,
/// a preceding `_` or alphanumeric character means the match is mid-identifier
/// (e.g. `foo_placeholder`) and therefore a false positive.
fn is_mid_identifier_euphemism(pattern: &str, before: &str) -> bool {
    let is_single_word = matches!(
        pattern,
        "placeholder" | "stub" | "dummy" | "fake" | "mock" | "temporary" | "hardcoded"
    );
    if !is_single_word {
        return false;
    }

    match before.chars().last() {
        Some(prev) => prev == '_' || prev.is_alphanumeric(),
        None => false,
    }
}
211
/// Report whether "hardcoded"/"hard-coded" is used descriptively, not as debt.
///
/// * `line` - the full source line.
/// * `pattern` - the pattern that matched; anything other than `hardcoded`
///   or `hard-coded` yields `false`.
/// * `trimmed` - `line` with surrounding whitespace removed.
///
/// Descriptive uses are the phrases listed below (case-insensitive), or any
/// `//` comment that contains "should".
fn is_hardcoded_descriptive(line: &str, pattern: &str, trimmed: &str) -> bool {
    const DESCRIPTIVE_PHRASES: [&str; 3] =
        ["from the hardcoded", "uses hardcoded", "using hardcoded"];

    if !matches!(pattern, "hardcoded" | "hard-coded") {
        return false;
    }

    let lowered = line.to_lowercase();
    if DESCRIPTIVE_PHRASES.iter().any(|phrase| lowered.contains(*phrase)) {
        return true;
    }

    // A comment saying what "should" happen is guidance, not a debt marker.
    trimmed.starts_with("//") && lowered.contains("should")
}
223
224/// Check euphemism patterns (placeholder, stub, dummy, etc.) for real vs false positive.
225fn check_euphemism_real(line: &str, pattern: &str, before: &str, trimmed: &str) -> bool {
226    let is_doc_comment = trimmed.starts_with("///") || trimmed.starts_with("//!");
227    if is_doc_comment {
228        return false;
229    }
230    if before.matches('"').count() % 2 == 1 {
231        return false;
232    }
233    if pattern == "unimplemented" && check_unimplemented_exclusions(line, trimmed) {
234        return false;
235    }
236    if pattern == "not implemented" && check_not_implemented_exclusions(line, trimmed) {
237        return false;
238    }
239    if is_mid_identifier_euphemism(pattern, before) {
240        return false;
241    }
242    if is_hardcoded_descriptive(line, pattern, trimmed) {
243        return false;
244    }
245    true
246}
247
/// Decide whether a code-pattern hit (unwrap, unsafe, ...) is real code.
///
/// * `before` - the text of the line preceding the match.
/// * `pattern` - the pattern that matched.
/// * `trimmed` - the line with surrounding whitespace removed.
///
/// Keyword-like patterns are rejected when glued to a preceding identifier
/// character (e.g. `in_unsafe {`). Any hit in a `//` comment (doc comments
/// included) or inside a string literal is rejected as well.
fn check_code_pattern_real(before: &str, pattern: &str, trimmed: &str) -> bool {
    // SAFETY: no actual unsafe code -- string literals for pattern matching against scanned code
    const KEYWORD_PATTERNS: [&str; 3] = ["unsafe {", "transmute", "panic!"];

    // Compare against the first whitespace-delimited word of each keyword.
    let is_keyword_pattern = KEYWORD_PATTERNS.iter().any(|kw| {
        let head: &str = kw.split_whitespace().next().unwrap_or(kw);
        pattern.starts_with(head)
    });
    let glued_to_identifier =
        before.chars().last().is_some_and(|prev| prev.is_alphanumeric() || prev == '_');
    if is_keyword_pattern && glued_to_identifier {
        return false;
    }

    // `//` also covers `///` and `//!`.
    if trimmed.starts_with("//") {
        return false;
    }

    // An even quote count before the match means no string literal is open.
    before.matches('"').count() % 2 == 0
}
268
269/// Check if pattern appears in a "real" code context, not inside a string literal.
270pub fn is_real_pattern(line: &str, pattern: &str) -> bool {
271    let Some(pos) = line.find(pattern) else {
272        return false;
273    };
274    let trimmed = line.trim();
275    let before = &line[..pos];
276
277    if matches!(pattern, "TODO" | "FIXME" | "HACK" | "XXX") {
278        return check_tech_debt_real(line, before, trimmed);
279    }
280
281    let is_comment_pattern = matches!(
282        pattern,
283        "were removed"
284            | "tests hang"
285            | "hang during"
286            | "compilation hang"
287            | "// skip"
288            | "// skipped"
289            | "// broken"
290            | "// fails"
291            | "// disabled"
292            | "// fallback"
293            | "// degraded"
294            | "CUDA_ERROR"
295            | "INVALID_PTX"
296            | "PTX error"
297            | "kernel fail"
298    );
299    if is_comment_pattern {
300        return check_comment_pattern_real(line, before, trimmed);
301    }
302
303    let is_euphemism_pattern = matches!(
304        pattern,
305        "placeholder"
306            | "stub"
307            | "dummy"
308            | "fake"
309            | "mock"
310            | "simplified"
311            | "for demonstration"
312            | "demo only"
313            | "not implemented"
314            | "unimplemented"
315            | "temporary"
316            | "hardcoded"
317            | "hard-coded"
318            | "magic number"
319            | "workaround"
320            | "quick fix"
321            | "quick-fix"
322            | "bandaid"
323            | "band-aid"
324            | "kludge"
325            | "tech debt"
326            | "technical debt"
327    );
328    if is_euphemism_pattern {
329        return check_euphemism_real(line, pattern, before, trimmed);
330    }
331
332    check_code_pattern_real(before, pattern, trimmed)
333}
334
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every `(line, pattern)` pair is classified as a real hit.
    fn assert_all_real(cases: &[(&str, &str)]) {
        for (line, pattern) in cases {
            assert!(
                is_real_pattern(line, pattern),
                "expected a real hit for {pattern:?} in {line:?}"
            );
        }
    }

    /// Asserts that every `(line, pattern)` pair is classified as a false positive.
    fn assert_none_real(cases: &[(&str, &str)]) {
        for (line, pattern) in cases {
            assert!(
                !is_real_pattern(line, pattern),
                "expected a false positive for {pattern:?} in {line:?}"
            );
        }
    }

    /// Builds the duplicated-branch finding used by the suppression tests.
    fn identical_blocks_finding() -> Finding {
        Finding::new("BH-001", std::path::PathBuf::new(), 1, "identical blocks")
    }

    // =========================================================================
    // is_real_pattern: tech-debt marker branch (check_tech_debt_real)
    // =========================================================================

    #[test]
    fn test_is_real_pattern_todo_in_comment() {
        assert_all_real(&[("// TODO: fix this", "TODO")]);
    }

    #[test]
    fn test_is_real_pattern_todo_in_string() {
        assert_none_real(&[(r#"let msg = "TODO: implement";"#, "TODO")]);
    }

    #[test]
    fn test_is_real_pattern_todo_in_doc_comment() {
        assert_none_real(&[("/// TODO: document this", "TODO")]);
    }

    #[test]
    fn test_is_real_pattern_multiple_patterns() {
        // A line naming several SATD markers is most likely explaining them.
        assert_none_real(&[("// For TODO/FIXME/HACK/XXX patterns", "TODO")]);
    }

    // =========================================================================
    // compute_test_lines
    // =========================================================================

    #[test]
    fn test_compute_test_lines_basic() {
        let content = "fn normal() {}\n\n#[cfg(test)]\nmod tests {\n    fn test_foo() {}\n}\n";
        let test_lines = compute_test_lines(content);

        // Line 3 is the attribute, line 4 opens the module, line 5 is its
        // body and line 6 closes it.
        for line_num in [3, 4, 5, 6] {
            assert!(test_lines.contains(&line_num));
        }
        // Line 1 is an ordinary function outside the test module.
        assert!(!test_lines.contains(&1));
    }

    // =========================================================================
    // is_real_pattern: comment-pattern branch (check_comment_pattern_real)
    // =========================================================================

    #[test]
    fn test_is_real_pattern_comment_pattern_in_comment() {
        // Plain comments are real hits.
        assert_all_real(&[
            ("// tests were removed from suite", "were removed"),
            ("// tests hang during CI", "tests hang"),
        ]);
    }

    #[test]
    fn test_is_real_pattern_comment_pattern_in_doc_comment() {
        // Doc comments are excluded.
        assert_none_real(&[
            ("/// tests were removed from suite", "were removed"),
            ("//! tests hang during CI", "tests hang"),
        ]);
    }

    #[test]
    fn test_is_real_pattern_comment_pattern_in_code() {
        // Not a comment at all (and the phrase does not even occur verbatim).
        assert_none_real(&[("let msg = were_removed();", "were removed")]);
    }

    #[test]
    fn test_is_real_pattern_comment_pattern_in_string() {
        // Inside a string literal.
        assert_none_real(&[(r#"let msg = "tests hang";"#, "tests hang")]);
    }

    #[test]
    fn test_is_real_pattern_comment_pattern_debug_excluded() {
        // Debug / diagnostic comments are excluded.
        assert_none_real(&[
            ("// Debug: hang during test", "hang during"),
            ("// for debugging: compilation hang", "compilation hang"),
            ("// diagnostic: kernel fail info", "kernel fail"),
        ]);
    }

    #[test]
    fn test_is_real_pattern_comment_pattern_arch_excluded() {
        // Architectural documentation is excluded.
        assert_none_real(&[
            ("// returns CUDA_ERROR_UNKNOWN in this case", "CUDA_ERROR"),
            ("// Fix: INVALID_PTX via recompilation", "INVALID_PTX"),
            ("// sentinel: PTX error code", "PTX error"),
        ]);
    }

    #[test]
    fn test_is_real_pattern_gpu_patterns() {
        // Patterns on the comment-pattern list match inside comments.
        assert_all_real(&[
            ("// CUDA_ERROR observed in production", "CUDA_ERROR"),
            ("// INVALID_PTX found in kernel", "INVALID_PTX"),
            ("// kernel fail during batch", "kernel fail"),
        ]);
        // "cuBLAS fallback" is not on that list, so it falls through to the
        // code-pattern check, which rejects comments.
        assert_none_real(&[("// cuBLAS fallback triggered", "cuBLAS fallback")]);
    }

    // =========================================================================
    // is_real_pattern: euphemism branch (check_euphemism_real)
    // =========================================================================

    #[test]
    fn test_is_real_pattern_euphemism_in_code() {
        // Euphemisms in code are real hits.
        assert_all_real(&[
            ("let placeholder = vec![0.0; 10];", "placeholder"),
            ("fn stub_impl() { }", "stub"),
        ]);
    }

    #[test]
    fn test_is_real_pattern_euphemism_in_doc_comment() {
        // Doc comments are excluded.
        assert_none_real(&[
            ("/// This is a placeholder for later", "placeholder"),
            ("//! stub implementation", "stub"),
        ]);
    }

    #[test]
    fn test_is_real_pattern_euphemism_in_string() {
        // String literals are excluded.
        assert_none_real(&[(r#"let msg = "placeholder value";"#, "placeholder")]);
    }

    #[test]
    fn test_is_real_pattern_euphemism_mid_identifier() {
        // Preceded by `_` or an alphanumeric character: part of a longer name.
        assert_none_real(&[
            ("let foo_placeholder = 1;", "placeholder"),
            ("fn my_stub() {}", "stub"),
        ]);
    }

    #[test]
    fn test_is_real_pattern_unimplemented_with_explanation() {
        // unimplemented!() carrying a design explanation is excluded.
        assert_none_real(&[
            (r#"unimplemented!("does not support stochastic updates")"#, "unimplemented"),
            (r#"unimplemented!("not supported by design")"#, "unimplemented"),
        ]);
    }

    #[test]
    fn test_is_real_pattern_unimplemented_bare() {
        // Open `unimplemented!(` with the message on the following line.
        assert_none_real(&[("        unimplemented!(", "unimplemented")]);
    }

    #[test]
    fn test_is_real_pattern_unimplemented_in_test() {
        // Test-context uses are excluded.
        assert_none_real(&[
            ("fn test_foo_unimplemented() {", "unimplemented"),
            ("#[should_panic] fn unimplemented_test() {}", "unimplemented"),
        ]);
    }

    #[test]
    fn test_is_real_pattern_not_implemented_in_test_assertion() {
        // Assertion context is excluded.
        assert_none_real(&[
            (r#"assert!(result.is_err()); // not implemented"#, "not implemented"),
            ("assert_eq!(err, \"not implemented\");", "not implemented"),
        ]);
    }

    #[test]
    fn test_is_real_pattern_not_implemented_format_string() {
        // Format strings are excluded.
        assert_none_real(&[(r#"format!("{} not implemented", name)"#, "not implemented")]);
    }

    #[test]
    fn test_is_real_pattern_not_implemented_comment_short() {
        assert_none_real(&[
            // Short comment (fewer than 50 bytes).
            ("// not implemented yet", "not implemented"),
            // Comment describing a failure.
            ("// Still fails because not implemented", "not implemented"),
        ]);
    }

    #[test]
    fn test_is_real_pattern_hardcoded_exclusions() {
        // Descriptive mentions of hardcoded data are excluded.
        assert_none_real(&[
            ("// from the hardcoded test data", "hardcoded"),
            ("// uses hardcoded values for testing", "hardcoded"),
        ]);
    }

    #[test]
    fn test_is_real_pattern_tech_debt_markers() {
        assert_all_real(&[
            ("let x = 1; // tech debt from v1", "tech debt"),
            ("// This is a kludge that needs fixing", "kludge"),
            ("let workaround = compute();", "workaround"),
        ]);
    }

    // =========================================================================
    // is_real_pattern: code-pattern branch (check_code_pattern_real)
    // =========================================================================

    #[test]
    fn test_is_real_pattern_code_pattern_in_doc_comment() {
        // SAFETY: no actual unsafe code -- testing pattern detection for doc comment exclusion
        assert_none_real(&[
            ("/// Use unwrap() only in tests", "unwrap()"),
            ("//! unsafe blocks require safety docs", "unsafe {"),
        ]);
    }

    #[test]
    fn test_is_real_pattern_code_pattern_in_regular_comment() {
        // Regular comments are excluded too.
        assert_none_real(&[
            ("// be careful with unwrap()", "unwrap()"),
            ("// avoid panic! in production", "panic!"),
        ]);
    }

    #[test]
    fn test_is_real_pattern_keyword_in_identifier() {
        // SAFETY: no actual unsafe code -- testing identifier-embedded keyword exclusion
        assert_none_real(&[
            ("if in_unsafe {", "unsafe {"),
            ("let foo_unsafe = true;", "unsafe {"),
        ]);
    }

    #[test]
    fn test_is_real_pattern_code_pattern_real() {
        // SAFETY: no actual unsafe code -- testing that real unsafe patterns ARE detected
        assert_all_real(&[
            ("    unsafe { ptr::read(p) }", "unsafe {"),
            ("let x = opt.unwrap();", "unwrap()"),
            ("    transmute::<u32, f32>(bits)", "transmute"),
        ]);
    }

    #[test]
    fn test_is_real_pattern_pattern_not_found() {
        // The pattern does not occur in the line at all.
        assert_none_real(&[("fn main() {}", "TODO")]);
    }

    // =========================================================================
    // should_suppress_finding
    // =========================================================================

    #[test]
    fn test_should_suppress_identical_blocks_mapper() {
        let finding = identical_blocks_finding();
        assert!(should_suppress_finding(&finding, "Foo => Bar"));
    }

    #[test]
    fn test_should_suppress_intentional() {
        let finding = identical_blocks_finding();
        assert!(should_suppress_finding(&finding, "// INTENTIONAL duplicate"));
    }
}