Skip to main content

batuta/bug_hunter/
modes_analyze.rs

1//! BH-03: LLM-augmented static analysis (LLIFT pattern).
2
3use super::config;
4use super::defect_patterns;
5use super::pmat_quality;
6use super::types::*;
7use std::path::Path;
8
9/// BH-03: LLM-augmented static analysis (LLIFT pattern)
10pub(super) fn run_analyze_mode(
11    project_path: &Path,
12    config_val: &HuntConfig,
13    result: &mut HuntResult,
14) {
15    let clippy_output = std::process::Command::new("cargo")
16        .args(["clippy", "--all-targets", "--message-format=json"])
17        .current_dir(project_path)
18        .output();
19
20    let clippy_json = match clippy_output {
21        Ok(output) => String::from_utf8_lossy(&output.stdout).to_string(),
22        Err(_) => {
23            result.add_finding(
24                Finding::new(
25                    "BH-ANALYZE-NOCLIPPY",
26                    project_path.join("Cargo.toml"),
27                    1,
28                    "Clippy not available",
29                )
30                .with_severity(FindingSeverity::Info)
31                .with_discovered_by(HuntMode::Analyze),
32            );
33            return;
34        }
35    };
36
37    let mut finding_id = 0;
38    for line in clippy_json.lines() {
39        if let Ok(msg) = serde_json::from_str::<serde_json::Value>(line) {
40            if let Some(finding) = extract_clippy_finding(&msg, config_val, &mut finding_id) {
41                result.add_finding(finding);
42            }
43        }
44    }
45
46    analyze_common_patterns(project_path, config_val, result);
47}
48
49/// Extract a finding from a single clippy JSON message, if applicable.
50pub(super) fn extract_clippy_finding(
51    msg: &serde_json::Value,
52    config_val: &HuntConfig,
53    finding_id: &mut usize,
54) -> Option<Finding> {
55    if msg.get("reason").and_then(|r| r.as_str()) != Some("compiler-message") {
56        return None;
57    }
58    let message = msg.get("message")?;
59    let level = message.get("level").and_then(|l| l.as_str()).unwrap_or("");
60    if level != "warning" && level != "error" {
61        return None;
62    }
63    let spans = message.get("spans").and_then(|s| s.as_array())?;
64    let span = spans.first()?;
65    let file = span.get("file_name").and_then(|f| f.as_str()).unwrap_or("unknown");
66    let line_start = span.get("line_start").and_then(|l| l.as_u64()).unwrap_or(1) as usize;
67    let msg_text = message.get("message").and_then(|m| m.as_str()).unwrap_or("Unknown warning");
68    let code = message
69        .get("code")
70        .and_then(|c| c.get("code"))
71        .and_then(|c| c.as_str())
72        .unwrap_or("unknown");
73
74    if code == "dead_code" || code == "unused_imports" {
75        return None;
76    }
77
78    let (category, severity) = categorize_clippy_warning(code, msg_text);
79    let suspiciousness = match severity {
80        FindingSeverity::Critical => 0.95,
81        FindingSeverity::High => 0.8,
82        FindingSeverity::Medium => 0.6,
83        FindingSeverity::Low => 0.4,
84        FindingSeverity::Info => 0.2,
85    };
86
87    if suspiciousness < config_val.min_suspiciousness {
88        return None;
89    }
90
91    *finding_id += 1;
92    Some(
93        Finding::new(format!("BH-CLIP-{:04}", finding_id), file, line_start, msg_text)
94            .with_severity(severity)
95            .with_category(category)
96            .with_suspiciousness(suspiciousness)
97            .with_discovered_by(HuntMode::Analyze)
98            .with_evidence(FindingEvidence::static_analysis("clippy", code)),
99    )
100}
101
102/// Categorize clippy warning by code.
103pub(super) fn categorize_clippy_warning(
104    code: &str,
105    _message: &str,
106) -> (DefectCategory, FindingSeverity) {
107    match code {
108        // Memory safety
109        c if c.contains("ptr") || c.contains("mem") || c.contains("uninit") => {
110            (DefectCategory::MemorySafety, FindingSeverity::High)
111        }
112        // Concurrency
113        c if c.contains("mutex")
114            || c.contains("arc")
115            || c.contains("send")
116            || c.contains("sync") =>
117        {
118            (DefectCategory::ConcurrencyBugs, FindingSeverity::High)
119        }
120        // Security
121        c if c.contains("unsafe") || c.contains("transmute") => {
122            (DefectCategory::SecurityVulnerabilities, FindingSeverity::High)
123        }
124        // Logic errors
125        c if c.contains("unwrap") || c.contains("expect") || c.contains("panic") => {
126            (DefectCategory::LogicErrors, FindingSeverity::Medium)
127        }
128        // Type errors
129        c if c.contains("cast") || c.contains("as_") || c.contains("into") => {
130            (DefectCategory::TypeErrors, FindingSeverity::Medium)
131        }
132        // Default
133        _ => (DefectCategory::Unknown, FindingSeverity::Low),
134    }
135}
136
137/// Parse defect category from string (for custom config patterns).
138pub(super) fn parse_defect_category(s: &str) -> DefectCategory {
139    match s.to_lowercase().as_str() {
140        "logicerrors" | "logic" => DefectCategory::LogicErrors,
141        "memorysafety" | "memory" => DefectCategory::MemorySafety,
142        "concurrency" | "concurrencybugs" => DefectCategory::ConcurrencyBugs,
143        "gpukernelbugs" | "gpu" => DefectCategory::GpuKernelBugs,
144        "silentdegradation" | "silent" => DefectCategory::SilentDegradation,
145        "testdebt" | "test" => DefectCategory::TestDebt,
146        "hiddendebt" | "debt" => DefectCategory::HiddenDebt,
147        "performanceissues" | "performance" => DefectCategory::PerformanceIssues,
148        "securityvulnerabilities" | "security" => DefectCategory::SecurityVulnerabilities,
149        "contractgap" | "contract" => DefectCategory::ContractGap,
150        "modelparitygap" | "modelparity" | "parity" => DefectCategory::ModelParityGap,
151        _ => DefectCategory::LogicErrors,
152    }
153}
154
155/// Parse finding severity from string (for custom config patterns).
156pub(super) fn parse_finding_severity(s: &str) -> FindingSeverity {
157    match s.to_lowercase().as_str() {
158        "critical" => FindingSeverity::Critical,
159        "high" => FindingSeverity::High,
160        "medium" => FindingSeverity::Medium,
161        "low" => FindingSeverity::Low,
162        "info" => FindingSeverity::Info,
163        _ => FindingSeverity::Medium,
164    }
165}
166
167/// Context for pattern matching on a single source line.
168pub(super) struct PatternMatchContext<'a> {
169    pub(super) line: &'a str,
170    pub(super) line_num: usize,
171    pub(super) entry: &'a Path,
172    pub(super) in_test_code: bool,
173    pub(super) is_bug_hunter_file: bool,
174    pub(super) bh_config: &'a config::BugHunterConfig,
175    pub(super) min_susp: f64,
176}
177
178/// Check a single line against a language pattern, returning a finding if matched.
179pub(super) fn match_lang_pattern(
180    ctx: &PatternMatchContext<'_>,
181    pattern: &str,
182    category: DefectCategory,
183    severity: FindingSeverity,
184    suspiciousness: f64,
185) -> Option<Finding> {
186    use super::patterns::{is_real_pattern, should_suppress_finding};
187
188    if ctx.in_test_code
189        && category != DefectCategory::TestDebt
190        && category != DefectCategory::GpuKernelBugs
191        && category != DefectCategory::HiddenDebt
192    {
193        return None;
194    }
195    if ctx.is_bug_hunter_file && category == DefectCategory::HiddenDebt {
196        return None;
197    }
198    if ctx.bh_config.is_allowed(ctx.entry, pattern, ctx.line_num) {
199        return None;
200    }
201    if !ctx.line.contains(pattern)
202        || !is_real_pattern(ctx.line, pattern)
203        || suspiciousness < ctx.min_susp
204    {
205        return None;
206    }
207    let finding =
208        Finding::new(String::new(), ctx.entry, ctx.line_num, format!("Pattern: {}", pattern))
209            .with_description(ctx.line.trim().to_string())
210            .with_severity(severity)
211            .with_category(category)
212            .with_suspiciousness(suspiciousness)
213            .with_discovered_by(HuntMode::Analyze)
214            .with_evidence(FindingEvidence::static_analysis("pattern", pattern));
215    if should_suppress_finding(&finding, ctx.line) {
216        None
217    } else {
218        Some(finding)
219    }
220}
221
222/// Check a single line against a custom pattern, returning a finding if matched.
223pub(super) fn match_custom_pattern(
224    ctx: &PatternMatchContext<'_>,
225    pattern: &str,
226    category: DefectCategory,
227    severity: FindingSeverity,
228    suspiciousness: f64,
229) -> Option<Finding> {
230    use super::patterns::should_suppress_finding;
231
232    if suspiciousness < ctx.min_susp || ctx.bh_config.is_allowed(ctx.entry, pattern, ctx.line_num) {
233        return None;
234    }
235    if !ctx.line.contains(pattern) {
236        return None;
237    }
238    let finding =
239        Finding::new(String::new(), ctx.entry, ctx.line_num, format!("Custom: {}", pattern))
240            .with_description(ctx.line.trim().to_string())
241            .with_severity(severity)
242            .with_category(category)
243            .with_suspiciousness(suspiciousness)
244            .with_discovered_by(HuntMode::Analyze)
245            .with_evidence(FindingEvidence::static_analysis("custom_pattern", pattern));
246    if should_suppress_finding(&finding, ctx.line) {
247        None
248    } else {
249        Some(finding)
250    }
251}
252
253/// Scan a single file for pattern matches.
254pub(super) fn scan_file_for_patterns(
255    entry: &std::path::Path,
256    patterns: &[(&str, DefectCategory, FindingSeverity, f64)],
257    custom_patterns: &[(String, DefectCategory, FindingSeverity, f64)],
258    bh_config: &config::BugHunterConfig,
259    min_susp: f64,
260    findings: &mut Vec<Finding>,
261) {
262    use super::languages;
263    use super::patterns::compute_test_lines;
264
265    let Ok(content) = std::fs::read_to_string(entry) else {
266        return;
267    };
268    let test_lines = compute_test_lines(&content);
269    let lang =
270        entry.extension().and_then(|e| e.to_str()).and_then(languages::Language::from_extension);
271    let lang_patterns = lang
272        .map(languages::patterns_for_language)
273        .unwrap_or_else(|| patterns.iter().map(|&(p, c, s, su)| (p, c, s, su)).collect());
274    let is_bug_hunter_file = entry.to_str().map(|p| p.contains("bug_hunter")).unwrap_or(false);
275
276    for (line_num, line) in content.lines().enumerate() {
277        let line_num = line_num + 1;
278        let ctx = PatternMatchContext {
279            line,
280            line_num,
281            entry,
282            in_test_code: test_lines.contains(&line_num),
283            is_bug_hunter_file,
284            bh_config,
285            min_susp,
286        };
287
288        for &(pattern, category, severity, suspiciousness) in &lang_patterns {
289            if let Some(f) = match_lang_pattern(&ctx, pattern, category, severity, suspiciousness) {
290                findings.push(f);
291            }
292        }
293
294        for (pattern, category, severity, suspiciousness) in custom_patterns {
295            if let Some(f) =
296                match_custom_pattern(&ctx, pattern.as_str(), *category, *severity, *suspiciousness)
297            {
298                findings.push(f);
299            }
300        }
301    }
302}
303
304/// BH-23 helper: Generate SATD findings from PMAT quality index.
305fn run_pmat_satd_phase(
306    pmat_satd_active: bool,
307    project_path: &Path,
308    config_val: &HuntConfig,
309    result: &mut HuntResult,
310) {
311    if !pmat_satd_active {
312        return;
313    }
314    let query = config_val.pmat_query.as_deref().unwrap_or("*");
315    if let Some(index) = pmat_quality::build_quality_index(project_path, query, 200) {
316        let satd_findings = pmat_quality::generate_satd_findings(project_path, &index);
317        for f in satd_findings {
318            result.add_finding(f);
319        }
320    }
321}
322
323/// Analyze common bug patterns across source files.
324pub(super) fn analyze_common_patterns(
325    project_path: &Path,
326    config_val: &HuntConfig,
327    result: &mut HuntResult,
328) {
329    use super::blame;
330    use super::languages;
331
332    // Load bug-hunter config for allowlist and custom patterns
333    let bh_config = config::BugHunterConfig::load(project_path);
334
335    // BH-23: If PMAT SATD is enabled and pmat is available, generate SATD findings
336    // and skip the manual TODO/FIXME/HACK/XXX pattern matching
337    let pmat_satd_active = config_val.pmat_satd && pmat_quality::pmat_available();
338    run_pmat_satd_phase(pmat_satd_active, project_path, config_val, result);
339
340    let mut patterns = defect_patterns::base_defect_patterns(pmat_satd_active);
341    patterns.extend(defect_patterns::gpu_and_crosscutting_patterns());
342
343    // Convert custom patterns from config to owned patterns
344    let custom_patterns: Vec<(String, DefectCategory, FindingSeverity, f64)> = bh_config
345        .patterns
346        .iter()
347        .map(|p| {
348            let category = parse_defect_category(&p.category);
349            let severity = parse_finding_severity(&p.severity);
350            (p.pattern.clone(), category, severity, p.suspiciousness)
351        })
352        .collect();
353
354    // Collect all file paths first (multi-language support)
355    let mut all_files: Vec<std::path::PathBuf> = Vec::new();
356    for target in &config_val.targets {
357        let target_path = project_path.join(target);
358        // Scan all supported languages
359        for glob_pattern in languages::all_language_globs() {
360            if let Ok(entries) = glob::glob(&format!("{}/{}", target_path.display(), glob_pattern))
361            {
362                all_files.extend(entries.flatten());
363            }
364        }
365    }
366
367    // Parallel file scanning via std::thread::scope
368    let min_susp = config_val.min_suspiciousness;
369    let chunk_size = (all_files.len() / 4).max(1);
370    let chunks: Vec<&[std::path::PathBuf]> = all_files.chunks(chunk_size).collect();
371
372    let all_chunk_findings: Vec<Vec<Finding>> = std::thread::scope(|s| {
373        let handles: Vec<_> = chunks
374            .iter()
375            .map(|chunk| {
376                let patterns = &patterns;
377                let custom_patterns = &custom_patterns;
378                let bh_config = &bh_config;
379                s.spawn(move || {
380                    let mut chunk_findings = Vec::new();
381                    for entry in *chunk {
382                        scan_file_for_patterns(
383                            entry,
384                            patterns,
385                            custom_patterns,
386                            bh_config,
387                            min_susp,
388                            &mut chunk_findings,
389                        );
390                    }
391                    chunk_findings
392                })
393            })
394            .collect();
395
396        handles.into_iter().filter_map(|h| h.join().ok()).collect()
397    });
398
399    // Merge all findings and assign globally unique IDs
400    // Also fetch git blame info for each finding
401    let mut blame_cache = blame::BlameCache::new();
402    let mut finding_id = 0u32;
403    for chunk_findings in all_chunk_findings {
404        for mut finding in chunk_findings {
405            finding_id += 1;
406            finding.id = format!("BH-PAT-{:04}", finding_id);
407
408            // Fetch git blame info
409            if let Some(blame_info) =
410                blame_cache.get_blame(project_path, &finding.file, finding.line)
411            {
412                finding.blame_author = Some(blame_info.author);
413                finding.blame_commit = Some(blame_info.commit);
414                finding.blame_date = Some(blame_info.date);
415            }
416
417            result.add_finding(finding);
418        }
419    }
420}