batuta/bug_hunter/
mod.rs

1//! Proactive Bug Hunting Module
2//!
3//! Implements Section 11 of the Popperian Falsification Checklist (BH-01 to BH-15).
4//! Provides falsification-driven defect discovery using multiple hunting modes.
5//!
6//! # Philosophy
7//!
8//! "A theory that explains everything, explains nothing." — Karl Popper
9//!
10//! Bug hunting operationalizes falsification: we systematically attempt to break
11//! code, not merely verify it works. Each mode represents a different strategy
12//! for falsifying the implicit claim "this code is correct."
13//!
14//! # Modes
15//!
16//! - **Falsify**: Mutation-based invariant falsification (FDV pattern)
17//! - **Hunt**: SBFL without failing tests (SBEST pattern)
18//! - **Analyze**: LLM-augmented static analysis (LLIFT pattern)
19//! - **Fuzz**: Targeted unsafe Rust fuzzing (FourFuzz pattern)
20//! - **DeepHunt**: Hybrid concolic + SBFL (COTTONTAIL pattern)
21//!
22//! # Advanced Features (BH-11 to BH-15)
23//!
24//! - **Spec-Driven**: Hunt bugs guided by specification files
25//! - **Ticket-Scoped**: Focus on areas defined by PMAT work tickets
26//! - **Scoped Analysis**: --lib, --bin, --path for targeted hunting
27//! - **Bidirectional Linking**: Update specs with findings
28//! - **False Positive Suppression**: Filter known false positive patterns
29//!
30//! # Integration with OIP
31//!
32//! These modes leverage OIP's SBFL (Tarantula/Ochiai/DStar), defect classification,
33//! and RAG enhancement to proactively identify bugs before they reach production.
34
35pub mod blame;
36pub mod cache;
37pub mod config;
38#[cfg(feature = "native")]
39pub mod contracts;
40pub mod coverage;
41mod defect_patterns;
42pub mod diff;
43pub mod languages;
44pub mod localization;
45#[cfg(feature = "native")]
46pub mod model_parity;
47mod modes_analyze;
48mod modes_falsify;
49mod modes_fuzz;
50mod modes_hunt;
51pub mod patterns;
52pub mod pmat_quality;
53pub mod spec;
54pub mod ticket;
55mod types;
56
57#[allow(unused_imports)]
58pub use localization::{CrashBucketer, MultiChannelLocalizer, ScoredLocation};
59pub use patterns::{compute_test_lines, is_real_pattern, should_suppress_finding};
60pub use spec::ParsedSpec;
61pub use ticket::PmatTicket;
62pub use types::*;
63
64use std::path::Path;
65use std::time::Instant;
66
67// Re-exports for test access (functions moved to submodules in QA-002 split)
68#[cfg(test)]
69use modes_analyze::{
70    analyze_common_patterns, categorize_clippy_warning, extract_clippy_finding,
71    match_custom_pattern, match_lang_pattern, parse_defect_category, parse_finding_severity,
72    scan_file_for_patterns, PatternMatchContext,
73};
74#[cfg(test)]
75use modes_falsify::{analyze_file_for_mutations, detect_mutation_targets, run_falsify_mode};
76#[cfg(test)]
77use modes_fuzz::{
78    crate_forbids_unsafe, run_deep_hunt_mode, run_fuzz_mode, scan_file_for_deep_conditionals,
79    scan_file_for_unsafe_blocks, source_forbids_unsafe,
80};
81#[cfg(test)]
82use modes_hunt::{
83    analyze_coverage_hotspots, analyze_stack_trace, parse_lcov_da_line, parse_lcov_for_hotspots,
84    report_uncovered_hotspots, run_hunt_mode,
85};
86
/// Emit a one-line progress marker for `phase` on stderr (native builds only).
///
/// The marker is prefixed with the current hunt `mode`, right-aligned inside
/// square brackets and rendered dimmed.
#[cfg(feature = "native")]
fn eprint_phase(phase: &str, mode: &HuntMode) {
    use crate::ansi_colors::Colorize;

    let mode_tag = format!("[{:>8}]", mode);
    eprintln!("  {} {}", mode_tag.dimmed(), phase);
}
93
94/// Run bug hunting with the specified configuration.
95pub fn hunt(project_path: &Path, config: HuntConfig) -> HuntResult {
96    let start = Instant::now();
97
98    // Check cache first
99    if let Some(cached) = cache::load_cached(project_path, &config) {
100        #[cfg(feature = "native")]
101        {
102            use crate::ansi_colors::Colorize;
103            eprintln!("  {} hit — using cached findings", "[  cache]".dimmed());
104        }
105        let mut result = HuntResult::new(project_path, cached.mode, config);
106        result.findings = cached.findings;
107        result.duration_ms = start.elapsed().as_millis() as u64;
108        result.finalize();
109        return result;
110    }
111
112    let mut result = HuntResult::new(project_path, config.mode, config.clone());
113
114    // Phase 1: Mode dispatch (scanning)
115    #[cfg(feature = "native")]
116    eprint_phase("Scanning...", &config.mode);
117    let phase_start = Instant::now();
118
119    match config.mode {
120        HuntMode::Falsify => modes_falsify::run_falsify_mode(project_path, &config, &mut result),
121        HuntMode::Hunt => modes_hunt::run_hunt_mode(project_path, &config, &mut result),
122        HuntMode::Analyze => modes_analyze::run_analyze_mode(project_path, &config, &mut result),
123        HuntMode::Fuzz => modes_fuzz::run_fuzz_mode(project_path, &config, &mut result),
124        HuntMode::DeepHunt => modes_fuzz::run_deep_hunt_mode(project_path, &config, &mut result),
125        HuntMode::Quick => run_quick_mode(project_path, &config, &mut result),
126    }
127    result.phase_timings.mode_dispatch_ms = phase_start.elapsed().as_millis() as u64;
128
129    // Phase 2: BH-21 to BH-24: PMAT quality integration
130    #[cfg(feature = "native")]
131    if config.use_pmat_quality {
132        eprint_phase("Quality index...", &config.mode);
133        let pmat_start = Instant::now();
134        let query = config.pmat_query.as_deref().unwrap_or("*");
135        if let Some(index) = pmat_quality::build_quality_index(project_path, query, 200) {
136            result.phase_timings.pmat_index_ms = pmat_start.elapsed().as_millis() as u64;
137
138            let weights_start = Instant::now();
139            eprint_phase("Applying weights...", &config.mode);
140            pmat_quality::apply_quality_weights(
141                &mut result.findings,
142                &index,
143                config.quality_weight,
144            );
145            pmat_quality::apply_regression_risk(&mut result.findings, &index);
146            result.phase_timings.pmat_weights_ms = weights_start.elapsed().as_millis() as u64;
147        }
148    }
149
150    // Phase 2b: Coverage-based hotpath weighting
151    #[cfg(feature = "native")]
152    if config.coverage_weight > 0.0 {
153        // Try to find coverage file
154        let cov_path =
155            config.coverage_path.clone().or_else(|| coverage::find_coverage_file(project_path));
156
157        if let Some(cov_path) = cov_path {
158            if let Some(cov_index) = coverage::load_coverage_index(&cov_path) {
159                eprint_phase("Coverage weights...", &config.mode);
160                coverage::apply_coverage_weights(
161                    &mut result.findings,
162                    &cov_index,
163                    config.coverage_weight,
164                );
165            }
166        }
167    }
168
169    // Phase 2c: Contract verification gaps (BH-26)
170    #[cfg(feature = "native")]
171    run_contract_gap_phase(project_path, &config, &mut result);
172
173    // Phase 2d: Model parity gaps (BH-27)
174    #[cfg(feature = "native")]
175    run_model_parity_phase(project_path, &config, &mut result);
176
177    // Phase 3: Finalize
178    #[cfg(feature = "native")]
179    eprint_phase("Finalizing...", &config.mode);
180    let finalize_start = Instant::now();
181
182    result.duration_ms = start.elapsed().as_millis() as u64;
183    result.finalize();
184    result.phase_timings.finalize_ms = finalize_start.elapsed().as_millis() as u64;
185
186    // Save to cache
187    cache::save_cache(project_path, &config, &result.findings, result.mode);
188
189    result
190}
191
/// Phase 2c helper (BH-26): add contract verification gap findings to `result`.
///
/// Does nothing unless a contracts path is configured or auto-discovery is
/// enabled, and a contracts directory can be discovered. Only gaps whose
/// suspiciousness reaches `config.min_suspiciousness` are added; the elapsed
/// time is stored in `result.phase_timings.contract_gap_ms`.
#[cfg(feature = "native")]
fn run_contract_gap_phase(project_path: &Path, config: &HuntConfig, result: &mut HuntResult) {
    let phase_enabled = config.contracts_path.is_some() || config.contracts_auto;
    if !phase_enabled {
        return;
    }

    let configured_dir = config.contracts_path.as_deref();
    let dir = match contracts::discover_contracts_dir(project_path, configured_dir) {
        Some(found) => found,
        None => return,
    };

    eprint_phase("Contract gaps...", &config.mode);
    let phase_started = Instant::now();

    let suspicious_gaps = contracts::analyze_contract_gaps(&dir, project_path)
        .into_iter()
        .filter(|gap| gap.suspiciousness >= config.min_suspiciousness);
    for gap in suspicious_gaps {
        result.add_finding(gap);
    }

    result.phase_timings.contract_gap_ms = phase_started.elapsed().as_millis() as u64;
}
212
/// Phase 2d helper (BH-27): add model parity gap findings to `result`.
///
/// Does nothing unless a model-parity path is configured or auto-discovery is
/// enabled, and a model-parity directory can be discovered. Only gaps whose
/// suspiciousness reaches `config.min_suspiciousness` are added; the elapsed
/// time is stored in `result.phase_timings.model_parity_ms`.
#[cfg(feature = "native")]
fn run_model_parity_phase(project_path: &Path, config: &HuntConfig, result: &mut HuntResult) {
    let phase_enabled = config.model_parity_path.is_some() || config.model_parity_auto;
    if !phase_enabled {
        return;
    }

    let configured_dir = config.model_parity_path.as_deref();
    let dir = match model_parity::discover_model_parity_dir(project_path, configured_dir) {
        Some(found) => found,
        None => return,
    };

    eprint_phase("Model parity...", &config.mode);
    let phase_started = Instant::now();

    let suspicious_gaps = model_parity::analyze_model_parity_gaps(&dir, project_path)
        .into_iter()
        .filter(|gap| gap.suspiciousness >= config.min_suspiciousness);
    for gap in suspicious_gaps {
        result.add_finding(gap);
    }

    result.phase_timings.model_parity_ms = phase_started.elapsed().as_millis() as u64;
}
233
234/// Run all modes and combine results (ensemble approach).
235pub fn hunt_ensemble(project_path: &Path, base_config: HuntConfig) -> HuntResult {
236    let start = Instant::now();
237    let mut combined = HuntResult::new(project_path, HuntMode::Analyze, base_config.clone());
238
239    // Run each mode and collect findings
240    for mode in [HuntMode::Analyze, HuntMode::Hunt, HuntMode::Falsify] {
241        let mut config = base_config.clone();
242        config.mode = mode;
243        let mode_result = hunt(project_path, config);
244
245        for finding in mode_result.findings {
246            // Avoid duplicates by checking location + category.
247            // Category is included so distinct finding types at the same
248            // location (e.g., multiple contract gaps in one binding.yaml)
249            // are preserved.
250            let exists = combined.findings.iter().any(|f| {
251                f.file == finding.file
252                    && f.line == finding.line
253                    && f.category == finding.category
254                    && f.title == finding.title
255            });
256            if !exists {
257                combined.add_finding(finding);
258            }
259        }
260    }
261
262    combined.duration_ms = start.elapsed().as_millis() as u64;
263    combined.finalize();
264    combined
265}
266
267/// Run spec-driven bug hunting (BH-11).
268///
269/// Parses a specification file, extracts claims, finds implementing code,
270/// and hunts bugs specifically in those areas.
271pub fn hunt_with_spec(
272    project_path: &Path,
273    spec_path: &Path,
274    section_filter: Option<&str>,
275    mut config: HuntConfig,
276) -> Result<(HuntResult, ParsedSpec), String> {
277    let start = Instant::now();
278
279    // Parse the specification
280    let mut parsed_spec = ParsedSpec::parse(spec_path)?;
281
282    // Get claim IDs to hunt (filtered by section if specified)
283    let claim_ids: Vec<String> = if let Some(section) = section_filter {
284        parsed_spec.claims_for_section(section).iter().map(|c| c.id.clone()).collect()
285    } else {
286        parsed_spec.claims.iter().map(|c| c.id.clone()).collect()
287    };
288
289    // Find implementations for each claim
290    for claim in &mut parsed_spec.claims {
291        claim.implementations = spec::find_implementations(claim, project_path);
292    }
293
294    // Collect target paths from implementations (for claims in our filter)
295    let mut target_paths: Vec<std::path::PathBuf> = parsed_spec
296        .claims
297        .iter()
298        .filter(|c| claim_ids.contains(&c.id))
299        .flat_map(|c| c.implementations.iter().map(|i| i.file.clone()))
300        .collect();
301
302    // Deduplicate
303    target_paths.sort();
304    target_paths.dedup();
305
306    // If no implementations found, use default targets
307    if target_paths.is_empty() {
308        target_paths = config.targets.clone();
309    }
310
311    // Update config with discovered targets
312    config.targets = target_paths
313        .iter()
314        .map(|p| p.parent().unwrap_or(Path::new("src")).to_path_buf())
315        .collect::<std::collections::HashSet<_>>()
316        .into_iter()
317        .collect();
318
319    if config.targets.is_empty() {
320        config.targets = vec![std::path::PathBuf::from("src")];
321    }
322
323    // Capture PMAT config before hunt() consumes config
324    let use_pmat_quality = config.use_pmat_quality;
325    let pmat_query_str = config.pmat_query.clone();
326
327    // Run the hunt
328    let mut result = hunt(project_path, config);
329
330    // Map findings to claims
331    let mapping = spec::map_findings_to_claims(&parsed_spec.claims, &result.findings, project_path);
332
333    // BH-25: Quality gate — if PMAT quality is enabled, check implementing functions
334    if use_pmat_quality {
335        let query = pmat_query_str.as_deref().unwrap_or("*");
336        apply_spec_quality_gate(&mut parsed_spec, project_path, &mut result, query);
337    }
338
339    // Update spec claims with findings
340    let findings_by_claim: Vec<(String, Vec<Finding>)> = mapping.into_iter().collect();
341    if let Ok(updated_content) = parsed_spec.update_with_findings(&findings_by_claim) {
342        parsed_spec.original_content = updated_content;
343    }
344
345    result.duration_ms = start.elapsed().as_millis() as u64;
346
347    Ok((result, parsed_spec))
348}
349
350/// Apply quality gate checks to spec claims using PMAT quality index (BH-25).
351fn apply_spec_quality_gate(
352    parsed_spec: &mut ParsedSpec,
353    project_path: &Path,
354    result: &mut HuntResult,
355    query: &str,
356) {
357    let Some(index) = pmat_quality::build_quality_index(project_path, query, 200) else {
358        return;
359    };
360    for claim in &mut parsed_spec.claims {
361        for imp in &claim.implementations {
362            let Some(pmat) = pmat_quality::lookup_quality(&index, &imp.file, imp.line) else {
363                continue;
364            };
365            let is_low_quality =
366                pmat.tdg_grade == "D" || pmat.tdg_grade == "F" || pmat.complexity > 20;
367            if !is_low_quality {
368                continue;
369            }
370            result.add_finding(
371                Finding::new(
372                    format!("BH-QGATE-{}", claim.id),
373                    &imp.file,
374                    imp.line,
375                    format!(
376                        "Quality gate: claim `{}` implemented by low-quality code",
377                        claim.id
378                    ),
379                )
380                .with_description(format!(
381                    "Function `{}` (grade {}, complexity {}) implements spec claim `{}`; consider refactoring",
382                    pmat.function_name, pmat.tdg_grade, pmat.complexity, claim.id
383                ))
384                .with_severity(FindingSeverity::Medium)
385                .with_category(DefectCategory::LogicErrors)
386                .with_suspiciousness(0.6)
387                .with_discovered_by(HuntMode::Analyze)
388                .with_evidence(FindingEvidence::quality_metrics(
389                    &pmat.tdg_grade,
390                    pmat.tdg_score,
391                    pmat.complexity,
392                )),
393            );
394        }
395    }
396}
397
398/// Run ticket-scoped bug hunting (BH-12).
399///
400/// Parses a PMAT ticket and focuses analysis on affected paths.
401pub fn hunt_with_ticket(
402    project_path: &Path,
403    ticket_ref: &str,
404    mut config: HuntConfig,
405) -> Result<HuntResult, String> {
406    // Parse the ticket
407    let ticket = PmatTicket::parse(ticket_ref, project_path)?;
408
409    // Update targets from ticket
410    config.targets = ticket.target_paths();
411
412    // Run the hunt with scoped targets
413    Ok(hunt(project_path, config))
414}
415
416/// Quick mode: pattern matching only, no clippy, no coverage analysis.
417/// Fastest mode for quick scans.
418fn run_quick_mode(project_path: &Path, config: &HuntConfig, result: &mut HuntResult) {
419    // Only run pattern analysis (from analyze mode)
420    modes_analyze::analyze_common_patterns(project_path, config, result);
421}
422
423#[cfg(test)]
424#[path = "tests_mod.rs"]
425mod tests;
batuta/bug_hunter/mod.rs

batuta/bug_hunter/
mod.rs