// File: batuta/bug_hunter/localization/crash_bucketing.rs

//! Semantic crash bucketing (BH-20).
//!
//! Groups crashes by root cause pattern (e.g., index out of bounds,
//! null pointer dereference) for deduplication and triage.
5
6use std::collections::HashMap;
7use std::path::PathBuf;
8
9use crate::bug_hunter::types::{CrashBucketingMode, Finding, FindingSeverity, HuntMode};
10
11/// Crash bucket for semantic grouping (BH-20).
12#[derive(Debug, Clone)]
13pub struct CrashBucket {
14    /// Root cause pattern identifier
15    pub pattern: String,
16    /// Description of the root cause
17    pub description: String,
18    /// Crashes in this bucket
19    pub crashes: Vec<CrashInfo>,
20    /// Representative crash
21    pub representative: Option<CrashInfo>,
22}
23
24/// Information about a single crash.
25#[derive(Debug, Clone)]
26pub struct CrashInfo {
27    pub id: String,
28    pub file: PathBuf,
29    pub line: usize,
30    pub message: String,
31    pub stack_trace: Vec<StackFrame>,
32}
33
/// One frame of a crash's stack trace.
#[derive(Debug, Clone)]
pub struct StackFrame {
    /// Name of the function for this frame.
    pub function: String,
    /// Source file of the frame, when available.
    pub file: Option<PathBuf>,
    /// Line number of the frame, when available.
    pub line: Option<usize>,
}
41
/// Root cause categories a crash can be bucketed under (BH-20).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RootCausePattern {
    /// Access with an index outside the valid range.
    IndexOutOfBounds,
    /// Dereference of a null pointer or reference.
    NullPointerDeref,
    /// Integer arithmetic overflow.
    IntegerOverflow,
    /// Division by zero.
    DivisionByZero,
    /// Stack overflow.
    StackOverflow,
    /// Heap overflow.
    HeapOverflow,
    /// Use of memory after it was freed.
    UseAfterFree,
    /// Memory freed twice.
    DoubleFree,
    /// `unwrap` called on a `None` value.
    UnwrapOnNone,
    /// A failed assertion.
    AssertionFailed,
    /// No known pattern matched.
    Unknown,
}
57
58impl std::fmt::Display for RootCausePattern {
59    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
60        match self {
61            Self::IndexOutOfBounds => write!(f, "index_out_of_bounds"),
62            Self::NullPointerDeref => write!(f, "null_pointer_deref"),
63            Self::IntegerOverflow => write!(f, "integer_overflow"),
64            Self::DivisionByZero => write!(f, "division_by_zero"),
65            Self::StackOverflow => write!(f, "stack_overflow"),
66            Self::HeapOverflow => write!(f, "heap_overflow"),
67            Self::UseAfterFree => write!(f, "use_after_free"),
68            Self::DoubleFree => write!(f, "double_free"),
69            Self::UnwrapOnNone => write!(f, "unwrap_on_none"),
70            Self::AssertionFailed => write!(f, "assertion_failed"),
71            Self::Unknown => write!(f, "unknown"),
72        }
73    }
74}
75
76/// Semantic crash bucketer (BH-20).
77pub struct CrashBucketer {
78    pub mode: CrashBucketingMode,
79    pub buckets: HashMap<String, CrashBucket>,
80}
81
82impl CrashBucketer {
83    pub fn new(mode: CrashBucketingMode) -> Self {
84        Self { mode, buckets: HashMap::new() }
85    }
86
87    /// Detect root cause pattern from crash message.
88    pub fn detect_pattern(message: &str) -> RootCausePattern {
89        let msg_lower = message.to_lowercase();
90        detect_pattern_from_lower(&msg_lower)
91    }
92}
93
94/// Pattern detection rules (ordered by specificity).
95const PATTERN_RULES: &[(&[&str], RootCausePattern)] = &[
96    (&["index out of bounds"], RootCausePattern::IndexOutOfBounds),
97    (&["indexoutofbounds"], RootCausePattern::IndexOutOfBounds),
98    (&["null"], RootCausePattern::NullPointerDeref),
99    (&["nullptr"], RootCausePattern::NullPointerDeref),
100    (&["division by zero"], RootCausePattern::DivisionByZero),
101    (&["divide by zero"], RootCausePattern::DivisionByZero),
102    (&["use after free"], RootCausePattern::UseAfterFree),
103    (&["double free"], RootCausePattern::DoubleFree),
104    (&["called `option::unwrap()`"], RootCausePattern::UnwrapOnNone),
105];
106
107/// Multi-keyword rules (all keywords must match).
108const MULTI_KEYWORD_RULES: &[(&[&str], RootCausePattern)] = &[
109    (&["overflow", "integer"], RootCausePattern::IntegerOverflow),
110    (&["overflow", "stack"], RootCausePattern::StackOverflow),
111    (&["unwrap", "none"], RootCausePattern::UnwrapOnNone),
112];
113
114fn detect_pattern_from_lower(msg: &str) -> RootCausePattern {
115    // Multi-keyword rules first (more specific)
116    for (keywords, pattern) in MULTI_KEYWORD_RULES {
117        if keywords.iter().all(|kw| msg.contains(kw)) {
118            return *pattern;
119        }
120    }
121    // Single-keyword rules
122    for (keywords, pattern) in PATTERN_RULES {
123        if keywords.iter().any(|kw| msg.contains(kw)) {
124            return *pattern;
125        }
126    }
127    // Fallback checks
128    if msg.contains("overflow") {
129        return RootCausePattern::HeapOverflow;
130    }
131    if msg.contains("assertion") || msg.contains("assert") {
132        return RootCausePattern::AssertionFailed;
133    }
134    RootCausePattern::Unknown
135}
136
137impl CrashBucketer {
138    /// Add a crash to the appropriate bucket.
139    pub fn add_crash(&mut self, crash: CrashInfo) {
140        let bucket_key = match self.mode {
141            CrashBucketingMode::None => {
142                // Each crash gets its own bucket
143                crash.id.clone()
144            }
145            CrashBucketingMode::StackTrace => {
146                // Bucket by top 3 stack frames
147                let frames: Vec<String> =
148                    crash.stack_trace.iter().take(3).map(|f| f.function.clone()).collect();
149                frames.join("::")
150            }
151            CrashBucketingMode::Semantic => {
152                // Bucket by root cause pattern
153                let pattern = Self::detect_pattern(&crash.message);
154                format!("{}:{}", pattern, crash.file.display())
155            }
156        };
157
158        let bucket = self.buckets.entry(bucket_key.clone()).or_insert_with(|| {
159            let pattern = Self::detect_pattern(&crash.message);
160            CrashBucket {
161                pattern: pattern.to_string(),
162                description: format!("{} in {}", pattern, crash.file.display()),
163                crashes: Vec::new(),
164                representative: None,
165            }
166        });
167
168        // First crash becomes representative
169        if bucket.representative.is_none() {
170            bucket.representative = Some(crash.clone());
171        }
172
173        bucket.crashes.push(crash);
174    }
175
176    /// Get deduplicated findings from bucketed crashes.
177    pub fn to_findings(&self) -> Vec<Finding> {
178        self.buckets
179            .values()
180            .filter_map(|bucket| {
181                bucket.representative.as_ref().map(|rep| {
182                    Finding::new(
183                        format!("BH-CRASH-{}", bucket.pattern.to_uppercase()),
184                        &rep.file,
185                        rep.line,
186                        &bucket.description,
187                    )
188                    .with_description(format!(
189                        "{} occurrence(s) of {} pattern",
190                        bucket.crashes.len(),
191                        bucket.pattern
192                    ))
193                    .with_severity(FindingSeverity::High)
194                    .with_suspiciousness(0.8)
195                    .with_discovered_by(HuntMode::Hunt)
196                })
197            })
198            .collect()
199    }
200
201    /// Get deduplication statistics.
202    pub fn stats(&self) -> (usize, usize) {
203        let total_crashes: usize = self.buckets.values().map(|b| b.crashes.len()).sum();
204        let unique_buckets = self.buckets.len();
205        (total_crashes, unique_buckets)
206    }
207}