batuta/bug_hunter/localization/
crash_bucketing.rs1use std::collections::HashMap;
7use std::path::PathBuf;
8
9use crate::bug_hunter::types::{CrashBucketingMode, Finding, FindingSeverity, HuntMode};
10
11#[derive(Debug, Clone)]
13pub struct CrashBucket {
14 pub pattern: String,
16 pub description: String,
18 pub crashes: Vec<CrashInfo>,
20 pub representative: Option<CrashInfo>,
22}
23
24#[derive(Debug, Clone)]
26pub struct CrashInfo {
27 pub id: String,
28 pub file: PathBuf,
29 pub line: usize,
30 pub message: String,
31 pub stack_trace: Vec<StackFrame>,
32}
33
/// A single frame of a crash's stack trace.
#[derive(Clone, Debug)]
pub struct StackFrame {
    /// Name of the function this frame belongs to.
    pub function: String,
    /// Source file of the frame, when known.
    pub file: Option<PathBuf>,
    /// Line within `file`, when known.
    pub line: Option<usize>,
}
41
/// Coarse root-cause category assigned to a crash message.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum RootCausePattern {
    /// Access past the end of an indexed collection.
    IndexOutOfBounds,
    /// Dereference of a null pointer / null reference.
    NullPointerDeref,
    /// Integer arithmetic overflow.
    IntegerOverflow,
    /// Division by zero.
    DivisionByZero,
    /// Stack overflow.
    StackOverflow,
    /// Heap overflow; also the fallback for any "overflow" message that was
    /// not recognised as an integer or stack overflow.
    HeapOverflow,
    /// Use of memory after it was freed.
    UseAfterFree,
    /// The same allocation freed twice.
    DoubleFree,
    /// `unwrap()` called on a `None` value.
    UnwrapOnNone,
    /// A failed assertion.
    AssertionFailed,
    /// No known pattern matched the message.
    Unknown,
}
57
58impl std::fmt::Display for RootCausePattern {
59 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
60 match self {
61 Self::IndexOutOfBounds => write!(f, "index_out_of_bounds"),
62 Self::NullPointerDeref => write!(f, "null_pointer_deref"),
63 Self::IntegerOverflow => write!(f, "integer_overflow"),
64 Self::DivisionByZero => write!(f, "division_by_zero"),
65 Self::StackOverflow => write!(f, "stack_overflow"),
66 Self::HeapOverflow => write!(f, "heap_overflow"),
67 Self::UseAfterFree => write!(f, "use_after_free"),
68 Self::DoubleFree => write!(f, "double_free"),
69 Self::UnwrapOnNone => write!(f, "unwrap_on_none"),
70 Self::AssertionFailed => write!(f, "assertion_failed"),
71 Self::Unknown => write!(f, "unknown"),
72 }
73 }
74}
75
76pub struct CrashBucketer {
78 pub mode: CrashBucketingMode,
79 pub buckets: HashMap<String, CrashBucket>,
80}
81
82impl CrashBucketer {
83 pub fn new(mode: CrashBucketingMode) -> Self {
84 Self { mode, buckets: HashMap::new() }
85 }
86
87 pub fn detect_pattern(message: &str) -> RootCausePattern {
89 let msg_lower = message.to_lowercase();
90 detect_pattern_from_lower(&msg_lower)
91 }
92}
93
94const PATTERN_RULES: &[(&[&str], RootCausePattern)] = &[
96 (&["index out of bounds"], RootCausePattern::IndexOutOfBounds),
97 (&["indexoutofbounds"], RootCausePattern::IndexOutOfBounds),
98 (&["null"], RootCausePattern::NullPointerDeref),
99 (&["nullptr"], RootCausePattern::NullPointerDeref),
100 (&["division by zero"], RootCausePattern::DivisionByZero),
101 (&["divide by zero"], RootCausePattern::DivisionByZero),
102 (&["use after free"], RootCausePattern::UseAfterFree),
103 (&["double free"], RootCausePattern::DoubleFree),
104 (&["called `option::unwrap()`"], RootCausePattern::UnwrapOnNone),
105];
106
107const MULTI_KEYWORD_RULES: &[(&[&str], RootCausePattern)] = &[
109 (&["overflow", "integer"], RootCausePattern::IntegerOverflow),
110 (&["overflow", "stack"], RootCausePattern::StackOverflow),
111 (&["unwrap", "none"], RootCausePattern::UnwrapOnNone),
112];
113
114fn detect_pattern_from_lower(msg: &str) -> RootCausePattern {
115 for (keywords, pattern) in MULTI_KEYWORD_RULES {
117 if keywords.iter().all(|kw| msg.contains(kw)) {
118 return *pattern;
119 }
120 }
121 for (keywords, pattern) in PATTERN_RULES {
123 if keywords.iter().any(|kw| msg.contains(kw)) {
124 return *pattern;
125 }
126 }
127 if msg.contains("overflow") {
129 return RootCausePattern::HeapOverflow;
130 }
131 if msg.contains("assertion") || msg.contains("assert") {
132 return RootCausePattern::AssertionFailed;
133 }
134 RootCausePattern::Unknown
135}
136
137impl CrashBucketer {
138 pub fn add_crash(&mut self, crash: CrashInfo) {
140 let bucket_key = match self.mode {
141 CrashBucketingMode::None => {
142 crash.id.clone()
144 }
145 CrashBucketingMode::StackTrace => {
146 let frames: Vec<String> =
148 crash.stack_trace.iter().take(3).map(|f| f.function.clone()).collect();
149 frames.join("::")
150 }
151 CrashBucketingMode::Semantic => {
152 let pattern = Self::detect_pattern(&crash.message);
154 format!("{}:{}", pattern, crash.file.display())
155 }
156 };
157
158 let bucket = self.buckets.entry(bucket_key.clone()).or_insert_with(|| {
159 let pattern = Self::detect_pattern(&crash.message);
160 CrashBucket {
161 pattern: pattern.to_string(),
162 description: format!("{} in {}", pattern, crash.file.display()),
163 crashes: Vec::new(),
164 representative: None,
165 }
166 });
167
168 if bucket.representative.is_none() {
170 bucket.representative = Some(crash.clone());
171 }
172
173 bucket.crashes.push(crash);
174 }
175
176 pub fn to_findings(&self) -> Vec<Finding> {
178 self.buckets
179 .values()
180 .filter_map(|bucket| {
181 bucket.representative.as_ref().map(|rep| {
182 Finding::new(
183 format!("BH-CRASH-{}", bucket.pattern.to_uppercase()),
184 &rep.file,
185 rep.line,
186 &bucket.description,
187 )
188 .with_description(format!(
189 "{} occurrence(s) of {} pattern",
190 bucket.crashes.len(),
191 bucket.pattern
192 ))
193 .with_severity(FindingSeverity::High)
194 .with_suspiciousness(0.8)
195 .with_discovered_by(HuntMode::Hunt)
196 })
197 })
198 .collect()
199 }
200
201 pub fn stats(&self) -> (usize, usize) {
203 let total_crashes: usize = self.buckets.values().map(|b| b.crashes.len()).sum();
204 let unique_buckets = self.buckets.len();
205 (total_crashes, unique_buckets)
206 }
207}