use crate::{Backtrace, CrashKind, CrashReport, Frame, Side};
/// Parses every AddressSanitizer report found in `text`.
///
/// Each line is fed to a fresh `Parser`; a report is collected as soon as the
/// parser marks it finished, and whatever is still pending once the input is
/// exhausted is collected last. Reports are returned in input order.
pub fn parse(text: &str) -> Vec<CrashReport> {
    let mut parser = Parser::default();
    let mut reports: Vec<CrashReport> = text
        .lines()
        .filter_map(|line| {
            parser.feed(line);
            parser.take_finished()
        })
        .collect();
    // A report that was never closed by a SUMMARY line or a following header
    // is still sitting in the parser; `finish` hands it over if it exists.
    reports.extend(parser.finish());
    reports
}
/// Returns the first report found in `text`, or `None` when `text` contains
/// no recognisable report.
pub fn parse_one(text: &str) -> Option<CrashReport> {
    let mut reports = parse(text);
    if reports.is_empty() {
        None
    } else {
        // Index 0 is the earliest report; the rest are discarded.
        Some(reports.swap_remove(0))
    }
}
/// Line-by-line state machine that assembles one `CrashReport` at a time.
#[derive(Default)]
struct Parser {
// Section of the report the most recent lines belong to.
state: State,
// Kind of the report being assembled; `None` when no header has been seen
// or the header named a kind that `map_kind` does not recognise.
kind: Option<CrashKind>,
// Frames of the faulting access stack.
access: Vec<Frame>,
// Frames collected after an "allocated by thread" line.
alloc: Vec<Frame>,
// Frames collected after a "freed by thread" line.
free: Vec<Frame>,
// Completed report waiting to be picked up via `take_finished`/`finish`.
finished: Option<CrashReport>,
}
/// Section of a report the parser is currently reading; decides which stack
/// a frame line is appended to.
#[derive(Default, PartialEq, Eq)]
enum State {
    /// No report in progress; frame lines are ignored.
    #[default]
    Idle,
    /// A report header was just seen; frames count as access-stack frames.
    AfterHeader,
    /// Inside the stack that follows a `READ`/`WRITE of size` line.
    InAccessStack,
    /// Inside the stack that follows an "allocated by thread" line.
    InAllocStack,
    /// Inside the stack that follows a "freed by thread" line.
    InFreeStack,
}
impl Parser {
fn feed(&mut self, line: &str) {
let trimmed = line.trim_start();
if let Some(kind_str) = header_kind(trimmed) {
self.flush();
self.kind = map_kind(kind_str);
self.state = State::AfterHeader;
return;
}
if trimmed.starts_with("allocated by thread")
|| trimmed.starts_with("previously allocated by thread")
{
self.state = State::InAllocStack;
return;
}
if trimmed.starts_with("freed by thread") {
self.state = State::InFreeStack;
return;
}
if trimmed.starts_with("SUMMARY:") {
self.finalize();
return;
}
if let Some(side) = detect_side(trimmed) {
self.apply_side(side);
return;
}
if self.state == State::AfterHeader
&& (trimmed.starts_with("READ of size") || trimmed.starts_with("WRITE of size"))
{
self.state = State::InAccessStack;
return;
}
if let Some(frame) = parse_frame(trimmed) {
match self.state {
State::InAccessStack | State::AfterHeader => self.access.push(frame),
State::InAllocStack => self.alloc.push(frame),
State::InFreeStack => self.free.push(frame),
State::Idle => {}
}
}
}
fn apply_side(&mut self, side: Side) {
if let Some(CrashKind::HeapBufferOverflow { side: s }) = self.kind.as_mut() {
*s = side;
}
}
fn finalize(&mut self) {
if self.kind.is_some() {
self.flush();
}
}
fn flush(&mut self) {
if let Some(kind) = self.kind.take() {
let access_site = Backtrace { frames: std::mem::take(&mut self.access) };
let alloc_site = if self.alloc.is_empty() {
None
} else {
Some(Backtrace { frames: std::mem::take(&mut self.alloc) })
};
let free_site = if self.free.is_empty() {
None
} else {
Some(Backtrace { frames: std::mem::take(&mut self.free) })
};
let report = CrashReport::new(kind, access_site, alloc_site, free_site, Vec::new());
self.finished = Some(report);
}
self.state = State::Idle;
}
fn take_finished(&mut self) -> Option<CrashReport> {
self.finished.take()
}
fn finish(mut self) -> Option<CrashReport> {
self.finalize();
self.finished
}
}
/// Returns the text following the first `ERROR: AddressSanitizer: ` marker in
/// `line`, or `None` when the marker is absent.
fn header_kind(line: &str) -> Option<&str> {
    const MARKER: &str = "ERROR: AddressSanitizer: ";
    line.split_once(MARKER).map(|(_prefix, tail)| tail)
}
/// Maps the text after the report header marker to a `CrashKind`.
///
/// The first table entry whose token occurs anywhere in `tail` wins, so the
/// order of the table is significant. Returns `None` for unknown kinds.
fn map_kind(tail: &str) -> Option<CrashKind> {
    const TABLE: &[(&str, CrashKindCtor)] = &[
        ("heap-buffer-overflow", CrashKindCtor::HeapBufferOverflow),
        ("stack-buffer-overflow", CrashKindCtor::StackBufferOverflow),
        ("global-buffer-overflow", CrashKindCtor::GlobalBufferOverflow),
        ("stack-use-after-return", CrashKindCtor::StackUseAfterReturn),
        ("stack-use-after-scope", CrashKindCtor::StackUseAfterScope),
        ("heap-use-after-free", CrashKindCtor::UseAfterFree),
        ("use-after-free", CrashKindCtor::UseAfterFree),
        ("double-free", CrashKindCtor::DoubleFree),
        ("free on address which was not malloc", CrashKindCtor::InvalidFree),
        ("bad-free", CrashKindCtor::InvalidFree),
        ("invalid-free", CrashKindCtor::InvalidFree),
    ];
    TABLE
        .iter()
        .find(|(token, _)| tail.contains(*token))
        .map(|(_, ctor)| ctor.build())
}
// Payload-free tag for each supported crash kind. `map_kind` stores these in
// its constant lookup table and turns the selected tag into a `CrashKind`
// through `build`.
enum CrashKindCtor {
HeapBufferOverflow,
StackBufferOverflow,
GlobalBufferOverflow,
StackUseAfterReturn,
StackUseAfterScope,
UseAfterFree,
DoubleFree,
InvalidFree,
}
impl CrashKindCtor {
fn build(&self) -> CrashKind {
match self {
Self::HeapBufferOverflow => CrashKind::HeapBufferOverflow { side: Side::Right },
Self::StackBufferOverflow => CrashKind::StackBufferOverflow,
Self::GlobalBufferOverflow => CrashKind::GlobalBufferOverflow,
Self::StackUseAfterReturn => CrashKind::StackUseAfterReturn,
Self::StackUseAfterScope => CrashKind::StackUseAfterScope,
Self::UseAfterFree => CrashKind::UseAfterFree { quarantine_residence_ms: 0 },
Self::DoubleFree => CrashKind::DoubleFree,
Self::InvalidFree => CrashKind::InvalidFree,
}
}
}
/// Detects on which side of a region an access landed, based on the wording
/// of `line`.
///
/// Both the "to the right of"/"to the left of" phrasing and the
/// " after "/" before " phrasing are accepted; the right-hand phrases are
/// checked first. Returns `None` when neither is present.
fn detect_side(line: &str) -> Option<Side> {
    let mentions_any = |phrases: [&str; 2]| phrases.iter().any(|phrase| line.contains(*phrase));

    if mentions_any(["to the right of", " after "]) {
        return Some(Side::Right);
    }
    if mentions_any(["to the left of", " before "]) {
        return Some(Side::Left);
    }
    None
}
/// Parses one stack-frame line of the form
/// `#<n> 0x<addr> [in] <symbol> [<location>]`.
///
/// `line` must already have its leading whitespace removed. Returns `None`
/// when the line does not start with `#`, when the address is missing or not
/// hexadecimal, or when nothing follows the frame number or the address.
fn parse_frame(line: &str) -> Option<Frame> {
    let numbered = line.strip_prefix('#')?.trim_start_matches('#');
    let (_frame_no, remainder) = split_once_ws(numbered)?;
    let (address, remainder) = split_once_ws(remainder.trim_start())?;

    // The address must carry a `0x` prefix; repeated prefixes are tolerated.
    let hex_digits = address.strip_prefix("0x")?.trim_start_matches("0x");
    let ip = u64::from_str_radix(hex_digits, 16).ok()?;

    let described = remainder.trim_start();
    let described = described.strip_prefix("in ").unwrap_or(described);
    let (symbol, file, line_no) = parse_symbol_and_location(described);
    Some(Frame {
        ip,
        symbol,
        file,
        line: line_no,
    })
}
/// Splits the part of a frame line that follows the address (and the optional
/// `in `) into `(symbol, file, line)`.
///
/// The location is recognised from the right-hand end so that symbols which
/// contain spaces (demangled C++ such as `operator new(unsigned long)`) stay
/// intact:
///
/// * trailing free-standing parenthesised groups — `(module+0xoff)`,
///   `(BuildId: …)`, `(<unknown module>)` — are stripped and carry no
///   file/line information;
/// * a final token of the form `path:line` or `path:line:col` becomes the
///   file and line; when an earlier token starts with `/`, the path is taken
///   to begin there so absolute paths containing spaces survive;
/// * anything else falls back to the historical rule: the first
///   space-separated token is the symbol and the remainder is a bare file.
///
/// Known limitation: a relative path containing spaces is still split at its
/// last space.
fn parse_symbol_and_location(s: &str) -> (Option<String>, Option<String>, Option<u32>) {
    // Byte index of the '(' opening a trailing parenthesised group that is
    // free-standing, i.e. at the start of the text or preceded by whitespace.
    // The '(' of `foo(int)` is glued to the name and therefore does not count.
    fn trailing_group_start(text: &str) -> Option<usize> {
        if !text.ends_with(')') {
            return None;
        }
        let mut depth = 0usize;
        for (idx, ch) in text.char_indices().rev() {
            match ch {
                ')' => depth += 1,
                '(' => {
                    // The scan starts on a ')', so depth is at least 1 here.
                    depth -= 1;
                    if depth == 0 {
                        let free_standing = text[..idx]
                            .chars()
                            .next_back()
                            .map_or(true, char::is_whitespace);
                        return free_standing.then_some(idx);
                    }
                }
                _ => {}
            }
        }
        None
    }

    // Splits `path:line` or `path:line:col`, requiring numeric line/column.
    // Working from the right keeps a drive-letter colon inside the path.
    fn split_location(loc: &str) -> Option<(&str, u32)> {
        let (head, last) = loc.rsplit_once(':')?;
        let last_no = last.parse::<u32>().ok()?;
        let (file, line_no) = match head.rsplit_once(':') {
            Some((file, line)) => match line.parse::<u32>() {
                Ok(line_no) => (file, line_no), // path:line:col
                Err(_) => (head, last_no),      // e.g. C:\src\a.c:12
            },
            None => (head, last_no),
        };
        (!file.is_empty()).then_some((file, line_no))
    }

    // Normalises a symbol candidate; empty names become `None`.
    fn symbol_of(raw: &str) -> Option<String> {
        let name = raw.trim().trim_end_matches('(');
        (!name.is_empty()).then(|| name.to_string())
    }

    let mut rest = s.trim();
    let mut saw_group = false;
    while let Some(open) = trailing_group_start(rest) {
        rest = rest[..open].trim_end();
        saw_group = true;
    }

    if let Some((head, last)) = rest.rsplit_once(' ') {
        if split_location(last).is_some() {
            // `cut` is the index of the space separating symbol and location.
            let cut = head.find(" /").unwrap_or(head.len());
            if let Some((file, line_no)) = split_location(&rest[cut + 1..]) {
                return (symbol_of(&rest[..cut]), Some(file.to_string()), Some(line_no));
            }
        }
    }

    if saw_group {
        // Only module / BuildId information followed the symbol (or there was
        // no symbol at all, in which case `rest` is empty).
        return (symbol_of(rest), None, None);
    }

    // Historical fallback: first token is the symbol, remainder a location.
    let (first, tail) = match rest.split_once(' ') {
        Some((first, tail)) => (first, tail.trim()),
        None => (rest, ""),
    };
    let symbol = symbol_of(first);
    if tail.is_empty() || tail.starts_with('(') {
        return (symbol, None, None);
    }
    let tail = tail.trim_matches(|c: char| c == '(' || c == ')');
    let mut bits = tail.rsplitn(3, ':');
    let (file, line_no) = match (bits.next(), bits.next(), bits.next()) {
        (Some(_), Some(line), Some(file)) | (Some(line), Some(file), None) => {
            (Some(file.to_string()), line.parse::<u32>().ok())
        }
        (Some(only), None, None) => (Some(only.to_string()), None),
        _ => (None, None),
    };
    (symbol, file, line_no)
}
/// Splits `s` at its first whitespace character, returning the text before
/// and after it (the whitespace character itself is dropped). Returns `None`
/// when `s` contains no whitespace.
///
/// `str::split_once` skips the full UTF-8 width of the separator, so
/// multi-byte whitespace such as U+00A0 or U+3000 cannot cause a slice at a
/// non-character boundary.
fn split_once_ws(s: &str) -> Option<(&str, &str)> {
    s.split_once(char::is_whitespace)
}
// Unit tests for the report parser. Fixtures are abbreviated sanitizer
// reports; each test drives the public `parse`/`parse_one` entry points or a
// single private helper.
#[cfg(test)]
mod tests {
use super::*;
// Heap-buffer-overflow report: a WRITE access stack with three frames, a
// "to the right of" description, a two-frame allocation stack and a closing
// SUMMARY line.
const HBO_RIGHT: &str = "\
=================================================================
==12345==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x602000000014 at pc 0x55cd8e2a8b32 bp 0x7ffeabcd1230 sp 0x7ffeabcd1228
WRITE of size 1 at 0x602000000014 thread T0
#0 0x55cd8e2a8b31 in process_input /tmp/buggy.c:12:5
#1 0x55cd8e2a8a1f in main /tmp/buggy.c:23:9
#2 0x7f8d4f023d8f in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x23d8f)
0x602000000014 is located 4 bytes to the right of 16-byte region [0x602000000000,0x602000000010)
allocated by thread T0 here:
#0 0x7f8d4f25ccd1 in __interceptor_malloc
#1 0x55cd8e2a89d5 in main /tmp/buggy.c:21:23
SUMMARY: AddressSanitizer: heap-buffer-overflow /tmp/buggy.c:12:5 in process_input
";
// Heap-use-after-free report with access, free and allocation stacks. It has
// no SUMMARY line, so the report is only emitted when the input ends.
const UAF: &str = "\
==7777==ERROR: AddressSanitizer: heap-use-after-free on address 0x602000000010 at pc 0x400abc
READ of size 4 at 0x602000000010 thread T0
#0 0x400abc in use_after_free /tmp/uaf.c:10:5
#1 0x400def in main /tmp/uaf.c:20:5
0x602000000010 is located 0 bytes inside of 16-byte region [0x602000000010,0x602000000020)
freed by thread T0 here:
#0 0x7f111111 in __interceptor_free
#1 0x400444 in main /tmp/uaf.c:18:5
previously allocated by thread T0 here:
#0 0x7f000000 in __interceptor_malloc
#1 0x400333 in main /tmp/uaf.c:15:15
";
// Kind, side, access frames (symbol/file/line) and allocation stack of the
// heap-buffer-overflow fixture.
#[test]
fn parses_heap_buffer_overflow_right_side() {
let r = parse_one(HBO_RIGHT).expect("parse");
assert_eq!(
r.kind,
CrashKind::HeapBufferOverflow { side: Side::Right }
);
assert_eq!(r.access_site.frames.len(), 3);
assert_eq!(r.access_site.frames[0].symbol.as_deref(), Some("process_input"));
assert_eq!(r.access_site.frames[0].file.as_deref(), Some("/tmp/buggy.c"));
assert_eq!(r.access_site.frames[0].line, Some(12));
assert!(r.alloc_site.is_some());
assert_eq!(r.alloc_site.as_ref().unwrap().frames.len(), 2);
}
// Frames after "freed by" and "previously allocated by" must land in the
// free and allocation stacks respectively.
#[test]
fn parses_use_after_free_with_alloc_and_free_stacks() {
let r = parse_one(UAF).expect("parse");
assert!(matches!(r.kind, CrashKind::UseAfterFree { .. }));
assert_eq!(r.access_site.frames.len(), 2);
assert!(r.free_site.is_some());
assert!(r.alloc_site.is_some());
assert_eq!(r.free_site.as_ref().unwrap().frames[1].symbol.as_deref(), Some("main"));
assert_eq!(r.alloc_site.as_ref().unwrap().frames[1].line, Some(15));
}
// A header naming an unsupported kind produces no report.
#[test]
fn unknown_kind_is_skipped_not_errored() {
let txt = "==1==ERROR: AddressSanitizer: something-new-we-dont-know\n";
assert!(parse(txt).is_empty());
}
#[test]
fn empty_input_yields_no_reports() {
assert!(parse("").is_empty());
}
// `dedup_hash` is not defined in this file; presumably it is computed by
// `CrashReport::new` (confirm there). Only its determinism is checked.
#[test]
fn dedup_hash_is_stable_across_parses() {
let a = parse_one(HBO_RIGHT).unwrap().dedup_hash;
let b = parse_one(HBO_RIGHT).unwrap().dedup_hash;
assert_eq!(a, b);
}
// A frame consisting of address and symbol only has neither file nor line.
#[test]
fn frame_without_location_parses() {
let line = " #0 0x7f25ccd1 in __interceptor_malloc";
let f = parse_frame(line.trim_start()).unwrap();
assert_eq!(f.symbol.as_deref(), Some("__interceptor_malloc"));
assert_eq!(f.file, None);
assert_eq!(f.line, None);
}
// A "(module+0xoffset)" suffix must not disturb the symbol or the address.
#[test]
fn frame_with_module_offset_parses() {
let line = " #2 0x7f8d4f023d8f in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x23d8f)";
let f = parse_frame(line.trim_start()).unwrap();
assert_eq!(f.symbol.as_deref(), Some("__libc_start_main"));
assert_eq!(f.ip, 0x7f8d4f023d8f);
}
// The double-free header is prose ("attempting double-free on ...") and is
// followed directly by frames, with no READ/WRITE line.
#[test]
fn parses_double_free_prose_header() {
let txt = "==9999==ERROR: AddressSanitizer: attempting double-free on 0x602000000020 in thread T0:\n\
#0 0x7f200000 in __interceptor_free\n\
#1 0x400777 in bad_cleanup /tmp/uaf.c:40:5\n\
SUMMARY: AddressSanitizer: double-free /tmp/uaf.c:40:5 in bad_cleanup\n";
let r = parse_one(txt).expect("parse");
assert_eq!(r.kind, CrashKind::DoubleFree);
}
// The invalid-free header is matched through its prose wording.
#[test]
fn parses_invalid_free_prose_header() {
let txt = "==1==ERROR: AddressSanitizer: attempting free on address which was not malloc()-ed: 0x42 in thread T0\n\
SUMMARY: AddressSanitizer: bad-free in foo\n";
let r = parse_one(txt).expect("parse");
assert_eq!(r.kind, CrashKind::InvalidFree);
}
// Both "heap-use-after-free" and "use-after-free" map to the same kind in
// `map_kind`, so this only verifies that the header is recognised.
#[test]
fn heap_use_after_free_matches_uaf_not_plain_use_after_free_prefix() {
let r = parse_one(UAF).expect("parse");
assert!(matches!(r.kind, CrashKind::UseAfterFree { .. }));
}
// "to the left of" overrides the default right-hand side.
#[test]
fn left_side_is_detected() {
let txt = "==1==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x1 at pc 0x2\n\
WRITE of size 1 at 0x1 thread T0\n\
0x1 is located 8 bytes to the left of 16-byte region [0x10,0x20)\n\
SUMMARY: AddressSanitizer: heap-buffer-overflow\n";
let r = parse_one(txt).unwrap();
assert_eq!(r.kind, CrashKind::HeapBufferOverflow { side: Side::Left });
}
// The "N bytes after/before" wording must be understood as right/left.
#[test]
fn modern_after_before_phrasing_detected() {
let after = "==1==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x1 at pc 0x2\n\
WRITE of size 1 at 0x1 thread T0\n\
0x1 is located 4 bytes after 16-byte region [0x10,0x20)\n\
SUMMARY: AddressSanitizer: heap-buffer-overflow\n";
assert_eq!(
parse_one(after).unwrap().kind,
CrashKind::HeapBufferOverflow { side: Side::Right }
);
let before = "==1==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x1 at pc 0x2\n\
WRITE of size 1 at 0x1 thread T0\n\
0x1 is located 8 bytes before 16-byte region [0x10,0x20)\n\
SUMMARY: AddressSanitizer: heap-buffer-overflow\n";
assert_eq!(
parse_one(before).unwrap().kind,
CrashKind::HeapBufferOverflow { side: Side::Left }
);
}
}