1use std::fmt::Debug;
2use std::str::FromStr;
3use std::{mem, str};
4
5use debugid::DebugId;
6use memchr::memchr;
7use nom::bytes::complete::{tag, take_while};
8use nom::character::complete::{hex_digit1, space1};
9use nom::combinator::{cut, map_res, opt, rest};
10use nom::error::{Error, ErrorKind, ParseError};
11use nom::multi::separated_list1;
12use nom::sequence::{terminated, tuple};
13use nom::{Err, IResult};
14use zerocopy::{IntoBytes, LittleEndian, Ref, U32, U64};
15use zerocopy_derive::*;
16
17use crate::CodeId;
18
19#[derive(Debug, Clone, PartialEq, Eq)]
20pub struct BreakpadIndex {
21 pub module_info_bytes: Vec<u8>,
22 pub debug_name: String,
23 pub debug_id: DebugId,
24 pub arch: String,
25 pub os: String,
26 pub name: Option<String>,
27 pub code_id: Option<CodeId>,
28 pub symbol_addresses: Vec<u32>,
29 pub symbol_offsets: Vec<BreakpadSymbolType>,
30 pub files: ItemMap<BreakpadFileLine>,
31 pub inline_origins: ItemMap<BreakpadInlineOriginLine>,
32}
33
34const HEADER_SIZE: u32 = std::mem::size_of::<BreakpadSymindexFileHeader>() as u32;
35const FILE_OR_INLINE_ORIGIN_ENTRY_SIZE: u32 = std::mem::size_of::<FileOrInlineOriginEntry>() as u32;
36const SYMBOL_ADDRESS_SIZE: u32 = std::mem::size_of::<u32>() as u32;
37const SYMBOL_ENTRY_SIZE: u32 = std::mem::size_of::<SymbolEntry>() as u32;
38
39impl BreakpadIndex {
40 pub fn parse_symindex_file(data: &[u8]) -> Result<BreakpadIndex, BreakpadSymindexParseError> {
41 let header_bytes = data
42 .get(..HEADER_SIZE as usize)
43 .ok_or(BreakpadSymindexParseError::FileTooSmallForHeader)?;
44 let header = Ref::<&[u8], BreakpadSymindexFileHeader>::from_bytes(header_bytes).unwrap();
45 if &header.magic != b"SYMINDEX" {
46 return Err(BreakpadSymindexParseError::WrongMagicBytes);
47 }
48 let module_info_end_offset = header
49 .module_info_offset
50 .get()
51 .checked_add(header.module_info_len.get())
52 .ok_or(BreakpadSymindexParseError::ModuleInfoOffsetLenOverflow)?;
53 let module_info_bytes = data
54 .get(header.module_info_offset.get() as usize..module_info_end_offset as usize)
55 .ok_or(BreakpadSymindexParseError::CouldntReadModuleInfoBytes)?;
56
57 let (debug_id, os, arch, debug_name, name, code_id) = {
58 let mut module_info = None;
59 let mut code_id = None;
60 let mut name = None;
61 let mut module_info_line_buffer = LineBuffer::default();
62 module_info_line_buffer.consume(module_info_bytes, |_offset, line_slice| {
63 if let Ok((_r, (os, arch, debug_id, debug_name))) = module_line(line_slice) {
65 module_info = Some((
66 debug_id,
67 os.to_string(),
68 arch.to_string(),
69 debug_name.to_string(),
70 ));
71 } else if let Ok((_r, (code_id_str, name_str))) = info_code_id_line(line_slice) {
72 code_id = CodeId::from_str(code_id_str).ok();
73 name = name_str.map(ToOwned::to_owned);
74 }
75 });
76 module_info_line_buffer.finish(|_offset, line_slice| {
77 if let Ok((_r, (os, arch, debug_id, debug_name))) = module_line(line_slice) {
79 module_info = Some((
80 debug_id,
81 os.to_string(),
82 arch.to_string(),
83 debug_name.to_string(),
84 ));
85 } else if let Ok((_r, (code_id_str, name_str))) = info_code_id_line(line_slice) {
86 code_id = CodeId::from_str(code_id_str).ok();
87 name = name_str.map(ToOwned::to_owned);
88 }
89 });
90 match module_info {
91 Some((debug_id, os, arch, debug_name)) => {
92 (debug_id, os, arch, debug_name, name, code_id)
93 }
94 None => return Err(BreakpadSymindexParseError::CouldntParseModuleInfoLine),
95 }
96 };
97 let file_list_bytes_len = header
98 .file_count
99 .get()
100 .checked_mul(FILE_OR_INLINE_ORIGIN_ENTRY_SIZE)
101 .ok_or(BreakpadSymindexParseError::FileListByteLenOverflow)?;
102 let file_list_end_offset = header
103 .file_entries_offset
104 .get()
105 .checked_add(file_list_bytes_len)
106 .ok_or(BreakpadSymindexParseError::FileListOffsetLenOverflow)?;
107 let file_list_bytes = data
108 .get(header.file_entries_offset.get() as usize..file_list_end_offset as usize)
109 .ok_or(BreakpadSymindexParseError::CouldntReadFileListBytes)?;
110 let file_list =
111 Ref::<&[u8], [FileOrInlineOriginEntry]>::from_bytes(file_list_bytes).unwrap();
112 let inline_origin_list_bytes_len = header
113 .inline_origin_count
114 .get()
115 .checked_mul(FILE_OR_INLINE_ORIGIN_ENTRY_SIZE)
116 .ok_or(BreakpadSymindexParseError::InlineOriginListByteLenOverflow)?;
117 let inline_origin_list_end_offset = header
118 .inline_origin_entries_offset
119 .get()
120 .checked_add(inline_origin_list_bytes_len)
121 .ok_or(BreakpadSymindexParseError::InlineOriginListOffsetLenOverflow)?;
122 let inline_origin_list_bytes = data
123 .get(
124 header.inline_origin_entries_offset.get() as usize
125 ..inline_origin_list_end_offset as usize,
126 )
127 .ok_or(BreakpadSymindexParseError::CouldntReadInlineOriginListBytes)?;
128 let inline_origin_list =
129 Ref::<&[u8], [FileOrInlineOriginEntry]>::from_bytes(inline_origin_list_bytes).unwrap();
130 let symbol_address_list_bytes_len = header
131 .symbol_count
132 .get()
133 .checked_mul(SYMBOL_ADDRESS_SIZE)
134 .ok_or(BreakpadSymindexParseError::SymbolAddressListByteLenOverflow)?;
135 let symbol_address_list_end_offset = header
136 .symbol_addresses_offset
137 .get()
138 .checked_add(symbol_address_list_bytes_len)
139 .ok_or(BreakpadSymindexParseError::SymbolAddressListOffsetLenOverflow)?;
140 let symbol_address_list_bytes = data
141 .get(
142 header.symbol_addresses_offset.get() as usize
143 ..symbol_address_list_end_offset as usize,
144 )
145 .ok_or(BreakpadSymindexParseError::CouldntReadSymbolAddressListBytes)?;
146 let symbol_address_list =
147 Ref::<&[u8], [U32<LittleEndian>]>::from_bytes(symbol_address_list_bytes).unwrap();
148 let symbol_entry_list_bytes_len = header
149 .symbol_count
150 .get()
151 .checked_mul(SYMBOL_ENTRY_SIZE)
152 .ok_or(BreakpadSymindexParseError::SymbolEntryListByteLenOverflow)?;
153 let symbol_entry_list_end_offset = header
154 .symbol_entries_offset
155 .get()
156 .checked_add(symbol_entry_list_bytes_len)
157 .ok_or(BreakpadSymindexParseError::SymbolEntryListOffsetLenOverflow)?;
158 let symbol_entry_list_bytes = data
159 .get(header.symbol_entries_offset.get() as usize..symbol_entry_list_end_offset as usize)
160 .ok_or(BreakpadSymindexParseError::CouldntReadSymbolEntryListBytes)?;
161 let symbol_entry_list =
162 Ref::<&[u8], [SymbolEntry]>::from_bytes(symbol_entry_list_bytes).unwrap();
163
164 let files: Vec<BreakpadFileLine> = file_list
165 .iter()
166 .map(|entry| BreakpadFileLine {
167 index: entry.index.get(),
168 file_offset: entry.offset.get(),
169 line_length: entry.line_len.get(),
170 })
171 .collect();
172 let inline_origins: Vec<BreakpadInlineOriginLine> = inline_origin_list
173 .iter()
174 .map(|entry| BreakpadInlineOriginLine {
175 index: entry.index.get(),
176 file_offset: entry.offset.get(),
177 line_length: entry.line_len.get(),
178 })
179 .collect();
180 let symbol_addresses: Vec<u32> = symbol_address_list.iter().map(|a| a.get()).collect();
181 let symbol_offsets: Vec<BreakpadSymbolType> = symbol_entry_list
182 .iter()
183 .map(|entry| {
184 if entry.kind.get() == SYMBOL_ENTRY_KIND_PUBLIC {
185 BreakpadSymbolType::Public(BreakpadPublicSymbol {
187 file_offset: entry.offset.get(),
188 line_length: entry.line_or_block_len.get(),
189 })
190 } else {
191 BreakpadSymbolType::Func(BreakpadFuncSymbol {
193 file_offset: entry.offset.get(),
194 block_length: entry.line_or_block_len.get(),
195 })
196 }
197 })
198 .collect();
199 Ok(BreakpadIndex {
200 module_info_bytes: module_info_bytes.to_owned(),
201 debug_name,
202 debug_id,
203 arch,
204 os,
205 name,
206 code_id,
207 symbol_addresses,
208 symbol_offsets,
209 files: ItemMap::from_sorted_vec(files),
210 inline_origins: ItemMap::from_sorted_vec(inline_origins),
211 })
212 }
213
214 pub fn serialize_to_bytes(&self) -> Vec<u8> {
215 let header_len = HEADER_SIZE;
216 let module_info_offset = header_len;
217 let module_info_len = self.module_info_bytes.len() as u32;
218 let padding_after_module_info = align_to_4_bytes(module_info_len) - module_info_len;
219 let file_entries_offset = module_info_offset + module_info_len + padding_after_module_info;
220 let file_count = self.files.len() as u32;
221 let file_entries_len = file_count * FILE_OR_INLINE_ORIGIN_ENTRY_SIZE;
222 let inline_origin_entries_offset = file_entries_offset + file_entries_len;
223 let inline_origin_count = self.inline_origins.len() as u32;
224 let inline_origin_entries_len = inline_origin_count * FILE_OR_INLINE_ORIGIN_ENTRY_SIZE;
225 let symbol_addresses_offset = inline_origin_entries_offset + inline_origin_entries_len;
226 let symbol_count = self.symbol_addresses.len() as u32;
227 let symbol_addresses_len = symbol_count * SYMBOL_ADDRESS_SIZE;
228 let symbol_entries_offset = symbol_addresses_offset + symbol_addresses_len;
229 let symbol_entries_len = symbol_count * SYMBOL_ENTRY_SIZE;
230 let total_file_len = symbol_entries_offset + symbol_entries_len;
231 let header = BreakpadSymindexFileHeader {
232 magic: *b"SYMINDEX",
233 version: 1.into(),
234 module_info_offset: module_info_offset.into(),
235 module_info_len: module_info_len.into(),
236 file_count: file_count.into(),
237 file_entries_offset: file_entries_offset.into(),
238 inline_origin_count: inline_origin_count.into(),
239 inline_origin_entries_offset: inline_origin_entries_offset.into(),
240 symbol_count: symbol_count.into(),
241 symbol_addresses_offset: symbol_addresses_offset.into(),
242 symbol_entries_offset: symbol_entries_offset.into(),
243 };
244
245 let mut vec = Vec::with_capacity(total_file_len as usize);
246 vec.extend_from_slice(header.as_bytes());
247 vec.extend_from_slice(&self.module_info_bytes);
248 vec.extend(std::iter::repeat(0).take(padding_after_module_info as usize));
249 for file in self.files.as_slice() {
250 vec.extend_from_slice(
251 FileOrInlineOriginEntry {
252 index: file.index.into(),
253 line_len: file.line_length.into(),
254 offset: file.file_offset.into(),
255 }
256 .as_bytes(),
257 );
258 }
259 for inline_origin in self.inline_origins.as_slice() {
260 vec.extend_from_slice(
261 FileOrInlineOriginEntry {
262 index: inline_origin.index.into(),
263 line_len: inline_origin.line_length.into(),
264 offset: inline_origin.file_offset.into(),
265 }
266 .as_bytes(),
267 );
268 }
269 for symbol_address in &self.symbol_addresses {
270 vec.extend_from_slice(U32::<LittleEndian>::from(*symbol_address).as_bytes());
271 }
272 for symbol in &self.symbol_offsets {
273 match symbol {
274 BreakpadSymbolType::Public(public) => {
275 vec.extend_from_slice(
276 SymbolEntry {
277 kind: SYMBOL_ENTRY_KIND_PUBLIC.into(),
278 line_or_block_len: public.line_length.into(),
279 offset: public.file_offset.into(),
280 }
281 .as_bytes(),
282 );
283 }
284 BreakpadSymbolType::Func(func) => {
285 vec.extend_from_slice(
286 SymbolEntry {
287 kind: SYMBOL_ENTRY_KIND_FUNC.into(),
288 line_or_block_len: func.block_length.into(),
289 offset: func.file_offset.into(),
290 }
291 .as_bytes(),
292 );
293 }
294 }
295 }
296
297 assert_eq!(vec.len(), total_file_len as usize);
298
299 vec
300 }
301}
302
/// Rounds `value` up to the nearest multiple of `factor`.
///
/// The computation is based on the remainder, so it never overflows in an
/// intermediate step: an overflow can only happen when the rounded result
/// itself does not fit into a `u32`.
///
/// # Panics
///
/// Panics if `factor` is zero.
#[inline]
fn round_up_to_multiple(value: u32, factor: u32) -> u32 {
    match value % factor {
        0 => value,
        remainder => value + (factor - remainder),
    }
}
307
308fn align_to_4_bytes(value: u32) -> u32 {
309 round_up_to_multiple(value, 4)
310}
311
312#[derive(thiserror::Error, Debug)]
313#[non_exhaustive]
314pub enum BreakpadSymindexParseError {
315 #[error("Not enough bytes in the file for the file header")]
316 FileTooSmallForHeader,
317
318 #[error("Wrong magic bytes in the symindex header")]
319 WrongMagicBytes,
320
321 #[error("Module offset + len overflowed u32")]
322 ModuleInfoOffsetLenOverflow,
323
324 #[error("Module info bytes couldn't be read from the file")]
325 CouldntReadModuleInfoBytes,
326
327 #[error("Module info bytes couldn't be parsed as utf-8")]
328 ModuleInfoNotUtf8,
329
330 #[error("MODULE INFO couldn't be parsed in module info section")]
331 CouldntParseModuleInfoLine,
332
333 #[error("File count * file entry size overflowed")]
334 FileListByteLenOverflow,
335
336 #[error("File list offset + len overflowed u32")]
337 FileListOffsetLenOverflow,
338
339 #[error("File list bytes couldn't be read from the file")]
340 CouldntReadFileListBytes,
341
342 #[error("Inline origin count * inline origin entry size overflowed")]
343 InlineOriginListByteLenOverflow,
344
345 #[error("Inline origin offset + len overflowed u32")]
346 InlineOriginListOffsetLenOverflow,
347
348 #[error("InlineOrigin list bytes couldn't be read from the file")]
349 CouldntReadInlineOriginListBytes,
350
351 #[error("Symbol count * 4 bytes per address overflowed")]
352 SymbolAddressListByteLenOverflow,
353
354 #[error("Symbol address list offset + len overflowed u32")]
355 SymbolAddressListOffsetLenOverflow,
356
357 #[error("Symbol address list bytes couldn't be read from the file")]
358 CouldntReadSymbolAddressListBytes,
359
360 #[error("Symbol count * symbol entry size overflowed")]
361 SymbolEntryListByteLenOverflow,
362
363 #[error("Symbol entry list offset + len overflowed u32")]
364 SymbolEntryListOffsetLenOverflow,
365
366 #[error("Symbol entry list bytes couldn't be read from the file")]
367 CouldntReadSymbolEntryListBytes,
368}
369
370#[derive(FromBytes, KnownLayout, Immutable, IntoBytes, Unaligned)]
371#[repr(C)]
372struct BreakpadSymindexFileHeader {
373 magic: [u8; 8],
375 version: U32<LittleEndian>,
377 module_info_offset: U32<LittleEndian>,
379 module_info_len: U32<LittleEndian>,
381 file_count: U32<LittleEndian>,
383 file_entries_offset: U32<LittleEndian>,
385 inline_origin_count: U32<LittleEndian>,
387 inline_origin_entries_offset: U32<LittleEndian>,
389 symbol_count: U32<LittleEndian>,
391 symbol_addresses_offset: U32<LittleEndian>,
393 symbol_entries_offset: U32<LittleEndian>,
395}
396
397#[derive(FromBytes, KnownLayout, Immutable, IntoBytes, Unaligned)]
398#[repr(C)]
399struct FileOrInlineOriginEntry {
400 pub index: U32<LittleEndian>,
401 pub line_len: U32<LittleEndian>,
402 pub offset: U64<LittleEndian>,
403}
404
/// `SymbolEntry::kind` value that marks a `PUBLIC` symbol.
const SYMBOL_ENTRY_KIND_PUBLIC: u32 = 0;
/// `SymbolEntry::kind` value that the serializer writes for a `FUNC` block.
const SYMBOL_ENTRY_KIND_FUNC: u32 = 1;
407
408#[derive(FromBytes, KnownLayout, Immutable, IntoBytes, Unaligned)]
409#[repr(C)]
410struct SymbolEntry {
411 pub kind: U32<LittleEndian>,
413 pub line_or_block_len: U32<LittleEndian>,
415 pub offset: U64<LittleEndian>,
417}
418
419#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
465pub enum BreakpadSymbolType {
466 Public(BreakpadPublicSymbol),
467 Func(BreakpadFuncSymbol),
468}
469
/// Location of one `PUBLIC` line in the symbol file.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct BreakpadPublicSymbol {
    /// Offset of the start of the line.
    pub file_offset: u64,
    /// Length of the line, not counting trailing `\r` bytes or the line break.
    pub line_length: u32,
}
477
478impl BreakpadPublicSymbol {
479 pub fn parse<'a>(
480 &self,
481 input: &'a [u8],
482 ) -> Result<BreakpadPublicSymbolInfo<'a>, BreakpadParseError> {
483 let (_rest, (_address, name)) =
484 public_line(input).map_err(|_| BreakpadParseError::ParsingPublic)?;
485 Ok(BreakpadPublicSymbolInfo {
486 name: str::from_utf8(name).map_err(|_| BreakpadParseError::BadUtf8)?,
487 })
488 }
489}
490
491fn read_line_and_advance<'a>(input: &mut &'a [u8]) -> &'a [u8] {
495 let mut line = if let Some(line_break) = memchr(b'\n', input) {
496 let line = &input[..line_break];
497 *input = &input[(line_break + 1)..];
498 line
499 } else {
500 let line = *input;
501 *input = &[];
502 line
503 };
504 while line.last() == Some(&b'\r') {
505 line = &line[..(line.len() - 1)];
506 }
507 line
508}
509
/// Location of one `FUNC` block in the symbol file.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct BreakpadFuncSymbol {
    /// Offset of the start of the `FUNC` line.
    pub file_offset: u64,
    /// Number of bytes from the start of the `FUNC` line to the start of the
    /// line that ends the block (or to the end of the file).
    pub block_length: u32,
}
517
518impl BreakpadFuncSymbol {
519 pub fn parse<'a>(
520 &self,
521 mut input: &'a [u8],
522 ) -> Result<BreakpadFuncSymbolInfo<'a>, BreakpadParseError> {
523 let first_line = read_line_and_advance(&mut input);
524 let (_rest, (_address, size, name)) =
525 func_line(first_line).map_err(|_| BreakpadParseError::ParsingFunc)?;
526 let mut inlinees = Vec::new();
527 let mut lines = Vec::new();
528 while !input.is_empty() {
529 let line = read_line_and_advance(&mut input);
530 if line.starts_with(b"INLINE ") {
531 let (_rest, new_inlinees) =
532 inline_line(line).map_err(|_| BreakpadParseError::ParsingInline)?;
533 inlinees.extend(new_inlinees);
534 } else if let Ok((_rest, line_data)) = func_line_data(line) {
535 lines.push(line_data);
536 }
537 }
538 inlinees.sort_by_key(|inlinee| (inlinee.depth, inlinee.address));
539 Ok(BreakpadFuncSymbolInfo {
540 name: str::from_utf8(name).map_err(|_| BreakpadParseError::BadUtf8)?,
541 size,
542 lines,
543 inlinees,
544 })
545 }
546}
547
548pub trait FileOrInlineOrigin {
549 fn index(&self) -> u32;
550 fn offset_and_length(&self) -> (u64, u32);
551 fn parse(line: &[u8]) -> Result<&str, BreakpadParseError>;
552}
553
554#[derive(Debug, Clone, PartialEq, Eq)]
555pub struct ItemMap<I: FileOrInlineOrigin> {
556 inner: Vec<I>,
557}
558
559impl<I: FileOrInlineOrigin> ItemMap<I> {
560 pub fn from_sorted_vec(vec: Vec<I>) -> Self {
561 Self { inner: vec }
562 }
563 pub fn len(&self) -> usize {
564 self.inner.len()
565 }
566 #[allow(unused)]
567 pub fn is_empty(&self) -> bool {
568 self.inner.is_empty()
569 }
570 pub fn as_slice(&self) -> &[I] {
571 &self.inner
572 }
573 pub fn get(&self, index: u32) -> Option<&I> {
574 Some(&self.inner[self.get_vec_index(index)?])
575 }
576 fn get_vec_index(&self, index: u32) -> Option<usize> {
577 self.inner.binary_search_by_key(&index, I::index).ok()
578 }
579}
580
/// Location of one `FILE` line in the symbol file.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct BreakpadFileLine {
    /// The file index given on the `FILE` line.
    pub index: u32,
    /// Offset of the start of the line.
    pub file_offset: u64,
    /// Length of the line, not counting trailing `\r` bytes or the line break.
    pub line_length: u32,
}
590
591impl FileOrInlineOrigin for BreakpadFileLine {
592 fn index(&self) -> u32 {
593 self.index
594 }
595 fn offset_and_length(&self) -> (u64, u32) {
596 (self.file_offset, self.line_length)
597 }
598 fn parse(input: &[u8]) -> Result<&str, BreakpadParseError> {
599 let (_rest, (_index, name)) =
600 file_line(input).map_err(|_| BreakpadParseError::ParsingFile)?;
601 str::from_utf8(name).map_err(|_| BreakpadParseError::BadUtf8)
602 }
603}
604
/// Location of one `INLINE_ORIGIN` line in the symbol file.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct BreakpadInlineOriginLine {
    /// The origin index given on the `INLINE_ORIGIN` line.
    pub index: u32,
    /// Offset of the start of the line.
    pub file_offset: u64,
    /// Length of the line, not counting trailing `\r` bytes or the line break.
    pub line_length: u32,
}
614
615impl FileOrInlineOrigin for BreakpadInlineOriginLine {
616 fn index(&self) -> u32 {
617 self.index
618 }
619 fn offset_and_length(&self) -> (u64, u32) {
620 (self.file_offset, self.line_length)
621 }
622 fn parse(input: &[u8]) -> Result<&str, BreakpadParseError> {
623 let (_rest, (_index, name)) =
624 inline_origin_line(input).map_err(|_| BreakpadParseError::ParsingFile)?;
625 str::from_utf8(name).map_err(|_| BreakpadParseError::BadUtf8)
626 }
627}
628
629#[derive(Debug, Clone, Default)]
630pub struct BreakpadIndexParser {
631 line_buffer: LineBuffer,
632 inner: BreakpadIndexParserInner,
633}
634
635impl BreakpadIndexParser {
636 pub fn new() -> Self {
637 Self::default()
638 }
639
640 pub fn consume(&mut self, chunk: &[u8]) {
641 let inner = &mut self.inner;
642 let line_buffer = &mut self.line_buffer;
643 line_buffer.consume(chunk, |offset, line| inner.process_line(offset, line));
644 }
645
646 pub fn finish(mut self) -> Result<BreakpadIndex, BreakpadParseError> {
647 let inner = &mut self.inner;
648 let final_offset = self
649 .line_buffer
650 .finish(|offset, line| inner.process_line(offset, line));
651 self.inner.finish(final_offset)
652 }
653}
654
655#[derive(Debug, Clone)]
656struct SortedVecBuilder<I: FileOrInlineOrigin> {
657 inner: Vec<I>,
658 last_sorted_index: Option<u32>,
659 is_sorted: bool,
660}
661
662impl<I: FileOrInlineOrigin> Default for SortedVecBuilder<I> {
663 fn default() -> Self {
664 Self {
665 inner: Vec::new(),
666 last_sorted_index: None,
667 is_sorted: true,
668 }
669 }
670}
671
672impl<I: FileOrInlineOrigin> SortedVecBuilder<I> {
673 pub fn push(&mut self, item: I) {
674 if self.is_sorted {
675 let item_index = item.index();
676 match self.last_sorted_index {
677 None => {
678 self.last_sorted_index = Some(item_index);
680 }
681 Some(last_index) if item_index > last_index => {
682 self.last_sorted_index = Some(item_index);
684 }
685 Some(last_index) if item_index == last_index => {
686 return;
689 }
690 Some(_last_index) => {
691 self.is_sorted = false;
693 }
694 }
695 }
696 self.inner.push(item);
697 }
698
699 pub fn into_sorted_vec(mut self) -> Vec<I> {
700 if !self.is_sorted {
701 self.inner.sort_by_key(I::index);
702 self.inner.dedup_by_key(|item| item.index());
703 }
704 self.inner
705 }
706}
707
708#[derive(Debug, Clone, Default)]
709struct BreakpadIndexParserInner {
710 module_info_bytes: Vec<u8>,
711 module_info: Option<(DebugId, String, String, String)>,
712 name: Option<String>,
713 code_id: Option<CodeId>,
714 symbols: Vec<(u32, BreakpadSymbolType)>,
715 files: SortedVecBuilder<BreakpadFileLine>,
716 inline_origins: SortedVecBuilder<BreakpadInlineOriginLine>,
717 pending_func_block: Option<(u32, u64)>,
718}
719
720impl BreakpadIndexParserInner {
721 pub fn process_line(&mut self, file_offset: u64, line: &[u8]) {
722 let mut input = line;
723 while input.last() == Some(&b'\r') {
724 input = &input[..(input.len() - 1)];
725 }
726 if self.module_info.is_none() {
727 if let Ok((_r, (os, arch, debug_id, debug_name))) = module_line(input) {
729 self.module_info = Some((
730 debug_id,
731 os.to_string(),
732 arch.to_string(),
733 debug_name.to_string(),
734 ));
735 }
736 input.clone_into(&mut self.module_info_bytes);
737 return;
738 }
739 let line_length = input.len() as u32;
740 if let Ok((_r, (index, _filename))) = file_line(input) {
741 self.files.push(BreakpadFileLine {
742 index,
743 file_offset,
744 line_length,
745 });
746 } else if let Ok((_r, (index, _inline_origin))) = inline_origin_line(input) {
747 self.inline_origins.push(BreakpadInlineOriginLine {
748 index,
749 file_offset,
750 line_length,
751 });
752 } else if let Ok((_r, (address, _name))) = public_line(input) {
753 self.finish_pending_func_block(file_offset);
754 self.symbols.push((
755 address,
756 BreakpadSymbolType::Public(BreakpadPublicSymbol {
757 file_offset,
758 line_length,
759 }),
760 ));
761 } else if let Ok((_r, (address, _size, _name))) = func_line(input) {
762 self.finish_pending_func_block(file_offset);
763 self.pending_func_block = Some((address, file_offset));
764 } else if input.starts_with(b"INFO ") {
765 self.finish_pending_func_block(file_offset);
766 self.module_info_bytes.push(b'\n');
767 self.module_info_bytes.extend_from_slice(input);
768 if let Ok((_r, (code_id, name_str))) = info_code_id_line(input) {
769 self.code_id = CodeId::from_str(code_id).ok();
770 self.name = name_str.map(ToOwned::to_owned);
771 }
772 } else if input.starts_with(b"STACK ") {
773 self.finish_pending_func_block(file_offset);
774 }
775 }
776
777 fn finish_pending_func_block(&mut self, non_func_line_start_offset: u64) {
778 if let Some((address, file_offset)) = self.pending_func_block.take() {
779 let block_length = (non_func_line_start_offset - file_offset) as u32;
780 self.symbols.push((
781 address,
782 BreakpadSymbolType::Func(BreakpadFuncSymbol {
783 file_offset,
784 block_length,
785 }),
786 ));
787 }
788 }
789
790 pub fn finish(mut self, file_end_offset: u64) -> Result<BreakpadIndex, BreakpadParseError> {
791 self.finish_pending_func_block(file_end_offset);
792 let BreakpadIndexParserInner {
793 mut symbols,
794 module_info_bytes,
795 files,
796 inline_origins,
797 module_info,
798 name,
799 code_id,
800 ..
801 } = self;
802 symbols.sort_by_key(|(address, _)| *address);
803 symbols.dedup_by_key(|(address, _)| *address);
804 let (symbol_addresses, symbol_offsets) = symbols.into_iter().unzip();
805
806 let files = ItemMap::from_sorted_vec(files.into_sorted_vec());
807 let inline_origins = ItemMap::from_sorted_vec(inline_origins.into_sorted_vec());
808
809 let (debug_id, os, arch, debug_name) =
810 module_info.ok_or(BreakpadParseError::NoModuleInfoInSymFile)?;
811 Ok(BreakpadIndex {
812 module_info_bytes,
813 debug_name,
814 debug_id,
815 code_id,
816 name,
817 arch,
818 os,
819 symbol_addresses,
820 symbol_offsets,
821 files,
822 inline_origins,
823 })
824 }
825}
826
/// Reassembles lines from a byte stream that arrives in arbitrary chunks and
/// tracks the stream offset at which each line starts.
#[derive(Debug, Clone, Default)]
pub struct LineBuffer {
    /// Bytes of the current, not yet terminated line.
    leftover_bytes: Vec<u8>,
    /// Total number of bytes consumed so far.
    current_offset: u64,
}
837
838impl LineBuffer {
839 pub fn consume(&mut self, mut chunk: &[u8], mut f: impl FnMut(u64, &[u8])) {
840 assert!(
841 self.leftover_bytes.len() as u64 <= self.current_offset,
842 "Caller supplied more self.leftover_bytes than we could have read ourselves"
843 );
844 loop {
845 match memchr(b'\n', chunk) {
846 None => {
847 self.leftover_bytes.extend_from_slice(chunk);
848 self.current_offset += chunk.len() as u64;
849 return;
850 }
851 Some(line_break_pos_in_chunk) => {
852 let chunk_until_line_break = &chunk[..line_break_pos_in_chunk];
853 chunk = &chunk[(line_break_pos_in_chunk + 1)..];
855 let (line, line_start_offset) = if self.leftover_bytes.is_empty() {
856 (chunk_until_line_break, self.current_offset)
857 } else {
858 let line_start_offset =
859 self.current_offset - (self.leftover_bytes.len() as u64);
860 self.leftover_bytes.extend(chunk_until_line_break);
861 (self.leftover_bytes.as_slice(), line_start_offset)
862 };
863 self.current_offset += line_break_pos_in_chunk as u64 + 1;
864 f(line_start_offset, line);
865 self.leftover_bytes.clear();
866 }
867 };
868 }
869 }
870
871 pub fn finish(self, mut f: impl FnMut(u64, &[u8])) -> u64 {
872 if !self.leftover_bytes.is_empty() {
873 let line_start_offset = self.current_offset - (self.leftover_bytes.len() as u64);
874 f(line_start_offset, &self.leftover_bytes);
875 }
876 self.current_offset
877 }
878}
879
880#[derive(thiserror::Error, Debug)]
881#[non_exhaustive]
882pub enum BreakpadParseError {
883 #[error("Error parsing PUBLIC line")]
884 ParsingPublic,
885
886 #[error("Error parsing FILE line")]
887 ParsingFile,
888
889 #[error("Error parsing INLINE_ORIGIN line")]
890 ParsingInlineOrigin,
891
892 #[error("Error parsing FUNC line")]
893 ParsingFunc,
894
895 #[error("Error parsing INLINE line")]
896 ParsingInline,
897
898 #[error("Error parsing func line data line")]
899 ParsingFuncLine,
900
901 #[error("Malformed UTF-8")]
902 BadUtf8,
903
904 #[error("The Breakpad sym file did not start with a valid MODULE line")]
905 NoModuleInfoInSymFile,
906}
907
/// Parsed contents of a `PUBLIC` line.
#[derive(Debug, Clone)]
pub struct BreakpadPublicSymbolInfo<'a> {
    /// Symbol name, borrowed from the parsed line.
    pub name: &'a str,
}
912
913#[derive(Debug, Clone)]
914pub struct BreakpadFuncSymbolInfo<'a> {
915 pub name: &'a str,
916 pub size: u32,
917 pub lines: Vec<SourceLine>,
918 pub inlinees: Vec<Inlinee>,
919}
920
921impl BreakpadFuncSymbolInfo<'_> {
922 pub fn get_innermost_sourceloc(&self, addr: u32) -> Option<&SourceLine> {
931 let line_index = match self.lines.binary_search_by_key(&addr, |line| line.address) {
932 Ok(i) => i,
933 Err(0) => return None,
934 Err(i) => i - 1,
935 };
936 Some(&self.lines[line_index])
937 }
938
939 pub fn get_inlinee_at_depth(&self, depth: u32, addr: u32) -> Option<&Inlinee> {
947 let index = match self
948 .inlinees
949 .binary_search_by_key(&(depth, addr), |inlinee| (inlinee.depth, inlinee.address))
950 {
951 Ok(i) => i,
952 Err(0) => return None,
953 Err(i) => i - 1,
954 };
955 let inlinee = &self.inlinees[index];
956 if inlinee.depth != depth {
957 return None;
958 }
959 let end_address = inlinee.address.checked_add(inlinee.size)?;
960 if addr < end_address {
961 Some(inlinee)
962 } else {
963 None
964 }
965 }
966}
967
968fn hex_str<T: std::ops::Shl<T, Output = T> + std::ops::BitOr<T, Output = T> + From<u8>>(
970 input: &[u8],
971) -> IResult<&[u8], T> {
972 let max_len = mem::size_of::<T>() * 2;
975
976 let mut res: T = T::from(0);
977 let mut k = 0;
978 for v in input.iter().take(max_len) {
979 let digit = match (*v as char).to_digit(16) {
980 Some(v) => v,
981 None => break,
982 };
983 res = res << T::from(4);
984 res = res | T::from(digit as u8);
985 k += 1;
986 }
987 if k == 0 {
988 return Err(Err::Error(Error::from_error_kind(
989 input,
990 ErrorKind::HexDigit,
991 )));
992 }
993 let remaining = &input[k..];
994 Ok((remaining, res))
995}
996
997fn decimal_u32(input: &[u8]) -> IResult<&[u8], u32> {
1005 const MAX_LEN: usize = 10; let mut res: u64 = 0;
1007 let mut k = 0;
1008 for v in input.iter().take(MAX_LEN) {
1009 let digit = *v as char;
1010 let digit_value = match digit.to_digit(10) {
1011 Some(v) => v,
1012 None => break,
1013 };
1014 res = res * 10 + digit_value as u64;
1015 k += 1;
1016 }
1017 if k == 0 {
1018 return Err(Err::Error(Error::from_error_kind(input, ErrorKind::Digit)));
1019 }
1020 let res = u32::try_from(res)
1021 .map_err(|_| Err::Error(Error::from_error_kind(input, ErrorKind::TooLarge)))?;
1022 let remaining = &input[k..];
1023 Ok((remaining, res))
1024}
1025
1026fn non_space(input: &[u8]) -> IResult<&[u8], &[u8]> {
1028 take_while(|c: u8| c != b' ')(input)
1029}
1030
1031fn module_line(input: &[u8]) -> IResult<&[u8], (&str, &str, DebugId, &str)> {
1033 let (input, _) = terminated(tag("MODULE"), space1)(input)?;
1034 let (input, (os, cpu, debug_id, name)) = cut(tuple((
1035 terminated(map_res(non_space, str::from_utf8), space1), terminated(map_res(non_space, str::from_utf8), space1), terminated(
1038 map_res(map_res(hex_digit1, str::from_utf8), DebugId::from_breakpad),
1039 space1,
1040 ), map_res(rest, str::from_utf8), )))(input)?;
1043 Ok((input, (os, cpu, debug_id, name)))
1044}
1045
1046fn info_code_id_line(input: &[u8]) -> IResult<&[u8], (&str, Option<&str>)> {
1048 let (input, _) = terminated(tag("INFO CODE_ID"), space1)(input)?;
1049 let (input, code_id_with_name) = map_res(rest, str::from_utf8)(input)?;
1050 match code_id_with_name.split_once(' ') {
1051 Some((code_id, name)) => Ok((input, (code_id, Some(name)))),
1052 None => Ok((input, (code_id_with_name, None))),
1053 }
1054}
1055
1056fn file_line(input: &[u8]) -> IResult<&[u8], (u32, &[u8])> {
1058 let (input, _) = terminated(tag("FILE"), space1)(input)?;
1059 let (input, (id, filename)) = cut(tuple((terminated(decimal_u32, space1), rest)))(input)?;
1060 Ok((input, (id, filename)))
1061}
1062
1063fn inline_origin_line(input: &[u8]) -> IResult<&[u8], (u32, &[u8])> {
1065 let (input, _) = terminated(tag("INLINE_ORIGIN"), space1)(input)?;
1066 let (input, (id, function)) = cut(tuple((terminated(decimal_u32, space1), rest)))(input)?;
1067 Ok((input, (id, function)))
1068}
1069
1070fn public_line(input: &[u8]) -> IResult<&[u8], (u32, &[u8])> {
1072 let (input, _) = terminated(tag("PUBLIC"), space1)(input)?;
1073 let (input, (_multiple, address, _parameter_size, name)) = cut(tuple((
1074 opt(terminated(tag("m"), space1)),
1075 terminated(hex_str::<u64>, space1),
1076 terminated(hex_str::<u32>, space1),
1077 rest,
1078 )))(input)?;
1079 Ok((input, (address as u32, name)))
1080}
1081
/// One line record inside a `FUNC` block.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SourceLine {
    /// Start address of the covered range (first field of the record).
    pub address: u32,
    /// Size of the covered range (second field of the record).
    pub size: u32,
    /// File index (fourth field of the record).
    pub file: u32,
    /// Line number (third field of the record).
    pub line: u32,
}
1096
/// One address range of an `INLINE` record.
///
/// The derived ordering compares the fields in declaration order, so records
/// order by `depth` first and `address` second.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Inlinee {
    /// Nesting depth given on the `INLINE` line.
    pub depth: u32,
    /// Start address of the range.
    pub address: u32,
    /// Size of the range.
    pub size: u32,
    /// File index of the call site.
    pub call_file: u32,
    /// Line number of the call site.
    pub call_line: u32,
    /// Index of the `INLINE_ORIGIN` record naming the inlined function.
    pub origin_id: u32,
}
1115
1116fn func_line_data(input: &[u8]) -> IResult<&[u8], SourceLine> {
1120 let (input, (address, size, line, file)) = tuple((
1121 terminated(hex_str::<u64>, space1),
1122 terminated(hex_str::<u32>, space1),
1123 terminated(decimal_u32, space1),
1124 decimal_u32,
1125 ))(input)?;
1126 Ok((
1127 input,
1128 SourceLine {
1129 address: address as u32,
1130 size,
1131 file,
1132 line,
1133 },
1134 ))
1135}
1136
1137fn func_line(input: &[u8]) -> IResult<&[u8], (u32, u32, &[u8])> {
1139 let (input, _) = terminated(tag("FUNC"), space1)(input)?;
1140 let (input, (_multiple, address, size, _parameter_size, name)) = cut(tuple((
1141 opt(terminated(tag("m"), space1)),
1142 terminated(hex_str::<u32>, space1),
1143 terminated(hex_str::<u32>, space1),
1144 terminated(hex_str::<u32>, space1),
1145 rest,
1146 )))(input)?;
1147 Ok((input, (address, size, name)))
1148}
1149
1150fn inline_address_range(input: &[u8]) -> IResult<&[u8], (u32, u32)> {
1152 tuple((terminated(hex_str::<u32>, space1), hex_str::<u32>))(input)
1153}
1154
1155fn inline_line(input: &[u8]) -> IResult<&[u8], impl Iterator<Item = Inlinee>> {
1159 let (input, _) = terminated(tag("INLINE"), space1)(input)?;
1160 let (input, (depth, call_line, call_file, origin_id)) = cut(tuple((
1161 terminated(decimal_u32, space1),
1162 terminated(decimal_u32, space1),
1163 terminated(decimal_u32, space1),
1164 terminated(decimal_u32, space1),
1165 )))(input)?;
1166 let (input, address_ranges) = cut(separated_list1(space1, inline_address_range))(input)?;
1167 Ok((
1168 input,
1169 address_ranges
1170 .into_iter()
1171 .map(move |(address, size)| Inlinee {
1172 address,
1173 size,
1174 call_file,
1175 call_line,
1176 depth,
1177 origin_id,
1178 }),
1179 ))
1180}
1181
// Unit tests for building an index from Breakpad symbol text, round-tripping
// it through the serialized symindex form, and parsing a single FUNC block.
#[cfg(test)]
mod test {
    use std::str::FromStr;

    use super::*;
    use crate::{ElfBuildId, PeCodeId};

    /// Feeds a minimal Linux symbol file (MODULE, INFO CODE_ID and one FILE
    /// line) to the parser in four chunks whose boundaries fall in the middle
    /// of lines, then checks the resulting index and its serialization
    /// round-trip.
    #[test]
    fn test1() {
        let mut parser = BreakpadIndexParser::new();
        parser.consume(b"MODULE Linux x86_64 39CA3106713C8D0FFEE4605AFA2526670 libmozsandbox.so\nINFO CODE_ID ");
        parser.consume(b"0631CA393C710F8DFEE4605AFA2526671AD4EF17\nFILE 0 hg:hg.mozilla.org/mozilla-central:se");
        parser.consume(b"curity/sandbox/chromium/base/strings/safe_sprintf.cc:f150bc1f71d09e1e1941065951f0f5a3");
        parser.consume(b"8628f080");
        let index = parser.finish().unwrap();
        // 125 is the combined byte length of the MODULE and INFO lines
        // (including their newlines), i.e. where the FILE line starts;
        // 136 is the byte length of the FILE line itself, which has no
        // trailing newline in this input.
        assert_eq!(
            index.files.get(0).unwrap(),
            &BreakpadFileLine {
                index: 0,
                file_offset: 125,
                line_length: 136,
            }
        );
        assert_eq!(
            index.debug_id,
            DebugId::from_breakpad("39CA3106713C8D0FFEE4605AFA2526670").unwrap()
        );
        // The expected build id is the INFO CODE_ID value spelled in lowercase.
        assert_eq!(
            index.code_id,
            Some(CodeId::ElfBuildId(
                ElfBuildId::from_str("0631ca393c710f8dfee4605afa2526671ad4ef17").unwrap()
            ))
        );

        // Serializing and re-parsing must reproduce an equal index.
        let v = index.serialize_to_bytes();
        let index2 = BreakpadIndex::parse_symindex_file(&v).unwrap();

        assert_eq!(index2, index);
    }

    /// Feeds a Windows symbol file containing MODULE, INFO, FILE,
    /// INLINE_ORIGIN, FUNC, INLINE and line records to the parser in chunks
    /// split at arbitrary positions (including inside keywords), then checks
    /// the module-level fields and the serialization round-trip.
    #[test]
    fn test2() {
        let mut parser = BreakpadIndexParser::new();
        parser.consume(b"MODULE windows x86_64 F1E853FD662672044C4C44205044422E1 firefox.pdb\nIN");
        parser.consume(b"FO CODE_ID 63C036DBA7000 firefox.exe\nINFO GENERATOR mozilla/dump_syms ");
        parser.consume(b"2.1.1\nFILE 0 /builds/worker/workspace/obj-build/browser/app/d:/agent/_");
        parser.consume(b"work/2/s/src/vctools/delayimp/dloadsup.h\nFILE 1 /builds/worker/workspa");
        parser.consume(b"ce/obj-build/browser/app/d:/agent/_work/2/s/src/externalapis/windows/10");
        parser.consume(b"/sdk/inc/winnt.h\nINLINE_ORIGIN 0 DloadLock()\nINLINE_ORIGIN 1 DloadUnl");
        parser.consume(b"ock()\nINLINE_ORIGIN 2 WritePointerRelease(void**, void*)\nINLINE_ORIGI");
        parser.consume(b"N 3 WriteRelease64(long long*, long long)\nFUNC 2b754 aa 0 DloadAcquire");
        parser.consume(b"SectionWriteAccess()\nINLINE 0 658 0 0 2b76a 3d\nINLINE 0 665 0 1 2b7ca");
        parser.consume(b" 17 2b7e6 12\nINLINE 1 345 0 2 2b7ed b\nINLINE 2 8358 1 3 2b7ed b\n2b75");
        parser.consume(b"4 6 644 0\n2b75a 10 650 0\n2b76a e 299 0\n2b778 14 300 0\n2b78c 2 301 0");
        parser.consume(b"\n2b78e 2 306 0\n2b790 c 305 0\n2b79c b 309 0\n2b7a7 10 660 0\n2b7b7 2 ");
        parser.consume(b"661 0\n2b7b9 11 662 0\n2b7ca 9 340 0\n2b7d3 e 341 0\n2b7e1 c 668 0\n2b7");
        parser.consume(b"ed b 7729 1\n2b7f8 6 668 0");
        let index = parser.finish().unwrap();
        assert_eq!(&index.debug_name, "firefox.pdb");
        assert_eq!(
            index.debug_id,
            DebugId::from_breakpad("F1E853FD662672044C4C44205044422E1").unwrap()
        );
        // The INFO CODE_ID line carries both the code id and the binary name.
        assert_eq!(index.name.as_deref(), Some("firefox.exe"));
        assert_eq!(
            index.code_id,
            Some(CodeId::PeCodeId(
                PeCodeId::from_str("63C036DBA7000").unwrap()
            ))
        );
        // The INFO GENERATOR line must be present in the stored module info
        // bytes.
        assert!(std::str::from_utf8(&index.module_info_bytes)
            .unwrap()
            .contains("INFO GENERATOR mozilla/dump_syms 2.1.1"));

        // Serializing and re-parsing must reproduce an equal index.
        let v = index.serialize_to_bytes();
        let index2 = BreakpadIndex::parse_symindex_file(&v).unwrap();

        assert_eq!(index2, index);
    }

    /// Parses a single FUNC block (the FUNC line plus its four line records)
    /// cut out of a buffer that has unrelated `JUNK` lines before and after
    /// it, and checks the function name, size and first/last line records.
    #[test]
    fn func_parsing() {
        let block =
            b"JUNK\nFUNC 1130 28 0 main\n1130 f 24 0\n113f 7 25 0\n1146 9 26 0\n114f 9 27 0\nJUNK";
        // The block starts right after the leading "JUNK\n" and ends right
        // before the trailing "\nJUNK".
        let func = BreakpadFuncSymbol {
            file_offset: "JUNK\n".len() as u64,
            block_length: (block.len() - "JUNK\n".len() - "\nJUNK".len()) as u32,
        };
        let input = &block[func.file_offset as usize..][..func.block_length as usize];
        let func = func.parse(input).unwrap();
        assert_eq!(func.name, "main");
        assert_eq!(func.size, 0x28);
        assert_eq!(func.lines.len(), 4);
        assert_eq!(
            func.lines[0],
            SourceLine {
                address: 0x1130,
                size: 0xf,
                file: 0,
                line: 24,
            }
        );
        assert_eq!(
            func.lines[3],
            SourceLine {
                address: 0x114f,
                size: 0x9,
                file: 0,
                line: 27,
            }
        );
    }
}