samply_symbols/breakpad/
index.rs

1use std::fmt::Debug;
2use std::str::FromStr;
3use std::{mem, str};
4
5use debugid::DebugId;
6use memchr::memchr;
7use nom::bytes::complete::{tag, take_while};
8use nom::character::complete::{hex_digit1, space1};
9use nom::combinator::{cut, map_res, opt, rest};
10use nom::error::{Error, ErrorKind, ParseError};
11use nom::multi::separated_list1;
12use nom::sequence::{terminated, tuple};
13use nom::{Err, IResult};
14use zerocopy::{IntoBytes, LittleEndian, Ref, U32, U64};
15use zerocopy_derive::*;
16
17use crate::CodeId;
18
/// An index over a Breakpad symbol file: the module metadata plus the file
/// offsets of its `FILE`, `INLINE_ORIGIN`, `PUBLIC` and `FUNC` records.
///
/// The index can be written to and read back from the `.symindex` binary
/// format via `serialize_to_bytes` and `parse_symindex_file`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BreakpadIndex {
    /// The raw bytes of the module info section (the `MODULE` line and any
    /// `INFO` lines, separated by `\n`).
    pub module_info_bytes: Vec<u8>,
    /// The debug name parsed from the `MODULE` line.
    pub debug_name: String,
    /// The debug ID parsed from the `MODULE` line.
    pub debug_id: DebugId,
    /// The architecture string parsed from the `MODULE` line.
    pub arch: String,
    /// The operating system string parsed from the `MODULE` line.
    pub os: String,
    /// The optional name parsed from an `INFO` line together with the code ID.
    pub name: Option<String>,
    /// The code ID parsed from an `INFO` line, if one was present and valid.
    pub code_id: Option<CodeId>,
    /// The symbol addresses. Entry `i` corresponds to `symbol_offsets[i]`.
    pub symbol_addresses: Vec<u32>,
    /// For each address in `symbol_addresses`, where the symbol's `PUBLIC`
    /// line or `FUNC` block is located in the symbol file.
    pub symbol_offsets: Vec<BreakpadSymbolType>,
    /// The locations of the `FILE` lines, looked up by file index.
    pub files: ItemMap<BreakpadFileLine>,
    /// The locations of the `INLINE_ORIGIN` lines, looked up by inline origin index.
    pub inline_origins: ItemMap<BreakpadInlineOriginLine>,
}
33
/// Size in bytes of the `.symindex` file header.
const HEADER_SIZE: u32 = std::mem::size_of::<BreakpadSymindexFileHeader>() as u32;
/// Size in bytes of one entry in the file list or in the inline origin list.
const FILE_OR_INLINE_ORIGIN_ENTRY_SIZE: u32 = std::mem::size_of::<FileOrInlineOriginEntry>() as u32;
/// Size in bytes of one entry in the symbol address list.
const SYMBOL_ADDRESS_SIZE: u32 = std::mem::size_of::<u32>() as u32;
/// Size in bytes of one entry in the symbol entry list.
const SYMBOL_ENTRY_SIZE: u32 = std::mem::size_of::<SymbolEntry>() as u32;
38
39impl BreakpadIndex {
40    pub fn parse_symindex_file(data: &[u8]) -> Result<BreakpadIndex, BreakpadSymindexParseError> {
41        let header_bytes = data
42            .get(..HEADER_SIZE as usize)
43            .ok_or(BreakpadSymindexParseError::FileTooSmallForHeader)?;
44        let header = Ref::<&[u8], BreakpadSymindexFileHeader>::from_bytes(header_bytes).unwrap();
45        if &header.magic != b"SYMINDEX" {
46            return Err(BreakpadSymindexParseError::WrongMagicBytes);
47        }
48        let module_info_end_offset = header
49            .module_info_offset
50            .get()
51            .checked_add(header.module_info_len.get())
52            .ok_or(BreakpadSymindexParseError::ModuleInfoOffsetLenOverflow)?;
53        let module_info_bytes = data
54            .get(header.module_info_offset.get() as usize..module_info_end_offset as usize)
55            .ok_or(BreakpadSymindexParseError::CouldntReadModuleInfoBytes)?;
56
57        let (debug_id, os, arch, debug_name, name, code_id) = {
58            let mut module_info = None;
59            let mut code_id = None;
60            let mut name = None;
61            let mut module_info_line_buffer = LineBuffer::default();
62            module_info_line_buffer.consume(module_info_bytes, |_offset, line_slice| {
63                // Every file must start with a "MODULE " line.
64                if let Ok((_r, (os, arch, debug_id, debug_name))) = module_line(line_slice) {
65                    module_info = Some((
66                        debug_id,
67                        os.to_string(),
68                        arch.to_string(),
69                        debug_name.to_string(),
70                    ));
71                } else if let Ok((_r, (code_id_str, name_str))) = info_code_id_line(line_slice) {
72                    code_id = CodeId::from_str(code_id_str).ok();
73                    name = name_str.map(ToOwned::to_owned);
74                }
75            });
76            module_info_line_buffer.finish(|_offset, line_slice| {
77                // Every file must start with a "MODULE " line.
78                if let Ok((_r, (os, arch, debug_id, debug_name))) = module_line(line_slice) {
79                    module_info = Some((
80                        debug_id,
81                        os.to_string(),
82                        arch.to_string(),
83                        debug_name.to_string(),
84                    ));
85                } else if let Ok((_r, (code_id_str, name_str))) = info_code_id_line(line_slice) {
86                    code_id = CodeId::from_str(code_id_str).ok();
87                    name = name_str.map(ToOwned::to_owned);
88                }
89            });
90            match module_info {
91                Some((debug_id, os, arch, debug_name)) => {
92                    (debug_id, os, arch, debug_name, name, code_id)
93                }
94                None => return Err(BreakpadSymindexParseError::CouldntParseModuleInfoLine),
95            }
96        };
97        let file_list_bytes_len = header
98            .file_count
99            .get()
100            .checked_mul(FILE_OR_INLINE_ORIGIN_ENTRY_SIZE)
101            .ok_or(BreakpadSymindexParseError::FileListByteLenOverflow)?;
102        let file_list_end_offset = header
103            .file_entries_offset
104            .get()
105            .checked_add(file_list_bytes_len)
106            .ok_or(BreakpadSymindexParseError::FileListOffsetLenOverflow)?;
107        let file_list_bytes = data
108            .get(header.file_entries_offset.get() as usize..file_list_end_offset as usize)
109            .ok_or(BreakpadSymindexParseError::CouldntReadFileListBytes)?;
110        let file_list =
111            Ref::<&[u8], [FileOrInlineOriginEntry]>::from_bytes(file_list_bytes).unwrap();
112        let inline_origin_list_bytes_len = header
113            .inline_origin_count
114            .get()
115            .checked_mul(FILE_OR_INLINE_ORIGIN_ENTRY_SIZE)
116            .ok_or(BreakpadSymindexParseError::InlineOriginListByteLenOverflow)?;
117        let inline_origin_list_end_offset = header
118            .inline_origin_entries_offset
119            .get()
120            .checked_add(inline_origin_list_bytes_len)
121            .ok_or(BreakpadSymindexParseError::InlineOriginListOffsetLenOverflow)?;
122        let inline_origin_list_bytes = data
123            .get(
124                header.inline_origin_entries_offset.get() as usize
125                    ..inline_origin_list_end_offset as usize,
126            )
127            .ok_or(BreakpadSymindexParseError::CouldntReadInlineOriginListBytes)?;
128        let inline_origin_list =
129            Ref::<&[u8], [FileOrInlineOriginEntry]>::from_bytes(inline_origin_list_bytes).unwrap();
130        let symbol_address_list_bytes_len = header
131            .symbol_count
132            .get()
133            .checked_mul(SYMBOL_ADDRESS_SIZE)
134            .ok_or(BreakpadSymindexParseError::SymbolAddressListByteLenOverflow)?;
135        let symbol_address_list_end_offset = header
136            .symbol_addresses_offset
137            .get()
138            .checked_add(symbol_address_list_bytes_len)
139            .ok_or(BreakpadSymindexParseError::SymbolAddressListOffsetLenOverflow)?;
140        let symbol_address_list_bytes = data
141            .get(
142                header.symbol_addresses_offset.get() as usize
143                    ..symbol_address_list_end_offset as usize,
144            )
145            .ok_or(BreakpadSymindexParseError::CouldntReadSymbolAddressListBytes)?;
146        let symbol_address_list =
147            Ref::<&[u8], [U32<LittleEndian>]>::from_bytes(symbol_address_list_bytes).unwrap();
148        let symbol_entry_list_bytes_len = header
149            .symbol_count
150            .get()
151            .checked_mul(SYMBOL_ENTRY_SIZE)
152            .ok_or(BreakpadSymindexParseError::SymbolEntryListByteLenOverflow)?;
153        let symbol_entry_list_end_offset = header
154            .symbol_entries_offset
155            .get()
156            .checked_add(symbol_entry_list_bytes_len)
157            .ok_or(BreakpadSymindexParseError::SymbolEntryListOffsetLenOverflow)?;
158        let symbol_entry_list_bytes = data
159            .get(header.symbol_entries_offset.get() as usize..symbol_entry_list_end_offset as usize)
160            .ok_or(BreakpadSymindexParseError::CouldntReadSymbolEntryListBytes)?;
161        let symbol_entry_list =
162            Ref::<&[u8], [SymbolEntry]>::from_bytes(symbol_entry_list_bytes).unwrap();
163
164        let files: Vec<BreakpadFileLine> = file_list
165            .iter()
166            .map(|entry| BreakpadFileLine {
167                index: entry.index.get(),
168                file_offset: entry.offset.get(),
169                line_length: entry.line_len.get(),
170            })
171            .collect();
172        let inline_origins: Vec<BreakpadInlineOriginLine> = inline_origin_list
173            .iter()
174            .map(|entry| BreakpadInlineOriginLine {
175                index: entry.index.get(),
176                file_offset: entry.offset.get(),
177                line_length: entry.line_len.get(),
178            })
179            .collect();
180        let symbol_addresses: Vec<u32> = symbol_address_list.iter().map(|a| a.get()).collect();
181        let symbol_offsets: Vec<BreakpadSymbolType> = symbol_entry_list
182            .iter()
183            .map(|entry| {
184                if entry.kind.get() == SYMBOL_ENTRY_KIND_PUBLIC {
185                    // PUBLIC
186                    BreakpadSymbolType::Public(BreakpadPublicSymbol {
187                        file_offset: entry.offset.get(),
188                        line_length: entry.line_or_block_len.get(),
189                    })
190                } else {
191                    // FUNC
192                    BreakpadSymbolType::Func(BreakpadFuncSymbol {
193                        file_offset: entry.offset.get(),
194                        block_length: entry.line_or_block_len.get(),
195                    })
196                }
197            })
198            .collect();
199        Ok(BreakpadIndex {
200            module_info_bytes: module_info_bytes.to_owned(),
201            debug_name,
202            debug_id,
203            arch,
204            os,
205            name,
206            code_id,
207            symbol_addresses,
208            symbol_offsets,
209            files: ItemMap::from_sorted_vec(files),
210            inline_origins: ItemMap::from_sorted_vec(inline_origins),
211        })
212    }
213
214    pub fn serialize_to_bytes(&self) -> Vec<u8> {
215        let header_len = HEADER_SIZE;
216        let module_info_offset = header_len;
217        let module_info_len = self.module_info_bytes.len() as u32;
218        let padding_after_module_info = align_to_4_bytes(module_info_len) - module_info_len;
219        let file_entries_offset = module_info_offset + module_info_len + padding_after_module_info;
220        let file_count = self.files.len() as u32;
221        let file_entries_len = file_count * FILE_OR_INLINE_ORIGIN_ENTRY_SIZE;
222        let inline_origin_entries_offset = file_entries_offset + file_entries_len;
223        let inline_origin_count = self.inline_origins.len() as u32;
224        let inline_origin_entries_len = inline_origin_count * FILE_OR_INLINE_ORIGIN_ENTRY_SIZE;
225        let symbol_addresses_offset = inline_origin_entries_offset + inline_origin_entries_len;
226        let symbol_count = self.symbol_addresses.len() as u32;
227        let symbol_addresses_len = symbol_count * SYMBOL_ADDRESS_SIZE;
228        let symbol_entries_offset = symbol_addresses_offset + symbol_addresses_len;
229        let symbol_entries_len = symbol_count * SYMBOL_ENTRY_SIZE;
230        let total_file_len = symbol_entries_offset + symbol_entries_len;
231        let header = BreakpadSymindexFileHeader {
232            magic: *b"SYMINDEX",
233            version: 1.into(),
234            module_info_offset: module_info_offset.into(),
235            module_info_len: module_info_len.into(),
236            file_count: file_count.into(),
237            file_entries_offset: file_entries_offset.into(),
238            inline_origin_count: inline_origin_count.into(),
239            inline_origin_entries_offset: inline_origin_entries_offset.into(),
240            symbol_count: symbol_count.into(),
241            symbol_addresses_offset: symbol_addresses_offset.into(),
242            symbol_entries_offset: symbol_entries_offset.into(),
243        };
244
245        let mut vec = Vec::with_capacity(total_file_len as usize);
246        vec.extend_from_slice(header.as_bytes());
247        vec.extend_from_slice(&self.module_info_bytes);
248        vec.extend(std::iter::repeat(0).take(padding_after_module_info as usize));
249        for file in self.files.as_slice() {
250            vec.extend_from_slice(
251                FileOrInlineOriginEntry {
252                    index: file.index.into(),
253                    line_len: file.line_length.into(),
254                    offset: file.file_offset.into(),
255                }
256                .as_bytes(),
257            );
258        }
259        for inline_origin in self.inline_origins.as_slice() {
260            vec.extend_from_slice(
261                FileOrInlineOriginEntry {
262                    index: inline_origin.index.into(),
263                    line_len: inline_origin.line_length.into(),
264                    offset: inline_origin.file_offset.into(),
265                }
266                .as_bytes(),
267            );
268        }
269        for symbol_address in &self.symbol_addresses {
270            vec.extend_from_slice(U32::<LittleEndian>::from(*symbol_address).as_bytes());
271        }
272        for symbol in &self.symbol_offsets {
273            match symbol {
274                BreakpadSymbolType::Public(public) => {
275                    vec.extend_from_slice(
276                        SymbolEntry {
277                            kind: SYMBOL_ENTRY_KIND_PUBLIC.into(),
278                            line_or_block_len: public.line_length.into(),
279                            offset: public.file_offset.into(),
280                        }
281                        .as_bytes(),
282                    );
283                }
284                BreakpadSymbolType::Func(func) => {
285                    vec.extend_from_slice(
286                        SymbolEntry {
287                            kind: SYMBOL_ENTRY_KIND_FUNC.into(),
288                            line_or_block_len: func.block_length.into(),
289                            offset: func.file_offset.into(),
290                        }
291                        .as_bytes(),
292                    );
293                }
294            }
295        }
296
297        assert_eq!(vec.len(), total_file_len as usize);
298
299        vec
300    }
301}
302
/// Rounds `value` up to the nearest multiple of `factor`.
///
/// Values which already are a multiple of `factor` are returned unchanged.
/// The computation is based on the remainder, so it does not overflow in an
/// intermediate step when the rounded result itself fits into a `u32`
/// (for example for an already-aligned value close to `u32::MAX`).
///
/// # Panics
///
/// Panics if `factor` is zero.
#[inline]
fn round_up_to_multiple(value: u32, factor: u32) -> u32 {
    match value % factor {
        0 => value,
        remainder => value + (factor - remainder),
    }
}

/// Rounds `value` up to the nearest multiple of 4.
fn align_to_4_bytes(value: u32) -> u32 {
    round_up_to_multiple(value, 4)
}
311
/// The errors which can occur when parsing a `.symindex` file with
/// `BreakpadIndex::parse_symindex_file`.
///
/// Each section of the file has up to three variants: one for the byte length
/// computation overflowing, one for `offset + length` overflowing `u32`, and
/// one for the resulting range not being present in the file data.
#[derive(thiserror::Error, Debug)]
#[non_exhaustive]
pub enum BreakpadSymindexParseError {
    #[error("Not enough bytes in the file for the file header")]
    FileTooSmallForHeader,

    #[error("Wrong magic bytes in the symindex header")]
    WrongMagicBytes,

    #[error("Module offset + len overflowed u32")]
    ModuleInfoOffsetLenOverflow,

    #[error("Module info bytes couldn't be read from the file")]
    CouldntReadModuleInfoBytes,

    #[error("Module info bytes couldn't be parsed as utf-8")]
    ModuleInfoNotUtf8,

    #[error("MODULE INFO couldn't be parsed in module info section")]
    CouldntParseModuleInfoLine,

    #[error("File count * file entry size overflowed")]
    FileListByteLenOverflow,

    #[error("File list offset + len overflowed u32")]
    FileListOffsetLenOverflow,

    #[error("File list bytes couldn't be read from the file")]
    CouldntReadFileListBytes,

    #[error("Inline origin count * inline origin entry size overflowed")]
    InlineOriginListByteLenOverflow,

    #[error("Inline origin offset + len overflowed u32")]
    InlineOriginListOffsetLenOverflow,

    #[error("InlineOrigin list bytes couldn't be read from the file")]
    CouldntReadInlineOriginListBytes,

    #[error("Symbol count * 4 bytes per address overflowed")]
    SymbolAddressListByteLenOverflow,

    #[error("Symbol address list offset + len overflowed u32")]
    SymbolAddressListOffsetLenOverflow,

    #[error("Symbol address list bytes couldn't be read from the file")]
    CouldntReadSymbolAddressListBytes,

    #[error("Symbol count * symbol entry size overflowed")]
    SymbolEntryListByteLenOverflow,

    #[error("Symbol entry list offset + len overflowed u32")]
    SymbolEntryListOffsetLenOverflow,

    #[error("Symbol entry list bytes couldn't be read from the file")]
    CouldntReadSymbolEntryListBytes,
}
369
/// The header at the start of a `.symindex` file.
///
/// All integer fields are stored as little-endian. The "at N" notes give the
/// byte offset of each field within the header.
#[derive(FromBytes, KnownLayout, Immutable, IntoBytes, Unaligned)]
#[repr(C)]
struct BreakpadSymindexFileHeader {
    /// Always b"SYMINDEX", at 0
    magic: [u8; 8],
    /// Always 1, at 8
    version: U32<LittleEndian>,
    /// Points right after header, to where the module info starts, 4-byte aligned, at 12
    module_info_offset: U32<LittleEndian>,
    /// The length, in bytes, of the module info, at 16
    module_info_len: U32<LittleEndian>,
    /// The number of entries in the file list, at 20
    file_count: U32<LittleEndian>,
    /// Points to the start of the file list, 4-byte aligned, at 24
    file_entries_offset: U32<LittleEndian>,
    /// The number of entries in the inline origin list, at 28
    inline_origin_count: U32<LittleEndian>,
    /// Points to the start of the inline origin list, 4-byte aligned, at 32
    inline_origin_entries_offset: U32<LittleEndian>,
    /// The number of symbols, at 36
    symbol_count: U32<LittleEndian>,
    /// Points to the start of the symbol address list, 4-byte aligned, at 40
    symbol_addresses_offset: U32<LittleEndian>,
    /// Points to the start of the symbol entry list, 4-byte aligned, at 44
    symbol_entries_offset: U32<LittleEndian>,
}
396
/// One entry of the file list or of the inline origin list in a `.symindex`
/// file. Both lists use the same entry layout.
#[derive(FromBytes, KnownLayout, Immutable, IntoBytes, Unaligned)]
#[repr(C)]
struct FileOrInlineOriginEntry {
    /// The index of the `FILE` / `INLINE_ORIGIN` record.
    pub index: U32<LittleEndian>,
    /// The length of the record's line.
    pub line_len: U32<LittleEndian>,
    /// The file offset of the record's line.
    pub offset: U64<LittleEndian>,
}
404
/// Value of `SymbolEntry::kind` for symbols which come from a `PUBLIC` line.
const SYMBOL_ENTRY_KIND_PUBLIC: u32 = 0;
/// Value of `SymbolEntry::kind` for symbols which come from a `FUNC` block.
const SYMBOL_ENTRY_KIND_FUNC: u32 = 1;
407
/// One entry of the symbol entry list in a `.symindex` file.
#[derive(FromBytes, KnownLayout, Immutable, IntoBytes, Unaligned)]
#[repr(C)]
struct SymbolEntry {
    /// Uses `SYMBOL_ENTRY_KIND_*` constants (0 for PUBLIC, 1 for FUNC)
    pub kind: U32<LittleEndian>,
    /// For PUBLIC: The length of the line, starting at PUBLIC and ending before the line break.
    /// For FUNC: The length of the func block, starting at the FUNC and ending at the next
    /// top-level sym entry (symbol, stack record) or file end.
    pub line_or_block_len: U32<LittleEndian>,
    /// File offset of the PUBLIC / FUNC string.
    pub offset: U64<LittleEndian>,
}
418
419/// File extension: .symindex
420///
421/// Format: (all numbers encoded as little-endian)
422///
423/// magic: [u8; 8], // always b"SYMINDEX", at 0
424/// version: u32, // always 1, at 8
425/// module_info_offset: u32, // points right after header, to where the module info starts, 4-byte aligned, at 12
426/// module_info_len: u32, // the length, in bytes, of the module info, at 16
427/// file_count: u32, // the number of entries in the file list, at 20
428/// file_entries_offset: u32, // points to the start of the file list, 4-byte aligned, at 24
429/// inline_origin_count: u32, // the number of entries in the inline origin list, at 28
/// inline_origin_entries_offset: u32, // points to the start of the inline origin list, 4-byte aligned, at 32
431/// symbol_count: u32, // the number of symbols, at 36
432/// symbol_addresses_offset: u32, // points to the start of the symbol address list, 4-byte aligned, at 40
433/// symbol_entries_offset: u32, // points to the start of the symbol entry list, 4-byte aligned, at 44
434///
435/// /// Module info: utf-8 encoded string, contains line breaks, and the lines start with MODULE and INFO
436/// module_info: [u8; module_info_len], // located at module_info_offset
437///
438/// /// File list:
439/// file_list: [FileOrInlineOriginEntry; file_count], // located at file_entries_offset
440///
441/// /// Inline list:
/// inline_origin_list: [FileOrInlineOriginEntry; inline_origin_count], // located at inline_origin_entries_offset
443///
444/// /// Symbol addresses:
445/// symbol_addresses: [u32; symbol_count], // located at symbol_addresses_offset
446///
447/// /// Symbol entries:
448/// symbol_entries: [SymbolEntry; symbol_count], // located at symbol_entries_offset
449///
450/// #[repr(C)]
451/// struct FileOrInlineOriginEntry {
452///   pub index: u32,
453///   pub line_len: u32,
454///   pub offset: u64,
455/// }
456///
457/// #[repr(C)]
458/// struct SymbolEntry {
459///   pub kind: u32, // 0 or 1, 0 meaning Public and 1 meaning Func
460///   pub line_or_block_len: u32, // For PUBLIC: The length of the line, starting at PUBLIC and ending before the line break. For FUNC: The length of the func block, starting at the FUNC and ending at the next top-level sym entry (symbol, stack record) or file end
461///   pub offset: u64, // File offset of the PUBLIC / FUNC string.
462/// }
463
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum BreakpadSymbolType {
    /// A symbol which comes from a single `PUBLIC` line.
    Public(BreakpadPublicSymbol),
    /// A symbol which comes from a `FUNC` block.
    Func(BreakpadFuncSymbol),
}
469
/// The location of a `PUBLIC` line inside the symbol file.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct BreakpadPublicSymbol {
    /// The file offset at which there is the string `PUBLIC ` at the start of the line
    pub file_offset: u64,
    /// The length of the line, excluding line break (`\r*\n`). PUBLIC symbols only occupy a single line.
    pub line_length: u32,
}
477
478impl BreakpadPublicSymbol {
479    pub fn parse<'a>(
480        &self,
481        input: &'a [u8],
482    ) -> Result<BreakpadPublicSymbolInfo<'a>, BreakpadParseError> {
483        let (_rest, (_address, name)) =
484            public_line(input).map_err(|_| BreakpadParseError::ParsingPublic)?;
485        Ok(BreakpadPublicSymbolInfo {
486            name: str::from_utf8(name).map_err(|_| BreakpadParseError::BadUtf8)?,
487        })
488    }
489}
490
491/// Returns the first line, excluding trailing `\r*\n`.
492///
493/// Advances the input to just after `\n`.
494fn read_line_and_advance<'a>(input: &mut &'a [u8]) -> &'a [u8] {
495    let mut line = if let Some(line_break) = memchr(b'\n', input) {
496        let line = &input[..line_break];
497        *input = &input[(line_break + 1)..];
498        line
499    } else {
500        let line = *input;
501        *input = &[];
502        line
503    };
504    while line.last() == Some(&b'\r') {
505        line = &line[..(line.len() - 1)];
506    }
507    line
508}
509
/// The location of a `FUNC` block inside the symbol file.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct BreakpadFuncSymbol {
    /// The file offset at which there is the string `FUNC ` at the start of the line
    pub file_offset: u64,
    /// The number of bytes in the file taken up by this `FUNC` block, including its line record lines.
    pub block_length: u32,
}
517
518impl BreakpadFuncSymbol {
519    pub fn parse<'a>(
520        &self,
521        mut input: &'a [u8],
522    ) -> Result<BreakpadFuncSymbolInfo<'a>, BreakpadParseError> {
523        let first_line = read_line_and_advance(&mut input);
524        let (_rest, (_address, size, name)) =
525            func_line(first_line).map_err(|_| BreakpadParseError::ParsingFunc)?;
526        let mut inlinees = Vec::new();
527        let mut lines = Vec::new();
528        while !input.is_empty() {
529            let line = read_line_and_advance(&mut input);
530            if line.starts_with(b"INLINE ") {
531                let (_rest, new_inlinees) =
532                    inline_line(line).map_err(|_| BreakpadParseError::ParsingInline)?;
533                inlinees.extend(new_inlinees);
534            } else if let Ok((_rest, line_data)) = func_line_data(line) {
535                lines.push(line_data);
536            }
537        }
538        inlinees.sort_by_key(|inlinee| (inlinee.depth, inlinee.address));
539        Ok(BreakpadFuncSymbolInfo {
540            name: str::from_utf8(name).map_err(|_| BreakpadParseError::BadUtf8)?,
541            size,
542            lines,
543            inlinees,
544        })
545    }
546}
547
/// Shared interface of the indexed `FILE` and `INLINE_ORIGIN` line records,
/// used by `ItemMap` to look up records by index.
pub trait FileOrInlineOrigin {
    /// The index of this record.
    fn index(&self) -> u32;
    /// The file offset of this record's line, and the length of the line.
    fn offset_and_length(&self) -> (u64, u32);
    /// Parses the bytes of a line of this record type and returns the name stored in it.
    fn parse(line: &[u8]) -> Result<&str, BreakpadParseError>;
}
553
/// A collection of `FILE` or `INLINE_ORIGIN` records which supports lookup by
/// record index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ItemMap<I: FileOrInlineOrigin> {
    /// The records. Lookups use a binary search on the record index, so this
    /// must be sorted by index.
    inner: Vec<I>,
}
558
559impl<I: FileOrInlineOrigin> ItemMap<I> {
560    pub fn from_sorted_vec(vec: Vec<I>) -> Self {
561        Self { inner: vec }
562    }
563    pub fn len(&self) -> usize {
564        self.inner.len()
565    }
566    #[allow(unused)]
567    pub fn is_empty(&self) -> bool {
568        self.inner.is_empty()
569    }
570    pub fn as_slice(&self) -> &[I] {
571        &self.inner
572    }
573    pub fn get(&self, index: u32) -> Option<&I> {
574        Some(&self.inner[self.get_vec_index(index)?])
575    }
576    fn get_vec_index(&self, index: u32) -> Option<usize> {
577        self.inner.binary_search_by_key(&index, I::index).ok()
578    }
579}
580
/// The location of a `FILE` line inside the symbol file.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct BreakpadFileLine {
    /// The file index of this file.
    pub index: u32,
    /// The file offset at which there is the string `FILE ` at the start of the line
    pub file_offset: u64,
    /// The length of the line, excluding line break (`\r*\n`). `FILE` symbols only occupy a single line.
    pub line_length: u32,
}
590
591impl FileOrInlineOrigin for BreakpadFileLine {
592    fn index(&self) -> u32 {
593        self.index
594    }
595    fn offset_and_length(&self) -> (u64, u32) {
596        (self.file_offset, self.line_length)
597    }
598    fn parse(input: &[u8]) -> Result<&str, BreakpadParseError> {
599        let (_rest, (_index, name)) =
600            file_line(input).map_err(|_| BreakpadParseError::ParsingFile)?;
601        str::from_utf8(name).map_err(|_| BreakpadParseError::BadUtf8)
602    }
603}
604
/// The location of an `INLINE_ORIGIN` line inside the symbol file.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct BreakpadInlineOriginLine {
    /// The inline origin index of this inline origin.
    pub index: u32,
    /// The file offset at which there is the string `INLINE_ORIGIN ` at the start of the line
    pub file_offset: u64,
    /// The length of the line, excluding line break (`\r*\n`). `INLINE_ORIGIN` symbols only occupy a single line.
    pub line_length: u32,
}
614
615impl FileOrInlineOrigin for BreakpadInlineOriginLine {
616    fn index(&self) -> u32 {
617        self.index
618    }
619    fn offset_and_length(&self) -> (u64, u32) {
620        (self.file_offset, self.line_length)
621    }
622    fn parse(input: &[u8]) -> Result<&str, BreakpadParseError> {
623        let (_rest, (_index, name)) =
624            inline_origin_line(input).map_err(|_| BreakpadParseError::ParsingFile)?;
625        str::from_utf8(name).map_err(|_| BreakpadParseError::BadUtf8)
626    }
627}
628
/// Builds a `BreakpadIndex` from the bytes of a symbol file which are supplied
/// in chunks: call `consume` for every chunk, then `finish`.
#[derive(Debug, Clone, Default)]
pub struct BreakpadIndexParser {
    /// Turns the consumed chunks into lines, which are passed on to `inner`
    /// together with their offset.
    line_buffer: LineBuffer,
    /// The per-line parsing state.
    inner: BreakpadIndexParserInner,
}
634
635impl BreakpadIndexParser {
636    pub fn new() -> Self {
637        Self::default()
638    }
639
640    pub fn consume(&mut self, chunk: &[u8]) {
641        let inner = &mut self.inner;
642        let line_buffer = &mut self.line_buffer;
643        line_buffer.consume(chunk, |offset, line| inner.process_line(offset, line));
644    }
645
646    pub fn finish(mut self) -> Result<BreakpadIndex, BreakpadParseError> {
647        let inner = &mut self.inner;
648        let final_offset = self
649            .line_buffer
650            .finish(|offset, line| inner.process_line(offset, line));
651        self.inner.finish(final_offset)
652    }
653}
654
/// Collects items and produces a vec which is sorted by item index and has no
/// duplicate indexes.
///
/// As long as the items arrive in ascending index order, no sorting is needed
/// at the end; this is tracked with `is_sorted`.
#[derive(Debug, Clone)]
struct SortedVecBuilder<I: FileOrInlineOrigin> {
    /// The collected items, in the order in which they were pushed.
    inner: Vec<I>,
    /// The index of the most recently pushed item, maintained while `is_sorted` is true.
    last_sorted_index: Option<u32>,
    /// Whether all items so far were pushed in strictly ascending index order.
    is_sorted: bool,
}
661
662impl<I: FileOrInlineOrigin> Default for SortedVecBuilder<I> {
663    fn default() -> Self {
664        Self {
665            inner: Vec::new(),
666            last_sorted_index: None,
667            is_sorted: true,
668        }
669    }
670}
671
672impl<I: FileOrInlineOrigin> SortedVecBuilder<I> {
673    pub fn push(&mut self, item: I) {
674        if self.is_sorted {
675            let item_index = item.index();
676            match self.last_sorted_index {
677                None => {
678                    // This is the first item.
679                    self.last_sorted_index = Some(item_index);
680                }
681                Some(last_index) if item_index > last_index => {
682                    // This is the common case.
683                    self.last_sorted_index = Some(item_index);
684                }
685                Some(last_index) if item_index == last_index => {
686                    // Discard this item. We only keep the first item with this index.
687                    // Valid files don't have duplicate indexes.
688                    return;
689                }
690                Some(_last_index) => {
691                    // item_index < last_index
692                    self.is_sorted = false;
693                }
694            }
695        }
696        self.inner.push(item);
697    }
698
699    pub fn into_sorted_vec(mut self) -> Vec<I> {
700        if !self.is_sorted {
701            self.inner.sort_by_key(I::index);
702            self.inner.dedup_by_key(|item| item.index());
703        }
704        self.inner
705    }
706}
707
/// The state which is built up line by line while indexing a symbol file.
#[derive(Debug, Clone, Default)]
struct BreakpadIndexParserInner {
    /// The bytes of the module info: the `MODULE` line, followed by each
    /// `INFO` line with a `\n` in front of it.
    module_info_bytes: Vec<u8>,
    /// `(debug_id, os, arch, debug_name)` from the `MODULE` line, once it has been parsed.
    module_info: Option<(DebugId, String, String, String)>,
    /// The optional name from the code ID `INFO` line.
    name: Option<String>,
    /// The code ID from the code ID `INFO` line.
    code_id: Option<CodeId>,
    /// `(address, symbol)` for every `PUBLIC` line and every finished `FUNC` block.
    symbols: Vec<(u32, BreakpadSymbolType)>,
    /// The `FILE` lines seen so far.
    files: SortedVecBuilder<BreakpadFileLine>,
    /// The `INLINE_ORIGIN` lines seen so far.
    inline_origins: SortedVecBuilder<BreakpadInlineOriginLine>,
    /// `(address, file_offset)` of the `FUNC` line whose block has not ended yet.
    pending_func_block: Option<(u32, u64)>,
}
719
720impl BreakpadIndexParserInner {
721    pub fn process_line(&mut self, file_offset: u64, line: &[u8]) {
722        let mut input = line;
723        while input.last() == Some(&b'\r') {
724            input = &input[..(input.len() - 1)];
725        }
726        if self.module_info.is_none() {
727            // Every file must start with a "MODULE " line.
728            if let Ok((_r, (os, arch, debug_id, debug_name))) = module_line(input) {
729                self.module_info = Some((
730                    debug_id,
731                    os.to_string(),
732                    arch.to_string(),
733                    debug_name.to_string(),
734                ));
735            }
736            input.clone_into(&mut self.module_info_bytes);
737            return;
738        }
739        let line_length = input.len() as u32;
740        if let Ok((_r, (index, _filename))) = file_line(input) {
741            self.files.push(BreakpadFileLine {
742                index,
743                file_offset,
744                line_length,
745            });
746        } else if let Ok((_r, (index, _inline_origin))) = inline_origin_line(input) {
747            self.inline_origins.push(BreakpadInlineOriginLine {
748                index,
749                file_offset,
750                line_length,
751            });
752        } else if let Ok((_r, (address, _name))) = public_line(input) {
753            self.finish_pending_func_block(file_offset);
754            self.symbols.push((
755                address,
756                BreakpadSymbolType::Public(BreakpadPublicSymbol {
757                    file_offset,
758                    line_length,
759                }),
760            ));
761        } else if let Ok((_r, (address, _size, _name))) = func_line(input) {
762            self.finish_pending_func_block(file_offset);
763            self.pending_func_block = Some((address, file_offset));
764        } else if input.starts_with(b"INFO ") {
765            self.finish_pending_func_block(file_offset);
766            self.module_info_bytes.push(b'\n');
767            self.module_info_bytes.extend_from_slice(input);
768            if let Ok((_r, (code_id, name_str))) = info_code_id_line(input) {
769                self.code_id = CodeId::from_str(code_id).ok();
770                self.name = name_str.map(ToOwned::to_owned);
771            }
772        } else if input.starts_with(b"STACK ") {
773            self.finish_pending_func_block(file_offset);
774        }
775    }
776
777    fn finish_pending_func_block(&mut self, non_func_line_start_offset: u64) {
778        if let Some((address, file_offset)) = self.pending_func_block.take() {
779            let block_length = (non_func_line_start_offset - file_offset) as u32;
780            self.symbols.push((
781                address,
782                BreakpadSymbolType::Func(BreakpadFuncSymbol {
783                    file_offset,
784                    block_length,
785                }),
786            ));
787        }
788    }
789
790    pub fn finish(mut self, file_end_offset: u64) -> Result<BreakpadIndex, BreakpadParseError> {
791        self.finish_pending_func_block(file_end_offset);
792        let BreakpadIndexParserInner {
793            mut symbols,
794            module_info_bytes,
795            files,
796            inline_origins,
797            module_info,
798            name,
799            code_id,
800            ..
801        } = self;
802        symbols.sort_by_key(|(address, _)| *address);
803        symbols.dedup_by_key(|(address, _)| *address);
804        let (symbol_addresses, symbol_offsets) = symbols.into_iter().unzip();
805
806        let files = ItemMap::from_sorted_vec(files.into_sorted_vec());
807        let inline_origins = ItemMap::from_sorted_vec(inline_origins.into_sorted_vec());
808
809        let (debug_id, os, arch, debug_name) =
810            module_info.ok_or(BreakpadParseError::NoModuleInfoInSymFile)?;
811        Ok(BreakpadIndex {
812            module_info_bytes,
813            debug_name,
814            debug_id,
815            code_id,
816            name,
817            arch,
818            os,
819            symbol_addresses,
820            symbol_offsets,
821            files,
822            inline_origins,
823        })
824    }
825}
826
/// Splits a stream of byte chunks into lines and hands each line to a callback.
///
/// A line may be spread over several chunks; the part of an unfinished line
/// that has been seen so far is kept in a growable `Vec` inside this object.
#[derive(Debug, Clone, Default)]
pub struct LineBuffer {
    /// The bytes of the current, not yet terminated line.
    leftover_bytes: Vec<u8>,
    /// The file offset right after the last byte taken from any chunk.
    /// Bytes that were only moved into `leftover_bytes` are included in this
    /// count, because they have been taken from their chunk as well.
    current_offset: u64,
}
837
838impl LineBuffer {
839    pub fn consume(&mut self, mut chunk: &[u8], mut f: impl FnMut(u64, &[u8])) {
840        assert!(
841            self.leftover_bytes.len() as u64 <= self.current_offset,
842            "Caller supplied more self.leftover_bytes than we could have read ourselves"
843        );
844        loop {
845            match memchr(b'\n', chunk) {
846                None => {
847                    self.leftover_bytes.extend_from_slice(chunk);
848                    self.current_offset += chunk.len() as u64;
849                    return;
850                }
851                Some(line_break_pos_in_chunk) => {
852                    let chunk_until_line_break = &chunk[..line_break_pos_in_chunk];
853                    // let chunk_until_line_break = (&chunk[..line_break_pos_in_chunk]).trim_end_matches(b'\r');
854                    chunk = &chunk[(line_break_pos_in_chunk + 1)..];
855                    let (line, line_start_offset) = if self.leftover_bytes.is_empty() {
856                        (chunk_until_line_break, self.current_offset)
857                    } else {
858                        let line_start_offset =
859                            self.current_offset - (self.leftover_bytes.len() as u64);
860                        self.leftover_bytes.extend(chunk_until_line_break);
861                        (self.leftover_bytes.as_slice(), line_start_offset)
862                    };
863                    self.current_offset += line_break_pos_in_chunk as u64 + 1;
864                    f(line_start_offset, line);
865                    self.leftover_bytes.clear();
866                }
867            };
868        }
869    }
870
871    pub fn finish(self, mut f: impl FnMut(u64, &[u8])) -> u64 {
872        if !self.leftover_bytes.is_empty() {
873            let line_start_offset = self.current_offset - (self.leftover_bytes.len() as u64);
874            f(line_start_offset, &self.leftover_bytes);
875        }
876        self.current_offset
877    }
878}
879
880#[derive(thiserror::Error, Debug)]
881#[non_exhaustive]
882pub enum BreakpadParseError {
883    #[error("Error parsing PUBLIC line")]
884    ParsingPublic,
885
886    #[error("Error parsing FILE line")]
887    ParsingFile,
888
889    #[error("Error parsing INLINE_ORIGIN line")]
890    ParsingInlineOrigin,
891
892    #[error("Error parsing FUNC line")]
893    ParsingFunc,
894
895    #[error("Error parsing INLINE line")]
896    ParsingInline,
897
898    #[error("Error parsing func line data line")]
899    ParsingFuncLine,
900
901    #[error("Malformed UTF-8")]
902    BadUtf8,
903
904    #[error("The Breakpad sym file did not start with a valid MODULE line")]
905    NoModuleInfoInSymFile,
906}
907
/// The information carried by a PUBLIC symbol.
#[derive(Debug, Clone)]
pub struct BreakpadPublicSymbolInfo<'a> {
    /// The name of the symbol, borrowed from the data it was parsed from.
    pub name: &'a str,
}
912
913#[derive(Debug, Clone)]
914pub struct BreakpadFuncSymbolInfo<'a> {
915    pub name: &'a str,
916    pub size: u32,
917    pub lines: Vec<SourceLine>,
918    pub inlinees: Vec<Inlinee>,
919}
920
921impl BreakpadFuncSymbolInfo<'_> {
922    /// Returns `(file_id, line, address)` of the line record that covers the
923    /// given address. Line records describe locations at the deepest level of
924    /// inlining at that address.
925    ///
926    /// For example, if we have an "inline call stack" A -> B -> C at this
927    /// address, i.e. both the call to B and the call to C have been inlined all
928    /// the way into A (A being the "outer function"), then this method reports
929    /// locations in C.
930    pub fn get_innermost_sourceloc(&self, addr: u32) -> Option<&SourceLine> {
931        let line_index = match self.lines.binary_search_by_key(&addr, |line| line.address) {
932            Ok(i) => i,
933            Err(0) => return None,
934            Err(i) => i - 1,
935        };
936        Some(&self.lines[line_index])
937    }
938
939    /// Returns `(call_file_id, call_line, address, inline_origin)` of the
940    /// inlinee record that covers the given address at the given depth.
941    ///
942    /// We start at depth zero. For example, if we have an "inline call stack"
943    /// A -> B -> C at an address, i.e. both the call to B and the call to C have
944    /// been inlined all the way into A (A being the "outer function"), then the
945    /// call A -> B is at level zero, and the call B -> C is at level one.
946    pub fn get_inlinee_at_depth(&self, depth: u32, addr: u32) -> Option<&Inlinee> {
947        let index = match self
948            .inlinees
949            .binary_search_by_key(&(depth, addr), |inlinee| (inlinee.depth, inlinee.address))
950        {
951            Ok(i) => i,
952            Err(0) => return None,
953            Err(i) => i - 1,
954        };
955        let inlinee = &self.inlinees[index];
956        if inlinee.depth != depth {
957            return None;
958        }
959        let end_address = inlinee.address.checked_add(inlinee.size)?;
960        if addr < end_address {
961            Some(inlinee)
962        } else {
963            None
964        }
965    }
966}
967
968/// Match a hex string, parse it to a u32 or a u64.
969fn hex_str<T: std::ops::Shl<T, Output = T> + std::ops::BitOr<T, Output = T> + From<u8>>(
970    input: &[u8],
971) -> IResult<&[u8], T> {
972    // Consume up to max_len digits. For u32 that's 8 digits and for u64 that's 16 digits.
973    // Two hex digits form one byte.
974    let max_len = mem::size_of::<T>() * 2;
975
976    let mut res: T = T::from(0);
977    let mut k = 0;
978    for v in input.iter().take(max_len) {
979        let digit = match (*v as char).to_digit(16) {
980            Some(v) => v,
981            None => break,
982        };
983        res = res << T::from(4);
984        res = res | T::from(digit as u8);
985        k += 1;
986    }
987    if k == 0 {
988        return Err(Err::Error(Error::from_error_kind(
989            input,
990            ErrorKind::HexDigit,
991        )));
992    }
993    let remaining = &input[k..];
994    Ok((remaining, res))
995}
996
997/// Match a decimal string, parse it to a u32.
998///
999/// This is doing everything manually so that we only look at each byte once.
1000/// With a naive implementation you might be looking at them three times: First
1001/// you might get a slice of acceptable characters from nom, then you might parse
1002/// that slice into a str (checking for utf-8 unnecessarily), and then you might
1003/// parse that string into a decimal number.
1004fn decimal_u32(input: &[u8]) -> IResult<&[u8], u32> {
1005    const MAX_LEN: usize = 10; // u32::MAX has 10 decimal digits
1006    let mut res: u64 = 0;
1007    let mut k = 0;
1008    for v in input.iter().take(MAX_LEN) {
1009        let digit = *v as char;
1010        let digit_value = match digit.to_digit(10) {
1011            Some(v) => v,
1012            None => break,
1013        };
1014        res = res * 10 + digit_value as u64;
1015        k += 1;
1016    }
1017    if k == 0 {
1018        return Err(Err::Error(Error::from_error_kind(input, ErrorKind::Digit)));
1019    }
1020    let res = u32::try_from(res)
1021        .map_err(|_| Err::Error(Error::from_error_kind(input, ErrorKind::TooLarge)))?;
1022    let remaining = &input[k..];
1023    Ok((remaining, res))
1024}
1025
1026/// Take 0 or more non-space bytes.
1027fn non_space(input: &[u8]) -> IResult<&[u8], &[u8]> {
1028    take_while(|c: u8| c != b' ')(input)
1029}
1030
1031// Matches a MODULE record.
1032fn module_line(input: &[u8]) -> IResult<&[u8], (&str, &str, DebugId, &str)> {
1033    let (input, _) = terminated(tag("MODULE"), space1)(input)?;
1034    let (input, (os, cpu, debug_id, name)) = cut(tuple((
1035        terminated(map_res(non_space, str::from_utf8), space1), // os
1036        terminated(map_res(non_space, str::from_utf8), space1), // cpu
1037        terminated(
1038            map_res(map_res(hex_digit1, str::from_utf8), DebugId::from_breakpad),
1039            space1,
1040        ), // debug id
1041        map_res(rest, str::from_utf8),                          // filename
1042    )))(input)?;
1043    Ok((input, (os, cpu, debug_id, name)))
1044}
1045
1046// Matches an INFO CODE_ID record.
1047fn info_code_id_line(input: &[u8]) -> IResult<&[u8], (&str, Option<&str>)> {
1048    let (input, _) = terminated(tag("INFO CODE_ID"), space1)(input)?;
1049    let (input, code_id_with_name) = map_res(rest, str::from_utf8)(input)?;
1050    match code_id_with_name.split_once(' ') {
1051        Some((code_id, name)) => Ok((input, (code_id, Some(name)))),
1052        None => Ok((input, (code_id_with_name, None))),
1053    }
1054}
1055
1056// Matches a FILE record.
1057fn file_line(input: &[u8]) -> IResult<&[u8], (u32, &[u8])> {
1058    let (input, _) = terminated(tag("FILE"), space1)(input)?;
1059    let (input, (id, filename)) = cut(tuple((terminated(decimal_u32, space1), rest)))(input)?;
1060    Ok((input, (id, filename)))
1061}
1062
1063// Matches an INLINE_ORIGIN record.
1064fn inline_origin_line(input: &[u8]) -> IResult<&[u8], (u32, &[u8])> {
1065    let (input, _) = terminated(tag("INLINE_ORIGIN"), space1)(input)?;
1066    let (input, (id, function)) = cut(tuple((terminated(decimal_u32, space1), rest)))(input)?;
1067    Ok((input, (id, function)))
1068}
1069
1070// Matches a PUBLIC record.
1071fn public_line(input: &[u8]) -> IResult<&[u8], (u32, &[u8])> {
1072    let (input, _) = terminated(tag("PUBLIC"), space1)(input)?;
1073    let (input, (_multiple, address, _parameter_size, name)) = cut(tuple((
1074        opt(terminated(tag("m"), space1)),
1075        terminated(hex_str::<u64>, space1),
1076        terminated(hex_str::<u32>, space1),
1077        rest,
1078    )))(input)?;
1079    Ok((input, (address as u32, name)))
1080}
1081
/// One line record of a function: a range of machine code together with the
/// source file and line it was generated from.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SourceLine {
    /// Where the range starts, relative to the load address of the module.
    pub address: u32,
    /// How many bytes of instructions the range covers.
    pub size: u32,
    /// The ID of the source file which this machine code was generated from,
    /// i.e. the `<file_id>` field of the line record.
    pub file: u32,
    /// The line number inside `file` which this machine code was generated from.
    pub line: u32,
}
1096
/// One address range which is covered by an inlined function call.
///
/// The derived ordering compares the fields in declaration order, so inlinees
/// sort by depth first and by address second.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Inlinee {
    /// How deeply nested the inlined call is.
    pub depth: u32,
    /// Where the range starts, relative to the load address of the module.
    pub address: u32,
    /// How many bytes of instructions the range covers.
    pub size: u32,
    /// The ID of the source file that contains the function call, i.e. the
    /// `<call_site_file_id>` field of the INLINE record.
    pub call_file: u32,
    /// The line number inside `call_file` where the function call is located.
    pub call_line: u32,
    /// The ID of the inline origin (see INLINE_ORIGIN records) which names
    /// the called function.
    pub origin_id: u32,
}
1115
1116// Matches line data after a FUNC record.
1117///
1118/// A line record has the form <hex_addr> <hex_size> <line> <file_id>
1119fn func_line_data(input: &[u8]) -> IResult<&[u8], SourceLine> {
1120    let (input, (address, size, line, file)) = tuple((
1121        terminated(hex_str::<u64>, space1),
1122        terminated(hex_str::<u32>, space1),
1123        terminated(decimal_u32, space1),
1124        decimal_u32,
1125    ))(input)?;
1126    Ok((
1127        input,
1128        SourceLine {
1129            address: address as u32,
1130            size,
1131            file,
1132            line,
1133        },
1134    ))
1135}
1136
1137// Matches a FUNC record.
1138fn func_line(input: &[u8]) -> IResult<&[u8], (u32, u32, &[u8])> {
1139    let (input, _) = terminated(tag("FUNC"), space1)(input)?;
1140    let (input, (_multiple, address, size, _parameter_size, name)) = cut(tuple((
1141        opt(terminated(tag("m"), space1)),
1142        terminated(hex_str::<u32>, space1),
1143        terminated(hex_str::<u32>, space1),
1144        terminated(hex_str::<u32>, space1),
1145        rest,
1146    )))(input)?;
1147    Ok((input, (address, size, name)))
1148}
1149
1150// Matches one entry of the form <address> <size> which is used at the end of an INLINE record
1151fn inline_address_range(input: &[u8]) -> IResult<&[u8], (u32, u32)> {
1152    tuple((terminated(hex_str::<u32>, space1), hex_str::<u32>))(input)
1153}
1154
1155// Matches an INLINE record.
1156///
1157/// An INLINE record has the form `INLINE <inline_nest_level> <call_site_line> <call_site_file_id> <origin_id> [<address> <size>]+`.
1158fn inline_line(input: &[u8]) -> IResult<&[u8], impl Iterator<Item = Inlinee>> {
1159    let (input, _) = terminated(tag("INLINE"), space1)(input)?;
1160    let (input, (depth, call_line, call_file, origin_id)) = cut(tuple((
1161        terminated(decimal_u32, space1),
1162        terminated(decimal_u32, space1),
1163        terminated(decimal_u32, space1),
1164        terminated(decimal_u32, space1),
1165    )))(input)?;
1166    let (input, address_ranges) = cut(separated_list1(space1, inline_address_range))(input)?;
1167    Ok((
1168        input,
1169        address_ranges
1170            .into_iter()
1171            .map(move |(address, size)| Inlinee {
1172                address,
1173                size,
1174                call_file,
1175                call_line,
1176                depth,
1177                origin_id,
1178            }),
1179    ))
1180}
1181
#[cfg(test)]
mod test {
    use std::str::FromStr;

    use super::*;
    use crate::{ElfBuildId, PeCodeId};

    /// Feeds the chunks, in order, into a fresh parser and returns the index.
    fn index_from_chunks(chunks: &[&[u8]]) -> BreakpadIndex {
        let mut parser = BreakpadIndexParser::new();
        for &chunk in chunks {
            parser.consume(chunk);
        }
        parser.finish().unwrap()
    }

    /// Checks that serializing the index and parsing the bytes again gives an
    /// equal index.
    fn assert_survives_serialization(index: &BreakpadIndex) {
        let bytes = index.serialize_to_bytes();
        let reparsed = BreakpadIndex::parse_symindex_file(&bytes).unwrap();
        assert_eq!(&reparsed, index);
    }

    /// A small Linux sym file whose records are split across chunk boundaries
    /// and whose last line has no trailing line break.
    #[test]
    fn test1() {
        let chunks: &[&[u8]] = &[
            b"MODULE Linux x86_64 39CA3106713C8D0FFEE4605AFA2526670 libmozsandbox.so\nINFO CODE_ID ",
            b"0631CA393C710F8DFEE4605AFA2526671AD4EF17\nFILE 0 hg:hg.mozilla.org/mozilla-central:se",
            b"curity/sandbox/chromium/base/strings/safe_sprintf.cc:f150bc1f71d09e1e1941065951f0f5a3",
            b"8628f080",
        ];
        let index = index_from_chunks(chunks);

        let expected_file = BreakpadFileLine {
            index: 0,
            file_offset: 125,
            line_length: 136,
        };
        assert_eq!(index.files.get(0).unwrap(), &expected_file);

        let expected_debug_id =
            DebugId::from_breakpad("39CA3106713C8D0FFEE4605AFA2526670").unwrap();
        assert_eq!(index.debug_id, expected_debug_id);

        let expected_build_id =
            ElfBuildId::from_str("0631ca393c710f8dfee4605afa2526671ad4ef17").unwrap();
        assert_eq!(index.code_id, Some(CodeId::ElfBuildId(expected_build_id)));

        assert_survives_serialization(&index);
    }

    /// A Windows sym file with INFO, FILE, INLINE_ORIGIN, FUNC, INLINE and
    /// line records.
    #[test]
    fn test2() {
        let chunks: &[&[u8]] = &[
            b"MODULE windows x86_64 F1E853FD662672044C4C44205044422E1 firefox.pdb\nIN",
            b"FO CODE_ID 63C036DBA7000 firefox.exe\nINFO GENERATOR mozilla/dump_syms ",
            b"2.1.1\nFILE 0 /builds/worker/workspace/obj-build/browser/app/d:/agent/_",
            b"work/2/s/src/vctools/delayimp/dloadsup.h\nFILE 1 /builds/worker/workspa",
            b"ce/obj-build/browser/app/d:/agent/_work/2/s/src/externalapis/windows/10",
            b"/sdk/inc/winnt.h\nINLINE_ORIGIN 0 DloadLock()\nINLINE_ORIGIN 1 DloadUnl",
            b"ock()\nINLINE_ORIGIN 2 WritePointerRelease(void**, void*)\nINLINE_ORIGI",
            b"N 3 WriteRelease64(long long*, long long)\nFUNC 2b754 aa 0 DloadAcquire",
            b"SectionWriteAccess()\nINLINE 0 658 0 0 2b76a 3d\nINLINE 0 665 0 1 2b7ca",
            b" 17 2b7e6 12\nINLINE 1 345 0 2 2b7ed b\nINLINE 2 8358 1 3 2b7ed b\n2b75",
            b"4 6 644 0\n2b75a 10 650 0\n2b76a e 299 0\n2b778 14 300 0\n2b78c 2 301 0",
            b"\n2b78e 2 306 0\n2b790 c 305 0\n2b79c b 309 0\n2b7a7 10 660 0\n2b7b7 2 ",
            b"661 0\n2b7b9 11 662 0\n2b7ca 9 340 0\n2b7d3 e 341 0\n2b7e1 c 668 0\n2b7",
            b"ed b 7729 1\n2b7f8 6 668 0",
        ];
        let index = index_from_chunks(chunks);

        assert_eq!(&index.debug_name, "firefox.pdb");

        let expected_debug_id =
            DebugId::from_breakpad("F1E853FD662672044C4C44205044422E1").unwrap();
        assert_eq!(index.debug_id, expected_debug_id);

        assert_eq!(index.name.as_deref(), Some("firefox.exe"));

        let expected_code_id = PeCodeId::from_str("63C036DBA7000").unwrap();
        assert_eq!(index.code_id, Some(CodeId::PeCodeId(expected_code_id)));

        // INFO lines other than CODE_ID are kept in the module info as well.
        let module_info = std::str::from_utf8(&index.module_info_bytes).unwrap();
        assert!(module_info.contains("INFO GENERATOR mozilla/dump_syms 2.1.1"));

        assert_survives_serialization(&index);
    }

    /// Parses a FUNC block which is embedded between unrelated lines.
    #[test]
    fn func_parsing() {
        const LEADING_JUNK: &str = "JUNK\n";
        const TRAILING_JUNK: &str = "\nJUNK";

        let block =
            b"JUNK\nFUNC 1130 28 0 main\n1130 f 24 0\n113f 7 25 0\n1146 9 26 0\n114f 9 27 0\nJUNK";
        let symbol = BreakpadFuncSymbol {
            file_offset: LEADING_JUNK.len() as u64,
            block_length: (block.len() - LEADING_JUNK.len() - TRAILING_JUNK.len()) as u32,
        };
        let block_start = symbol.file_offset as usize;
        let block_len = symbol.block_length as usize;
        let input = &block[block_start..][..block_len];

        let func = symbol.parse(input).unwrap();
        assert_eq!(func.name, "main");
        assert_eq!(func.size, 0x28);
        assert_eq!(func.lines.len(), 4);

        let first_line = SourceLine {
            address: 0x1130,
            size: 0xf,
            file: 0,
            line: 24,
        };
        assert_eq!(func.lines[0], first_line);

        let last_line = SourceLine {
            address: 0x114f,
            size: 0x9,
            file: 0,
            line: 27,
        };
        assert_eq!(func.lines[3], last_line);
    }
}