apple_crash_report_parser/
parser.rs

1use std::borrow::Cow;
2use std::collections::BTreeMap;
3use std::fmt;
4use std::io::{self, BufRead, BufReader, Read};
5
6use chrono::{DateTime, FixedOffset, Utc};
7use lazy_static::lazy_static;
8use regex::Regex;
9#[cfg(feature = "with_serde")]
10use serde::{Serialize, Serializer};
11use uuid::Uuid;
12
// Line-oriented patterns used by the parser. They are compiled lazily on first
// use. All of them are written in verbose mode (`(?x)`), which ignores
// unescaped whitespace in the pattern, so literal spaces appear as `\ `.
lazy_static! {
    // Generic `key: value` line. Group 1 is the key (text before the first
    // colon), group 2 is the value; surrounding whitespace is not captured.
    static ref KEY_VALUE_RE: Regex = Regex::new(
        r#"(?x)
        ^\s*(.*?)\s*:\s*(.*?)\s*$
    "#
    )
    .unwrap();
    // Thread backtrace header such as `Thread 0:` or `Thread 0 Crashed:`.
    // Group 1 is the thread index, group 2 participates only when the
    // ` Crashed` marker is present, group 3 is optional text after the colon.
    static ref THREAD_RE: Regex = Regex::new(
        r#"(?x)
        ^Thread\ ([0-9]+)(\ Crashed)?:\s*(.+?)?\s*$
    "#
    )
    .unwrap();
    // `Thread N name: <name>` line, optionally followed by
    // ` Dispatch queue: <queue>`. Group 1 is the thread index, group 2 the
    // name, group 3 the dispatch queue (if any).
    static ref THREAD_NAME_RE: Regex = Regex::new(
        r#"(?x)
        ^Thread\ ([0-9]+)\ name:\s*(.+?)
        (?:\s+Dispatch\ queue:\s*(.*?))?\s*$
    "#
    )
    .unwrap();
    // Header that introduces the register dump:
    // `Thread N crashed with <...> Thread State:`.
    static ref THREAD_STATE_RE: Regex = Regex::new(
        r#"(?x)
        ^Thread\ ([0-9]+)\ crashed\ with\ .*?\ Thread\ State:\s*$
    "#
    )
    .unwrap();
    // A single `name: 0x...` register entry. The pattern is not anchored, so
    // it can be applied repeatedly to collect every register on one line.
    // Group 1 is the register name, group 2 the hexadecimal value.
    static ref REGISTER_RE: Regex = Regex::new(
        r#"(?x)
        \s*
        ([a-z0-9]+):\s+
        (0x[0-9a-fA-F]+)\s*
    "#
    )
    .unwrap();
    // One backtrace frame: frame index, module (group 1), instruction address
    // (group 2) and symbol text (group 3), optionally ending in ` + <offset>`
    // or ` (<file>:<line>)`. File and line are groups 4 and 5.
    static ref FRAME_RE: Regex = Regex::new(
        r#"(?x)
        ^
            [0-9]+ \s+
            (.+?) \s+
            (0x[0-9a-fA-F]+)\s+
            (.*?)
            (?:\ (?:\+\ [0-9]+|\((.*?):([0-9]+)\)))?
            \s*
        $
    "#
    )
    .unwrap();
    // One entry of the `Binary Images:` section. Groups, in order: start
    // address (1), end address (2), image name with an optional leading `+`
    // stripped (3), a whitespace-free token the parser stores as the
    // architecture (4), optional parenthesized version (5), the identifier
    // between `<` and `>` (6) and the image path (7).
    static ref BINARY_IMAGE_RE: Regex = Regex::new(
        r#"(?x)
        ^
            \s*
            (0x[0-9a-fA-F]+) \s*
            -
            \s*
            (0x[0-9a-fA-F]+) \s+
            \+?(.+)\s+
            (\S+?)\s+
            (?:\(([^)]+?)\))?\s+
            <([^>]+?)>\s+
            (.*?)
        $
    "#
    )
    .unwrap();
}
78
/// A newtype for addresses.
///
/// Wraps the raw 64-bit value. With the `with_serde` feature enabled the
/// address serializes as a `0x`-prefixed lowercase hexadecimal string rather
/// than as a number.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Addr(pub u64);
82
#[cfg(feature = "with_serde")]
impl Serialize for Addr {
    /// Serializes the address as a `0x`-prefixed lowercase hexadecimal string.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let Addr(raw) = *self;
        let rendered = format!("{:#x}", raw);
        serializer.serialize_str(&rendered)
    }
}
92
/// Holds a parsed apple crash report.
///
/// The parser starts from `AppleCrashReport::default()` and only overwrites
/// what it finds, so fields whose source line is missing from the report keep
/// their default value.
#[derive(Debug, Default)]
#[cfg_attr(feature = "with_serde", derive(Serialize))]
pub struct AppleCrashReport {
    /// The unique crash ID (taken from the `Incident Identifier` header).
    pub incident_identifier: Uuid,
    /// The timestamp of the crash (taken from the `Date/Time` header and
    /// converted to UTC).
    #[cfg_attr(feature = "with_serde", serde(skip_serializing_if = "Option::is_none"))]
    pub timestamp: Option<DateTime<Utc>>,
    /// The architecture of the crash as found in the `Code Type` header
    /// (might require further parsing).
    #[cfg_attr(feature = "with_serde", serde(skip_serializing_if = "Option::is_none"))]
    pub code_type: Option<String>,
    /// The path to the application (taken from the `Path` header).
    #[cfg_attr(feature = "with_serde", serde(skip_serializing_if = "Option::is_none"))]
    pub path: Option<String>,
    /// Optional application specific crash information as string.
    ///
    /// Contains the lines following the `Application Specific Information:`
    /// marker, joined with `\n`.
    #[cfg_attr(feature = "with_serde", serde(skip_serializing_if = "Option::is_none"))]
    pub application_specific_information: Option<String>,
    /// Optional syslog info.
    ///
    /// Contains the lines following the `Filtered syslog:` marker, joined
    /// with `\n`.
    #[cfg_attr(feature = "with_serde", serde(skip_serializing_if = "Option::is_none"))]
    pub filtered_syslog: Option<String>,
    /// The internal report version (taken from the `Report Version` header).
    pub report_version: u32,
    /// Extra metadata: every other `key: value` header line that has no
    /// dedicated field, keyed by the header name.
    pub metadata: BTreeMap<String, String>,
    /// A list of threads, in the order they appear in the report.
    pub threads: Vec<Thread>,
    /// A list of referenced binary images.
    pub binary_images: Vec<BinaryImage>,
}
123
/// A single binary image in the crash.
///
/// One value is produced per parsed line of the `Binary Images:` section.
#[derive(Debug)]
#[cfg_attr(feature = "with_serde", derive(Serialize))]
pub struct BinaryImage {
    /// The address of the image (the first address of the range in the report).
    pub addr: Addr,
    /// The size of the image, computed by the parser as the second address of
    /// the range minus the first.
    pub size: u64,
    /// The unique ID of the image.
    pub uuid: Uuid,
    /// The architecture of the image.
    pub arch: String,
    /// The version of the image if available. This might require further parsing.
    #[cfg_attr(feature = "with_serde", serde(skip_serializing_if = "Option::is_none"))]
    pub version: Option<String>,
    /// The short name of the image.
    pub name: String,
    /// The full path of the image.
    pub path: String,
}
144
/// Represents a single frame.
#[derive(Debug)]
#[cfg_attr(feature = "with_serde", derive(Serialize))]
pub struct Frame {
    /// The module of the frame. `None` when the report lists it as `???`.
    #[cfg_attr(feature = "with_serde", serde(skip_serializing_if = "Option::is_none"))]
    pub module: Option<String>,
    /// The symbol of the frame if available. `None` when the report only
    /// shows a raw hexadecimal address in the symbol position.
    #[cfg_attr(feature = "with_serde", serde(skip_serializing_if = "Option::is_none"))]
    pub symbol: Option<String>,
    /// The filename of the frame if available.
    #[cfg_attr(feature = "with_serde", serde(skip_serializing_if = "Option::is_none"))]
    pub filename: Option<String>,
    /// The line number of the frame if available.
    #[cfg_attr(feature = "with_serde", serde(skip_serializing_if = "Option::is_none"))]
    pub lineno: Option<u32>,
    /// The instruction address of the frame.
    pub instruction_addr: Addr,
}
164
/// A single thread in the crash.
#[derive(Debug)]
#[cfg_attr(feature = "with_serde", derive(Serialize))]
pub struct Thread {
    /// The ID (index) of the thread.
    pub id: u64,
    /// The name of the thread if available.
    ///
    /// Initially taken from the text after the colon of the thread header; a
    /// matching `Thread N name:` line replaces it.
    #[cfg_attr(feature = "with_serde", serde(skip_serializing_if = "Option::is_none"))]
    pub name: Option<String>,
    /// The name of the dispatch queue, if the `Thread N name:` line carries one.
    #[cfg_attr(feature = "with_serde", serde(skip_serializing_if = "Option::is_none"))]
    pub dispatch_queue: Option<String>,
    /// `true` if this thread crashed.
    pub crashed: bool,
    /// The list of frames, in report order.
    pub frames: Vec<Frame>,
    /// A dump of all the registers of the thread if available.
    ///
    /// The parser attaches the register dump to the first thread marked as
    /// crashed; all other threads keep `None`.
    #[cfg_attr(feature = "with_serde", serde(skip_serializing_if = "Option::is_none"))]
    pub registers: Option<BTreeMap<String, Addr>>,
}
185
/// The section of the report the line-based parser is currently in.
enum ParsingState {
    /// Outside of any special section; lines are interpreted as `key: value`
    /// headers.
    Root,
    /// Inside a thread backtrace; lines are interpreted as frames.
    Thread,
    /// After the `Binary Images:` marker; lines are interpreted as image entries.
    BinaryImages,
    /// Inside the register dump that follows a `... Thread State:` header.
    ThreadState,
    /// After the `Filtered syslog:` marker; lines are collected as text.
    FilteredSyslog,
    /// After the `Application Specific Information:` marker; lines are
    /// collected as text.
    ApplicationSpecificInformation,
}
194
/// Represents a parsing error.
///
/// Every variant wraps the underlying error, which is also exposed through
/// `std::error::Error::source`.
#[derive(Debug)]
pub enum ParseError {
    /// Reading a line from the input failed.
    Io(io::Error),
    /// The value of the `Incident Identifier` header is not a valid UUID.
    InvalidIncidentIdentifier(uuid::Error),
    /// The identifier of a binary image is not a valid UUID.
    InvalidImageIdentifier(uuid::Error),
    /// The value of the `Report Version` header is not a valid integer.
    InvalidReportVersion(std::num::ParseIntError),
    /// The value of the `Date/Time` header does not match the expected format.
    InvalidTimestamp(chrono::ParseError),
}
204
205impl std::error::Error for ParseError {
206    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
207        match *self {
208            ParseError::Io(ref err) => Some(err),
209            ParseError::InvalidIncidentIdentifier(ref err) => Some(err),
210            ParseError::InvalidImageIdentifier(ref err) => Some(err),
211            ParseError::InvalidReportVersion(ref err) => Some(err),
212            ParseError::InvalidTimestamp(ref err) => Some(err),
213        }
214    }
215}
216
217impl fmt::Display for ParseError {
218    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
219        match *self {
220            ParseError::Io(..) => write!(f, "io error during parsing"),
221            ParseError::InvalidIncidentIdentifier(..) => write!(f, "invalid incident identifier"),
222            ParseError::InvalidImageIdentifier(..) => write!(f, "invalid binary image identifier"),
223            ParseError::InvalidReportVersion(..) => write!(f, "invalid report version"),
224            ParseError::InvalidTimestamp(..) => write!(f, "invalid timestamp"),
225        }
226    }
227}
228
229impl std::str::FromStr for AppleCrashReport {
230    type Err = ParseError;
231
232    fn from_str(s: &str) -> Result<AppleCrashReport, ParseError> {
233        AppleCrashReport::from_line_iter(s.lines().map(|x| Ok(Cow::Borrowed(x))))
234    }
235}
236
237impl AppleCrashReport {
238    /// Consumes a reader and parses it.
239    pub fn from_reader<R: Read>(r: R) -> Result<AppleCrashReport, ParseError> {
240        let reader = BufReader::new(r);
241        AppleCrashReport::from_line_iter(reader.lines().map(|x| x.map(Cow::Owned)))
242    }
243
244    #[allow(clippy::cognitive_complexity)]
245    fn from_line_iter<'a, I>(iter: I) -> Result<AppleCrashReport, ParseError>
246    where
247        I: Iterator<Item = Result<Cow<'a, str>, io::Error>>,
248    {
249        let mut state = ParsingState::Root;
250        let mut thread = None;
251        let mut thread_names = BTreeMap::new();
252        let mut registers = BTreeMap::new();
253        let mut application_specific_information = String::new();
254        let mut filtered_syslog = String::new();
255
256        let mut rv = AppleCrashReport::default();
257
258        for line in iter {
259            let line = line.map_err(ParseError::Io)?;
260            let line = line.trim_end();
261
262            if line.starts_with("Binary Images:") {
263                state = ParsingState::BinaryImages;
264                continue;
265            } else if line.starts_with("Application Specific Information:") {
266                state = ParsingState::ApplicationSpecificInformation;
267                continue;
268            } else if line.starts_with("Filtered syslog:") {
269                state = ParsingState::FilteredSyslog;
270                continue;
271            } else if THREAD_STATE_RE.is_match(line) {
272                state = ParsingState::ThreadState;
273                continue;
274            } else if let Some(caps) = THREAD_RE.captures(line) {
275                if let Some(thread) = thread.take() {
276                    rv.threads.push(thread);
277                }
278                thread = Some(Thread {
279                    id: caps[1].parse().unwrap(),
280                    name: caps.get(3).map(|m| m.as_str().to_string()),
281                    dispatch_queue: None,
282                    frames: vec![],
283                    crashed: caps.get(2).is_some(),
284                    registers: None,
285                });
286                state = ParsingState::Thread;
287                continue;
288            } else if let Some(caps) = THREAD_NAME_RE.captures(line) {
289                thread_names.insert(
290                    caps[1].parse::<u64>().unwrap(),
291                    (
292                        caps[2].to_string(),
293                        caps.get(3).map(|x| x.as_str().to_string()),
294                    ),
295                );
296                state = ParsingState::Root;
297                continue;
298            }
299
300            state = match state {
301                ParsingState::Root => {
302                    if let Some(caps) = KEY_VALUE_RE.captures(line) {
303                        match &caps[1] {
304                            "Incident Identifier" => {
305                                rv.incident_identifier = caps[2]
306                                    .parse()
307                                    .map_err(ParseError::InvalidIncidentIdentifier)?;
308                            }
309                            "Report Version" => {
310                                rv.report_version =
311                                    caps[2].parse().map_err(ParseError::InvalidReportVersion)?;
312                            }
313                            "Path" => {
314                                rv.path = Some(caps[2].to_string());
315                            }
316                            "Code Type" => {
317                                rv.code_type = Some(caps[2].to_string());
318                            }
319                            "Date/Time" => {
320                                let timestamp = DateTime::<FixedOffset>::parse_from_str(
321                                    &caps[2],
322                                    "%Y-%m-%d %H:%M:%S%.3f %z",
323                                )
324                                .map_err(ParseError::InvalidTimestamp)?;
325                                rv.timestamp = Some(timestamp.with_timezone(&Utc));
326                            }
327                            "Crashed Thread" => {}
328                            _ => {
329                                rv.metadata.insert(caps[1].to_string(), caps[2].to_string());
330                            }
331                        }
332                    }
333                    ParsingState::Root
334                }
335                ParsingState::ThreadState => {
336                    if line.is_empty() {
337                        ParsingState::Root
338                    } else {
339                        for caps in REGISTER_RE.captures_iter(line) {
340                            registers.insert(
341                                caps[1].to_string(),
342                                Addr(u64::from_str_radix(&caps[2][2..], 16).unwrap()),
343                            );
344                        }
345                        ParsingState::ThreadState
346                    }
347                }
348                ParsingState::Thread => {
349                    if let Some(caps) = FRAME_RE.captures(line) {
350                        thread.as_mut().unwrap().frames.push(Frame {
351                            module: if &caps[1] == "???" {
352                                None
353                            } else {
354                                Some(caps[1].to_string())
355                            },
356                            symbol: caps.get(3).and_then(|x| {
357                                if x.as_str().starts_with("0x")
358                                    && u64::from_str_radix(&x.as_str()[2..], 16).is_ok()
359                                {
360                                    None
361                                } else {
362                                    Some(x.as_str().to_string())
363                                }
364                            }),
365                            filename: caps.get(4).map(|x| x.as_str().to_string()),
366                            lineno: caps.get(5).map(|x| x.as_str().parse().unwrap()),
367                            instruction_addr: Addr(u64::from_str_radix(&caps[2][2..], 16).unwrap()),
368                        });
369                        ParsingState::Thread
370                    } else {
371                        ParsingState::Root
372                    }
373                }
374                ParsingState::BinaryImages => {
375                    if line.is_empty() {
376                        ParsingState::BinaryImages
377                    } else if let Some(caps) = BINARY_IMAGE_RE.captures(line) {
378                        let addr = u64::from_str_radix(&caps[1][2..], 16).unwrap();
379                        rv.binary_images.push(BinaryImage {
380                            addr: Addr(addr),
381                            size: u64::from_str_radix(&caps[2][2..], 16).unwrap() - addr,
382                            uuid: caps[6]
383                                .parse()
384                                .map_err(ParseError::InvalidImageIdentifier)?,
385                            arch: caps[4].to_string(),
386                            version: caps.get(5).map(|x| x.as_str().to_string()),
387                            name: caps[3].to_string(),
388                            path: caps[7].to_string(),
389                        });
390                        ParsingState::BinaryImages
391                    } else {
392                        ParsingState::Root
393                    }
394                }
395                ParsingState::ApplicationSpecificInformation => {
396                    if !application_specific_information.is_empty() {
397                        application_specific_information.push('\n');
398                    }
399                    application_specific_information.push_str(line);
400                    ParsingState::ApplicationSpecificInformation
401                }
402                ParsingState::FilteredSyslog => {
403                    if !filtered_syslog.is_empty() {
404                        filtered_syslog.push('\n');
405                    }
406                    filtered_syslog.push_str(line);
407                    ParsingState::FilteredSyslog
408                }
409            }
410        }
411
412        if let Some(thread) = thread.take() {
413            rv.threads.push(thread);
414        }
415
416        for thread in rv.threads.iter_mut() {
417            if let Some((name, dispatch_queue)) = thread_names.remove(&thread.id) {
418                thread.name = Some(name);
419                thread.dispatch_queue = dispatch_queue;
420            }
421        }
422
423        if !registers.is_empty() {
424            for thread in rv.threads.iter_mut() {
425                if thread.crashed {
426                    thread.registers = Some(registers);
427                    break;
428                }
429            }
430        }
431
432        if !application_specific_information.is_empty() {
433            if application_specific_information.ends_with('\n') {
434                application_specific_information
435                    .truncate(application_specific_information.len() - 1);
436            }
437            rv.application_specific_information = Some(application_specific_information);
438        }
439        if !filtered_syslog.is_empty() {
440            if filtered_syslog.ends_with('\n') {
441                filtered_syslog.truncate(filtered_syslog.len() - 1);
442            }
443            rv.filtered_syslog = Some(filtered_syslog);
444        }
445
446        Ok(rv)
447    }
448}