std_logger_parser/
lib.rs

1//! See the [`Parser`] type.
2
3use std::collections::HashMap;
4use std::convert::Infallible;
5use std::fmt;
6use std::io::{self, Read};
7use std::str::{self, FromStr};
8use std::time::{Duration, SystemTime};
9
10use log::Level;
11
12/// Create a new [`Parser`].
13pub fn parse<R>(reader: R) -> Parser<R>
14where
15    R: Read,
16{
17    Parser {
18        parsed: 0,
19        reader,
20        buf: Vec::with_capacity(4096),
21        needs_read: true,
22        hit_eof: false,
23    }
24}
25
/// A struct to parse logfmt formatted logs.
///
/// See the example below for usage.
///
/// # Notes
///
/// The parser assumes the log lines are mostly correct. This means it will
/// loosely check values but isn't too strict about it.
///
/// If this parser returns an [error] it will skip the problematic line and
/// continue with the next one. Note however that if a problem exists in a
/// multi-line log message the records returned afterwards might be invalid.
///
/// [error]: ParseError
///
/// # Examples
///
/// The API is simple, just call [`parse`] in a for loop.
///
/// ```
/// use std_logger_parser::parse;
///
/// # fn main() -> Result<(), std_logger_parser::ParseError> {
/// let logs = /* Open some log file, anything that implements `io::Read`. */
/// #    b"" as &[u8];
///
/// for record in parse(logs) {
///     let record = record?;
///
///     println!("parsed a record: {:?}", record);
/// }
/// # Ok(())
/// # }
/// ```
#[derive(Debug)]
pub struct Parser<R> {
    /// Source the log bytes are read from.
    reader: R,
    /// Amount of bytes parsed from the start of `buf`.
    parsed: usize,
    /// Bytes read from `reader`; everything before `parsed` has already been
    /// processed.
    buf: Vec<u8>,
    /// If `true`, `next` will read from `R` into `buf` before parsing.
    needs_read: bool,
    /// Set to `true` once a read from `R` returns zero bytes.
    ///
    /// While `false`, `parse_line` returns `None` when a value runs up to the
    /// end of the buffered input, as more bytes may still follow. Once `true`,
    /// `parse_line` treats the end of the input as the end of the value, and
    /// `next` returns `None` when no further record can be parsed.
    hit_eof: bool,
}
73
74impl<R: Read> Parser<R> {
75    fn fill_buf(&mut self) -> io::Result<()> {
76        self.remove_spaces();
77        // Remove already processed bytes.
78        drop(self.buf.drain(..self.parsed));
79        self.parsed = 0;
80
81        // If a log message is the same size as the buffer's capacity double the
82        // capacity to read more bytes.
83        if self.buf.len() == self.buf.capacity() {
84            self.buf.reserve(self.buf.capacity());
85        }
86
87        // Resize the buffer to read into the unused space.
88        let original_len = self.buf.len();
89        self.buf.resize(self.buf.capacity(), 0);
90        match self.reader.read(&mut self.buf[original_len..]) {
91            Ok(n) => {
92                self.buf.truncate(original_len + n);
93                if n == 0 {
94                    self.hit_eof = true;
95                }
96                Ok(())
97            }
98            Err(err) => {
99                self.buf.truncate(original_len);
100                Err(err)
101            }
102        }
103    }
104
105    /// Updates `parsed` to remove all spaces from the start of `buf`.
106    fn remove_spaces(&mut self) {
107        let input = &self.buf[self.parsed..];
108        let input_left = eat_space(input);
109        self.parsed += input.len() - input_left.len();
110    }
111
112    /// Returns `None` the log message is incomplete.
113    fn parse_line(&mut self) -> Result<Option<Record>, ParseError> {
114        let mut record = Record::empty();
115        let mut record_is_empty = true;
116        // Remove spaces from the start to ensure `create_line_error` doesn't
117        // include a bunch of empty spaces.
118        self.remove_spaces();
119        let mut input = &self.buf[self.parsed..];
120
121        loop {
122            input = eat_space(input);
123            if input.is_empty() || input[0] == b'\n' {
124                // Mark the line (new line included) as parser.
125                self.parsed = (self.buf.len() - input.len()) + if input.is_empty() { 0 } else { 1 };
126
127                return Ok((!record_is_empty).then(|| record));
128            }
129
130            let (i, key) = parse_key(input).map_err(|err| self.create_line_error(err))?;
131            if i.is_empty() {
132                return Ok(None);
133            }
134            input = i;
135
136            let (i, value) = parse_value(input);
137            if i.is_empty() && !self.hit_eof {
138                // If this is the end of the input we expect it to be the end of
139                // the value as well and we don't return here.
140                return Ok(None);
141            }
142            input = i;
143
144            match key {
145                "ts" => {
146                    let timestamp =
147                        parse_timestamp(value).map_err(|err| self.create_line_error(err))?;
148                    record.timestamp = Some(timestamp);
149                }
150                "lvl" => {
151                    let level =
152                        parse_log_level(value).map_err(|err| self.create_line_error(err))?;
153                    record.level = level;
154                }
155                "msg" => {
156                    let msg = parse_string(value).map_err(|err| self.create_line_error(err))?;
157                    record.msg = msg.to_owned();
158                }
159                "target" => {
160                    let target = parse_string(value).map_err(|err| self.create_line_error(err))?;
161                    record.target = target.to_owned();
162                }
163                "module" => {
164                    let module = parse_string(value).map_err(|err| self.create_line_error(err))?;
165                    if !module.is_empty() {
166                        record.module = Some(module.to_owned());
167                    }
168                }
169                "file" => {
170                    let (file, line) =
171                        parse_file(value).map_err(|err| self.create_line_error(err))?;
172                    record.file = Some((file.to_owned(), line));
173                }
174                _ => {
175                    let value = parse_string(value).map_err(|err| self.create_line_error(err))?;
176                    // Safety: `FromStr` for `Value` never fails.
177                    // TODO: what to do when overwriting a key?
178                    let _ = record
179                        .key_values
180                        .insert(key.to_owned(), value.parse().unwrap());
181                }
182            }
183            // If we get to here we've assigned at least a single field so we
184            // want to keep the record.
185            record_is_empty = false;
186        }
187    }
188
189    fn create_line_error(&self, kind: ParseErrorKind) -> ParseError {
190        let line = single_line(&self.buf[self.parsed..])
191            .to_owned()
192            .into_boxed_slice();
193        ParseError {
194            line: Some(line),
195            kind,
196        }
197    }
198}
199
200impl<R: Read> Iterator for Parser<R> {
201    type Item = Result<Record, ParseError>;
202
203    fn next(&mut self) -> Option<Self::Item> {
204        loop {
205            if self.needs_read {
206                match self.fill_buf() {
207                    Ok(()) => { /* Continue below. */ }
208                    Err(err) => {
209                        return Some(Err(ParseError {
210                            line: None,
211                            kind: ParseErrorKind::Io(err),
212                        }));
213                    }
214                }
215            }
216
217            match self.parse_line() {
218                Ok(Some(record)) => return Some(Ok(record)),
219                Ok(None) if self.hit_eof => return None,
220                Ok(None) => {
221                    self.needs_read = true;
222                    continue; // Read again.
223                }
224                Err(err) => {
225                    // Skip the troublesome line.
226                    if let Some(line) = err.line.as_ref() {
227                        self.parsed += line.len();
228                        if let Some(b'\n') = self.buf.get(self.parsed) {
229                            // Also skip the next new line.
230                            self.parsed += 1
231                        }
232                    }
233                    return Some(Err(err));
234                }
235            }
236        }
237    }
238}
239
240/// Result returned by parsing functions.
241type ParseResult<'a, T> = Result<(&'a [u8], T), ParseErrorKind>;
242
243/// Error returned by the [`Parser`].
244#[non_exhaustive]
245pub struct ParseError {
246    /// The line in which the error occurred. This will be `None` for [I/O]
247    /// errors.
248    ///
249    /// [I/O]: ParseErrorKind::Io
250    pub line: Option<Box<[u8]>>,
251    /// Error detail.
252    pub kind: ParseErrorKind,
253}
254
255impl fmt::Display for ParseError {
256    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
257        if let Some(line) = self.line.as_ref() {
258            write!(
259                f,
260                "error parsing log message: {}, in line `{:?}`",
261                self.kind,
262                str::from_utf8(line)
263                    .as_ref()
264                    .map_or_else(|line| line as &dyn fmt::Debug, |_| line as &dyn fmt::Debug)
265            )
266        } else {
267            write!(f, "error reading: {}", self.kind)
268        }
269    }
270}
271
272impl fmt::Debug for ParseError {
273    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
274        fmt::Display::fmt(self, f)
275    }
276}
277
/// Error detail for [`ParseError`].
#[derive(Debug)]
pub enum ParseErrorKind {
    /// Key contains invalid UTF-8.
    KeyInvalidUt8,
    /// Timestamp (key `ts`) is invalid.
    InvalidTimestamp,
    /// Log level (key `lvl`) is not valid.
    InvalidLevel,
    /// File and line number from where the message originated (key `file`) is
    /// invalid.
    InvalidFile,
    /// A value contains invalid UTF-8.
    InvalidValue,
    /// I/O error.
    Io(io::Error),
}

#[doc(hidden)] // This is here for testing purposes.
impl PartialEq for ParseErrorKind {
    /// Two I/O errors are equal only if both carry the same raw OS error
    /// code; all other kinds are equal when they are the same variant.
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            (Self::Io(lhs), Self::Io(rhs)) => {
                let codes = (lhs.raw_os_error(), rhs.raw_os_error());
                matches!(codes, (Some(l), Some(r)) if l == r)
            }
            // All remaining variants carry no data, so comparing the variant
            // itself is enough. An I/O error paired with any other kind has a
            // different discriminant and thus compares unequal.
            _ => std::mem::discriminant(self) == std::mem::discriminant(other),
        }
    }
}

impl fmt::Display for ParseErrorKind {
    /// Writes a short description of the error; I/O errors use their own
    /// `Display` output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::KeyInvalidUt8 => f.write_str("invalid UTF-8 in key"),
            Self::InvalidTimestamp => f.write_str("invalid timestamp"),
            Self::InvalidLevel => f.write_str("invalid level"),
            Self::InvalidFile => f.write_str("invalid file"),
            Self::InvalidValue => f.write_str("invalid UTF-8 in value"),
            Self::Io(err) => fmt::Display::fmt(err, f),
        }
    }
}
329
/// Returns the first line of `input`, without its terminating new line.
///
/// New lines that appear inside double quotes do not end the line. If no
/// terminating new line is found all of `input` is returned.
fn single_line<'a>(input: &'a [u8]) -> &'a [u8] {
    // Tracks whether an odd number of quotes has been seen so far.
    let mut in_quotes = false;
    let end = input
        .iter()
        .position(|&b| match b {
            b'"' => {
                in_quotes = !in_quotes;
                false
            }
            // Ignore new lines inside quotes, e.g. in backtraces.
            b'\n' => !in_quotes,
            _ => false,
        })
        .unwrap_or(input.len());
    &input[..end]
}
345
/// Strips all leading spaces and tabs from `input`. New lines are left in
/// place.
fn eat_space<'a>(input: &'a [u8]) -> &'a [u8] {
    let blanks = input
        .iter()
        .take_while(|&&b| b == b' ' || b == b'\t')
        .count();
    &input[blanks..]
}
358
/// Strips all trailing spaces and tabs from `input`, i.e. it removes them
/// from the end of the input. New lines are left in place.
fn eat_space_end<'a>(input: &'a [u8]) -> &'a [u8] {
    let blanks = input
        .iter()
        .rev()
        .take_while(|&&b| b == b' ' || b == b'\t')
        .count();
    &input[..input.len() - blanks]
}
370
371/// Calls both [`eat_space`] and [`eat_space_end`].
372fn eat_space_both<'a>(input: &'a [u8]) -> &'a [u8] {
373    eat_space(eat_space_end(input))
374}
375
376/// Parses a key, i.e. `key=`.
377fn parse_key<'a>(input: &'a [u8]) -> ParseResult<'a, &'a str> {
378    let mut i = 0;
379    for b in input.iter().copied() {
380        if b == b'=' {
381            break;
382        }
383        i += 1;
384    }
385    let (mut key_bytes, mut input) = input.split_at(i);
386    if !input.is_empty() {
387        input = &input[1..]; // Remove the `=`.
388    }
389    key_bytes = eat_space_both(key_bytes);
390    // Remove starting and ending quote, if any.
391    if let (Some(b'"'), Some(b'"')) = (key_bytes.first(), key_bytes.last()) {
392        key_bytes = eat_space_both(&key_bytes[1..key_bytes.len() - 1]);
393    }
394
395    match str::from_utf8(key_bytes) {
396        Ok(key) => Ok((input, key)),
397        Err(_) => Err(ParseErrorKind::KeyInvalidUt8),
398    }
399}
400
401/// Parse a timestamp with the format: `yyyy-mm-ddThh:mm:ss.nnnnnnZ`, e.g.
402/// `2021-02-23T13:15:48.624447Z`.
403fn parse_timestamp<'a>(value: &'a [u8]) -> Result<SystemTime, ParseErrorKind> {
404    // Invalid length or format.
405    if value.len() != 27
406        || value[4] != b'-'
407        || value[7] != b'-'
408        || value[10] != b'T'
409        || value[13] != b':'
410        || value[16] != b':'
411        || value[19] != b'.'
412        || value[26] != b'Z'
413    {
414        return Err(ParseErrorKind::InvalidTimestamp);
415    }
416    let value = match str::from_utf8(value) {
417        Ok(value) => value,
418        Err(_) => return Err(ParseErrorKind::InvalidTimestamp),
419    };
420
421    #[rustfmt::skip] // Rustfmt makes it 3 lines, it's fits on a single one just fine.
422    let year: i32 = value[0..4].parse().map_err(|_| ParseErrorKind::InvalidTimestamp)?;
423    #[rustfmt::skip]
424    let month: i32 = value[5..7].parse().map_err(|_| ParseErrorKind::InvalidTimestamp)?;
425    #[rustfmt::skip]
426    let day: i32 = value[8..10].parse().map_err(|_| ParseErrorKind::InvalidTimestamp)?;
427    #[rustfmt::skip]
428    let hour: i32 = value[11..13].parse().map_err(|_| ParseErrorKind::InvalidTimestamp)?;
429    #[rustfmt::skip]
430    let min: i32 = value[14..16].parse().map_err(|_| ParseErrorKind::InvalidTimestamp)?;
431    #[rustfmt::skip]
432    let sec: i32 = value[17..19].parse().map_err(|_| ParseErrorKind::InvalidTimestamp)?;
433    #[rustfmt::skip]
434    let nanos: u32 = value[20..26].parse().map_err(|_| ParseErrorKind::InvalidTimestamp)?;
435
436    // Convert the timestamp into the number of seconds sinch Unix Epoch.
437    let mut tm = libc::tm {
438        tm_sec: sec,
439        tm_min: min,
440        tm_hour: hour,
441        tm_mday: day,
442        tm_mon: month - 1,
443        tm_year: year - 1900,
444        tm_wday: 0,
445        tm_yday: 0,
446        tm_isdst: 0,
447        tm_gmtoff: 0,
448        tm_zone: std::ptr::null_mut(),
449    };
450    let time_offset = unsafe { libc::timegm(&mut tm) };
451    // Create the timestamp from the time offset and the nanosecond precision.
452    Ok(SystemTime::UNIX_EPOCH + Duration::new(time_offset as u64, nanos))
453}
454
455/// Parse a log level, using [`Level::from_str`].
456fn parse_log_level<'a>(value: &'a [u8]) -> Result<Level, ParseErrorKind> {
457    match str::from_utf8(value) {
458        Ok(value) => match value.parse() {
459            Ok(level) => Ok(level),
460            Err(_) => Err(ParseErrorKind::InvalidLevel),
461        },
462        Err(_) => Err(ParseErrorKind::InvalidLevel),
463    }
464}
465
466fn parse_string<'a>(value: &'a [u8]) -> Result<&'a str, ParseErrorKind> {
467    match str::from_utf8(value) {
468        Ok(value) => Ok(value),
469        Err(_) => Err(ParseErrorKind::InvalidValue),
470    }
471}
472
473/// Parse file value, format: `path/to/file:column`, e.g.
474/// `examples/simple.rs:51`.
475fn parse_file<'a>(value: &'a [u8]) -> Result<(&'a str, u32), ParseErrorKind> {
476    match str::from_utf8(value) {
477        Ok(value) => {
478            if let Some((file, column)) = value.rsplit_once(':') {
479                match column.parse() {
480                    Ok(column) => Ok((file, column)),
481                    Err(_) => Err(ParseErrorKind::InvalidFile),
482                }
483            } else {
484                Err(ParseErrorKind::InvalidFile)
485            }
486        }
487        Err(_) => Err(ParseErrorKind::InvalidFile),
488    }
489}
490
491/// Returns `(remaining_input, value)`.
492fn parse_value<'a>(input: &'a [u8]) -> (&'a [u8], &'a [u8]) {
493    let input = eat_space(input);
494    if input.first().copied() == Some(b'"') {
495        parse_quoted_value(input)
496    } else {
497        parse_naked_value(input)
498    }
499}
500
/// See [`parse_value`], expects `input` to start with a `"`.
///
/// The value runs from the opening quote up to the last quote found before
/// the `=` of the next key-value pair (or before the end of the input), which
/// allows quotes inside the value. The surrounding quotes are not part of the
/// returned value. If no closing quote is found the value is empty.
fn parse_quoted_value<'a>(input: &'a [u8]) -> (&'a [u8], &'a [u8]) {
    debug_assert!(input[0] == b'"');

    // Find the `=` of the next key-value pair: the first one that isn't
    // inside quotes. We start out inside the opening quote.
    let mut inside_quotes = true;
    let mut next_pair = input.len();
    for (idx, &b) in input.iter().enumerate().skip(1) {
        match b {
            b'"' => inside_quotes = !inside_quotes,
            b'=' if !inside_quotes => {
                next_pair = idx;
                break;
            }
            _ => {}
        }
    }

    // `input[1..next_pair]` still includes the key of the next pair, the
    // value ends at the last quote in it.
    let closing = input[1..next_pair]
        .iter()
        .rposition(|&b| b == b'"')
        .map_or(1, |offset| offset + 1);

    let value = &input[1..closing]; // Skip start quote.
    // Skip end quote; nothing is left if that runs past the input.
    let rest = input.get(closing + 1..).unwrap_or(&[]);
    (rest, value)
}
537
/// Parses a single unquoted value, which ends at the first space (` `) or new
/// line, or at the end of the input.
///
/// Returns `(remaining_input, value)`; the remaining input starts with the
/// byte that ended the value.
fn parse_naked_value<'a>(input: &'a [u8]) -> (&'a [u8], &'a [u8]) {
    let end = input
        .iter()
        .position(|&b| b == b' ' || b == b'\n')
        .unwrap_or(input.len());
    let (value, rest) = input.split_at(end);
    (rest, value)
}
551
/// A parsed log record.
#[derive(Debug, PartialEq)]
#[non_exhaustive]
pub struct Record {
    /// Timestamp *in UTC* (key `ts`).
    pub timestamp: Option<SystemTime>,
    /// Log level (key `lvl`).
    pub level: Level,
    /// Log message (key `msg`).
    pub msg: String,
    /// Log target (key `target`).
    pub target: String,
    /// Module that logged the message (key `module`).
    pub module: Option<String>,
    /// File and line number from where the message originated (key `file`).
    pub file: Option<(String, u32)>,
    /// Additional key value pairs.
    pub key_values: HashMap<String, Value>,
}
571
/// A parsed value from a key-value pair.
///
/// Note that parsing is done on a best-effort basis, which means integers,
/// floats etc. might actually be represented as a [`Value::String`].
#[derive(Debug, PartialEq)]
pub enum Value {
    /// Parsed boolean.
    Bool(bool),
    /// Parsed integer.
    Int(i64),
    /// Parsed floating point number.
    Float(f64),
    /// Unparsed string.
    String(String),
}

impl FromStr for Value {
    /// Parsing never fails, it can always fall back to [`Value::String`].
    type Err = Infallible;

    /// Tries a boolean, an integer and a float, in that order, and falls back
    /// to keeping `value` as a string.
    fn from_str(value: &str) -> Result<Self, Self::Err> {
        if let Ok(flag) = value.parse::<bool>() {
            return Ok(Value::Bool(flag));
        }
        if let Ok(int) = value.parse::<i64>() {
            return Ok(Value::Int(int));
        }
        if let Ok(float) = value.parse::<f64>() {
            return Ok(Value::Float(float));
        }
        Ok(Value::String(value.to_owned()))
    }
}
604
605impl Record {
606    /// Create a new empty record.
607    #[doc(hidden)] // This is only public for testing purposes.
608    pub fn empty() -> Record {
609        Record {
610            timestamp: None,
611            level: Level::Info,
612            msg: String::new(),
613            target: String::new(),
614            module: None,
615            file: None,
616            key_values: HashMap::new(),
617        }
618    }
619}