linux_audit_parser/
parser.rs

1use std::convert::{From, TryFrom};
2use std::str;
3
4use nom::{
5    branch::*, bytes::complete::*, character::complete::*, character::*, combinator::*, multi::*,
6    sequence::*, IResult,
7};
8
9use nom::character::complete::{i64 as dec_i64, u16 as dec_u16, u32 as dec_u32, u64 as dec_u64};
10
11use thiserror::Error;
12
13use crate::constants::*;
14use crate::*;
15
/// Parser for Linux Audit messages, with a few configurable options
///
/// Both options default to `true` (see the `Default` implementation).
#[derive(Debug)]
pub struct Parser {
    /// Process enriched (i.e. ALL-CAPS keys). Default: true
    ///
    /// When `false`, the body parser skips everything between a
    /// `\x1d` byte and the end of the line instead of parsing it as
    /// further key/value pairs.
    pub enriched: bool,
    /// Try to process common msg='…' strings into key/value maps. Default: true
    ///
    /// When `false`, `msg` values are handled like other encoded
    /// string values instead of being turned into a map.
    pub split_msg: bool,
}
24
25impl Default for Parser {
26    fn default() -> Self {
27        Self {
28            enriched: true,
29            split_msg: true,
30        }
31    }
32}
33
/// Audit parser error type
///
/// The byte payloads are rendered lossily as UTF-8 in the error
/// messages.
#[derive(Debug, Error)]
pub enum ParseError {
    /// The header (`type= … msg=audit(…):`) could not be parsed.
    ///
    /// Carries a copy of the complete raw input line.
    #[error("cannot parse header: {}", String::from_utf8_lossy(.0))]
    MalformedHeader(Vec<u8>),
    /// The body (everything after the event ID) could not be parsed.
    ///
    /// Carries a copy of the input that followed the header.
    #[error("cannot parse body: {}", String::from_utf8_lossy(.0))]
    MalformedBody(Vec<u8>),
    /// Garbage text was found at the end of the body.
    ///
    /// Carries a copy of the input left over after the body was parsed.
    #[error("garbage at end of message: {}", String::from_utf8_lossy(.0))]
    TrailingGarbage(Vec<u8>),
    /// A value in hexadecimal encoding could not be converted.
    // NOTE(review): this variant is not constructed anywhere in this
    // file; presumably it is raised by other modules of the crate.
    #[error("{id} ({ty}) can't hex-decode {}", String::from_utf8_lossy(.hex_str))]
    HexDecodeError {
        /// Type of the message the value was found in.
        ty: MessageType,
        /// Event ID of the message the value was found in.
        id: EventID,
        /// The offending hex string.
        hex_str: Vec<u8>,
    },
}
54
55/// Parse a single log line as produced by _auditd(8)_
56///
57/// If `skip_enriched` is set and _auditd_ has been configured to
58/// produce `log_format=ENRICHED` logs, i.e. to resolve `uid`, `gid`,
59/// `syscall`, `arch`, `sockaddr` fields, those resolved values are
60/// dropped by the parser.
61///
62/// To maintain compatibility, `parse` does not attempt to process
63/// single-quoted `msg='…'` strings into key/value maps.
64pub fn parse<'a>(raw: &[u8], skip_enriched: bool) -> Result<Message<'a>, ParseError> {
65    Parser {
66        enriched: !skip_enriched,
67        ..Parser::default()
68    }
69    .parse(raw)
70}
71
72impl Parser {
73    /// Parse a single log line as produced by _auditd(8)_
74    pub fn parse<'a, 'b>(&'a self, raw: &'a [u8]) -> Result<Message<'b>, ParseError> {
75        let (rest, (node, ty, id)) =
76            parse_header(raw).map_err(|_| ParseError::MalformedHeader(raw.to_vec()))?;
77
78        let (rest, kv) = self
79            .parse_body(rest, ty)
80            .map_err(|_| ParseError::MalformedBody(rest.to_vec()))?;
81
82        if !rest.is_empty() {
83            return Err(ParseError::TrailingGarbage(rest.to_vec()));
84        }
85
86        let node = node.map(|s| s.to_vec());
87
88        let mut body = Body::new();
89        for (k, v) in kv {
90            body.push((k, v));
91        }
92
93        Ok(Message { id, node, ty, body })
94    }
95
96    /// Recognize the body: Multiple key/value pairs, with special cases
97    /// for some irregular messages
98    #[inline(always)]
99    fn parse_body<'a>(
100        &'a self,
101        input: &'a [u8],
102        ty: MessageType,
103    ) -> IResult<&'a [u8], Vec<(Key, Value<'a>)>> {
104        // Handle some corner cases that don't fit the general key=value
105        // scheme.
106        let (input, special) = match ty {
107            MessageType::AVC => opt(map(
108                tuple((
109                    preceded(
110                        pair(tag("avc:"), space0),
111                        alt((tag("granted"), tag("denied"))),
112                    ),
113                    delimited(
114                        tuple((space0, tag("{"), space0)),
115                        many1(terminated(parse_identifier, space0)),
116                        tuple((tag("}"), space0, tag("for"), space0)),
117                    ),
118                )),
119                |(k, v)| {
120                    (
121                        Key::Name(NVec::from(k)),
122                        Value::List(
123                            v.iter()
124                                .map(|e| Value::Str(e, Quote::None))
125                                .collect::<Vec<_>>(),
126                        ),
127                    )
128                },
129            ))(input)?,
130            MessageType::TTY => {
131                let (input, _) = opt(tag("tty "))(input)?;
132                (input, None)
133            }
134            MessageType::MAC_POLICY_LOAD => {
135                let (input, _) = opt(tag("policy loaded "))(input)?;
136                (input, None)
137            }
138            _ => opt(map(
139                terminated(tag("netlabel"), pair(tag(":"), space0)),
140                |s| (Key::Name(NVec::from(s)), Value::Empty),
141            ))(input)?,
142        };
143
144        let (input, mut kv) = if !self.enriched {
145            terminated(
146                separated_list0(take_while1(|c| c == b' '), |input| self.parse_kv(input, ty)),
147                alt((
148                    value((), tuple((tag("\x1d"), is_not("\n"), tag("\n")))),
149                    value((), tag("\n")),
150                )),
151            )(input)?
152        } else {
153            terminated(
154                separated_list0(take_while1(|c| c == b' ' || c == b'\x1d'), |input| {
155                    self.parse_kv(input, ty)
156                }),
157                newline,
158            )(input)?
159        };
160
161        if let Some(s) = special {
162            kv.push(s)
163        }
164
165        Ok((input, kv))
166    }
167
168    /// Recognize one key/value pair
169    #[inline(always)]
170    fn parse_kv<'a>(
171        &'a self,
172        input: &'a [u8],
173        ty: MessageType,
174    ) -> IResult<&'a [u8], (Key, Value<'a>)> {
175        let (input, key) = match ty {
176            // Special case for execve arguments: aX, aX[Y], aX_len
177            MessageType::EXECVE
178                if !input.is_empty() && input[0] == b'a' && !input.starts_with(b"argc") =>
179            {
180                terminated(
181                    alt((parse_key_a_x_len, parse_key_a_xy, parse_key_a_x)),
182                    tag("="),
183                )(input)
184            }
185            // Special case for syscall params: aX
186            MessageType::SYSCALL => terminated(alt((parse_key_a_x, parse_key)), tag("="))(input),
187            _ => terminated(parse_key, tag("="))(input),
188        }?;
189
190        let (input, value) = match (ty, &key) {
191            (MessageType::SYSCALL, Key::Arg(_, None)) => map(
192                recognize(terminated(
193                    many1_count(take_while1(is_hex_digit)),
194                    peek(take_while1(is_sep)),
195                )),
196                |s| {
197                    let ps = unsafe { str::from_utf8_unchecked(s) };
198                    match u64::from_str_radix(ps, 16) {
199                        Ok(n) => Value::Number(Number::Hex(n)),
200                        Err(_) => Value::Str(s, Quote::None),
201                    }
202                },
203            )(input)?,
204            (MessageType::SYSCALL, Key::Common(c)) => self.parse_common(input, ty, *c)?,
205            (MessageType::EXECVE, Key::Arg(_, _)) => parse_encoded(input)?,
206            (MessageType::EXECVE, Key::ArgLen(_)) => parse_dec(input)?,
207            (_, Key::Name(name)) => parse_named(input, ty, name)?,
208            (_, Key::Common(c)) => self.parse_common(input, ty, *c)?,
209            (_, Key::NameUID(name)) | (_, Key::NameGID(name)) => {
210                alt((parse_dec, |input| parse_unspec_value(input, ty, name)))(input)?
211            }
212            _ => parse_encoded(input)?,
213        };
214
215        Ok((input, (key, value)))
216    }
217
218    #[inline(always)]
219    fn parse_common<'a>(
220        &'a self,
221        input: &'a [u8],
222        ty: MessageType,
223        c: Common,
224    ) -> IResult<&'a [u8], Value<'a>> {
225        let name = <&str>::from(c).as_bytes();
226        match c {
227            Common::Arch | Common::CapFi | Common::CapFp | Common::CapFver => {
228                alt((parse_hex, |input| parse_unspec_value(input, ty, name)))(input)
229            }
230            Common::Argc
231            | Common::Exit
232            | Common::CapFe
233            | Common::Inode
234            | Common::Item
235            | Common::Items
236            | Common::Pid
237            | Common::PPid
238            | Common::Ses
239            | Common::Syscall => {
240                alt((parse_dec, |input| parse_unspec_value(input, ty, name)))(input)
241            }
242            Common::Success
243            | Common::Cwd
244            | Common::Dev
245            | Common::Tty
246            | Common::Comm
247            | Common::Exe
248            | Common::Name
249            | Common::Nametype
250            | Common::Subj
251            | Common::Key => {
252                alt((parse_encoded, |input| parse_unspec_value(input, ty, name)))(input)
253            }
254            Common::Mode => alt((parse_oct, |input| parse_unspec_value(input, ty, name)))(input),
255            Common::Msg => {
256                if self.split_msg {
257                    alt((parse_kv_sq_as_map, |input| {
258                        parse_unspec_value(input, ty, name)
259                    }))(input)
260                } else {
261                    alt((parse_encoded, |input| parse_unspec_value(input, ty, name)))(input)
262                }
263            }
264        }
265    }
266}
267
268/// Recognize the header: node, type, event identifier
269#[inline(always)]
270#[allow(clippy::type_complexity)]
271fn parse_header(input: &[u8]) -> IResult<&[u8], (Option<&[u8]>, MessageType, EventID)> {
272    tuple((
273        opt(terminated(parse_node, is_a(" "))),
274        terminated(parse_type, is_a(" ")),
275        parse_msgid,
276    ))(input)
277}
278
279/// Recognize the node name
280#[inline(always)]
281fn parse_node(input: &[u8]) -> IResult<&[u8], &[u8]> {
282    preceded(tag("node="), is_not(" \t\r\n"))(input)
283}
284
285/// Recognize event type
286#[inline(always)]
287fn parse_type(input: &[u8]) -> IResult<&[u8], MessageType> {
288    preceded(
289        tag("type="),
290        alt((
291            map_res(
292                recognize(many1_count(alt((alphanumeric1, tag("_"))))),
293                |s| {
294                    EVENT_IDS
295                        .get(s)
296                        .ok_or(format!("unknown event id {}", String::from_utf8_lossy(s)))
297                        .map(|n| MessageType(*n))
298                },
299            ),
300            map(delimited(tag("UNKNOWN["), dec_u32, tag("]")), MessageType),
301        )),
302    )(input)
303}
304
305/// Recognize the "msg=audit(…):" event identifier
306#[inline(always)]
307fn parse_msgid(input: &[u8]) -> IResult<&[u8], EventID> {
308    map(
309        tuple((
310            preceded(tag("msg=audit("), dec_u64),
311            delimited(tag("."), dec_u64, tag(":")),
312            terminated(dec_u32, pair(tag("):"), space0)),
313        )),
314        |(sec, msec, sequence)| EventID {
315            timestamp: 1000 * sec + msec,
316            sequence,
317        },
318    )(input)
319}
320
321#[inline(always)]
322fn parse_named<'a>(input: &'a [u8], ty: MessageType, name: &[u8]) -> IResult<&'a [u8], Value<'a>> {
323    match FIELD_TYPES.get(name) {
324        Some(&FieldType::Encoded) => {
325            alt((parse_encoded, |input| parse_unspec_value(input, ty, name)))(input)
326        }
327        Some(&FieldType::NumericHex) => {
328            alt((parse_hex, |input| parse_unspec_value(input, ty, name)))(input)
329        }
330        Some(&FieldType::NumericDec) => {
331            alt((parse_dec, |input| parse_unspec_value(input, ty, name)))(input)
332        }
333        Some(&FieldType::NumericOct) => {
334            alt((parse_oct, |input| parse_unspec_value(input, ty, name)))(input)
335        }
336        // FIXME: Some(&FieldType::Numeric)
337        _ => alt((parse_encoded, |input| parse_unspec_value(input, ty, name)))(input),
338    }
339}
340
341/// Recognize encoded value:
342///
343/// May be double-quoted string, hex-encoded blob, (null), ?.
344#[inline(always)]
345fn parse_encoded(input: &[u8]) -> IResult<&[u8], Value> {
346    alt((
347        map(parse_str_dq_safe, |s| Value::Str(s, Quote::Double)),
348        terminated(
349            map(
350                recognize(many1_count(take_while_m_n(2, 2, is_hex_digit))),
351                |hexstr: &[u8]| {
352                    let mut recoded = Vec::with_capacity(hexstr.len() / 2);
353                    for i in 0..hexstr.len() / 2 {
354                        let d = unsafe { str::from_utf8_unchecked(&hexstr[2 * i..2 * i + 2]) };
355                        recoded.push(u8::from_str_radix(d, 16).unwrap());
356                    }
357                    Value::Owned(recoded)
358                },
359            ),
360            peek(take_while1(is_sep)),
361        ),
362        terminated(
363            value(Value::Empty, alt((tag("(null)"), tag("?")))),
364            peek(take_while1(is_sep)),
365        ),
366    ))(input)
367}
368
369/// Recognize hexadecimal value
370#[inline(always)]
371fn parse_hex(input: &[u8]) -> IResult<&[u8], Value> {
372    map_res(
373        terminated(take_while1(is_hex_digit), peek(take_while1(is_sep))),
374        |digits| -> Result<_, std::num::ParseIntError> {
375            let digits = unsafe { str::from_utf8_unchecked(digits) };
376            Ok(Value::Number(Number::Hex(u64::from_str_radix(digits, 16)?)))
377        },
378    )(input)
379}
380
381/// Recognize decimal value
382#[inline(always)]
383fn parse_dec(input: &[u8]) -> IResult<&[u8], Value> {
384    map(terminated(dec_i64, peek(take_while1(is_sep))), |n| {
385        Value::Number(Number::Dec(n))
386    })(input)
387}
388
389/// Recognize octal value
390#[inline(always)]
391fn parse_oct(input: &[u8]) -> IResult<&[u8], Value> {
392    map_res(
393        terminated(take_while1(is_oct_digit), peek(take_while1(is_sep))),
394        |digits| -> Result<_, std::num::ParseIntError> {
395            let digits = unsafe { str::from_utf8_unchecked(digits) };
396            Ok(Value::Number(Number::Oct(u64::from_str_radix(digits, 8)?)))
397        },
398    )(input)
399}
400
401#[inline(always)]
402fn parse_unspec_value<'a>(
403    input: &'a [u8],
404    ty: MessageType,
405    name: &[u8],
406) -> IResult<&'a [u8], Value<'a>> {
407    // work around apparent AppArmor breakage
408    match (ty, name) {
409        (_, b"subj") => {
410            if let Ok((input, s)) = recognize(tuple((
411                opt(tag("=")),
412                parse_str_unq,
413                opt(delimited(tag(" ("), parse_identifier, tag(")"))),
414            )))(input)
415            {
416                return Ok((input, Value::Str(s, Quote::None)));
417            }
418        }
419        (MessageType::AVC, b"info") => {
420            if let Ok((input, s)) = parse_str_dq(input) {
421                return Ok((input, Value::Str(s, Quote::None)));
422            }
423        }
424        (MessageType::SOCKADDR, b"SADDR") => {
425            let broken_string: IResult<&[u8], &[u8]> =
426                recognize(pair(tag("unknown family"), opt(take_till(is_sep))))(input);
427            if let Ok((input, s)) = broken_string {
428                return Ok((input, Value::Str(s, Quote::None)));
429            }
430        }
431        _ => (),
432    };
433
434    alt((
435        terminated(
436            map(take_while1(is_safe_unquoted_chr), |s| {
437                Value::Str(s, Quote::None)
438            }),
439            peek(take_while1(is_sep)),
440        ),
441        map(parse_kv_sq, |s| Value::Str(s, Quote::Single)),
442        map(parse_str_sq, |s| Value::Str(s, Quote::Single)),
443        map(parse_str_dq, |s| Value::Str(s, Quote::Double)),
444        map(parse_kv_braced, |s| Value::Str(s, Quote::Braces)),
445        map(parse_str_braced, |s| Value::Str(s, Quote::Braces)),
446        value(Value::Empty, peek(take_while1(is_sep))),
447    ))(input)
448}
449
450#[inline(always)]
451fn parse_str_sq(input: &[u8]) -> IResult<&[u8], &[u8]> {
452    delimited(tag("'"), take_while(|c| c != b'\''), tag("'"))(input)
453}
454
455#[inline(always)]
456fn parse_str_dq_safe(input: &[u8]) -> IResult<&[u8], &[u8]> {
457    delimited(tag("\""), take_while(is_safe_chr), tag("\""))(input)
458}
459
460#[inline(always)]
461fn parse_str_dq(input: &[u8]) -> IResult<&[u8], &[u8]> {
462    delimited(tag("\""), take_while(|c| c != b'"'), tag("\""))(input)
463}
464
465#[inline(always)]
466fn parse_str_braced(input: &[u8]) -> IResult<&[u8], &[u8]> {
467    delimited(tag("{ "), take_until(" }"), tag(" }"))(input)
468}
469
470#[inline(always)]
471fn parse_str_unq(input: &[u8]) -> IResult<&[u8], &[u8]> {
472    take_while(is_safe_chr)(input)
473}
474
475#[inline(always)]
476fn parse_str_unq_inside_sq(input: &[u8]) -> IResult<&[u8], &[u8]> {
477    take_while(|c| is_safe_chr(c) && c != b'\'')(input)
478}
479
480#[inline(always)]
481fn parse_str_words_inside_sq(input: &[u8]) -> IResult<&[u8], &[u8]> {
482    let mut rest = input;
483    loop {
484        (rest, _) = take_while(|c| !b"' ".contains(&c))(rest)?;
485        if alt((recognize(tuple((space1, parse_key, tag("=")))), tag("'")))(rest).is_ok() {
486            break;
487        }
488        (rest, _) = space1(rest)?;
489    }
490    let l = input.len() - rest.len();
491    Ok((rest, &input[..l]))
492}
493
494/// More "correct" variant of parse_str_sq
495#[inline(always)]
496fn parse_kv_sq(input: &[u8]) -> IResult<&[u8], &[u8]> {
497    delimited(
498        tag("'"),
499        recognize(separated_list0(
500            tag(" "),
501            tuple((
502                recognize(pair(alpha1, many0_count(alt((alphanumeric1, is_a("-_")))))),
503                tag("="),
504                alt((parse_str_dq, parse_str_braced, parse_str_unq_inside_sq)),
505            )),
506        )),
507        tag("'"),
508    )(input)
509}
510
511/// Recognize a map enclosed in single quotes
512#[inline(always)]
513fn parse_kv_sq_as_map(input: &[u8]) -> IResult<&[u8], Value> {
514    map(
515        delimited(
516            tag("'"),
517            separated_list0(
518                space1,
519                alt((separated_pair(
520                    parse_key,
521                    alt((
522                        tag("="),
523                        recognize(tuple((tag(":"), space0))), // for 'avc:  mumble mumble mumble …'
524                    )),
525                    alt((
526                        parse_encoded,
527                        map(parse_str_words_inside_sq, |v| Value::Str(v, Quote::None)),
528                        map(parse_str_unq_inside_sq, |v| Value::Str(v, Quote::None)),
529                    )),
530                ),)),
531            ),
532            tag("'"),
533        ),
534        Value::Map,
535    )(input)
536}
537
538/// More "correct" variant of parse_str_braced
539#[inline(always)]
540fn parse_kv_braced(input: &[u8]) -> IResult<&[u8], &[u8]> {
541    delimited(
542        tag("{ "),
543        recognize(separated_list0(
544            tag(" "),
545            tuple((
546                recognize(pair(alpha1, many0_count(alt((alphanumeric1, is_a("-_")))))),
547                tag("="),
548                alt((parse_str_sq, parse_str_dq, parse_str_unq)),
549            )),
550        )),
551        tag(" }"),
552    )(input)
553}
554
555/// Recognize regular keys of key/value pairs
556#[inline(always)]
557fn parse_key(input: &[u8]) -> IResult<&[u8], Key> {
558    map(
559        recognize(pair(alpha1, many0_count(alt((alphanumeric1, is_a("-_")))))),
560        |s: &[u8]| {
561            if let Ok(c) = Common::try_from(s) {
562                Key::Common(c)
563            } else if s.ends_with(b"uid") {
564                Key::NameUID(NVec::from(s))
565            } else if s.ends_with(b"gid") {
566                Key::NameGID(NVec::from(s))
567            } else {
568                Key::Name(NVec::from(s))
569            }
570        },
571    )(input)
572}
573
574/// Recognize length specifier for EXECVE split arguments, e.g. a1_len
575#[inline(always)]
576fn parse_key_a_x_len(input: &[u8]) -> IResult<&[u8], Key> {
577    map(delimited(tag("a"), dec_u32, tag("_len")), Key::ArgLen)(input)
578}
579
580/// Recognize EXECVE split arguments, e.g. a1[3]
581#[inline(always)]
582fn parse_key_a_xy(input: &[u8]) -> IResult<&[u8], Key> {
583    map(
584        pair(
585            preceded(tag("a"), dec_u32),
586            delimited(tag("["), dec_u16, tag("]")),
587        ),
588        |(x, y)| Key::Arg(x, Some(y)),
589    )(input)
590}
591
592/// Recognize SYSCALL, EXECVE regular argument keys, e.g. a1, a2, a3…
593#[inline(always)]
594fn parse_key_a_x(input: &[u8]) -> IResult<&[u8], Key> {
595    map(preceded(tag("a"), u32), |x| Key::Arg(x, None))(input)
596}
597
598/// Recognize identifiers (used in some irregular messages)
599/// Like [A-Za-z_][A-Za-z0-9_]*
600#[inline(always)]
601fn parse_identifier(input: &[u8]) -> IResult<&[u8], &[u8]> {
602    recognize(pair(
603        alt((alpha1, tag("_"))),
604        many0_count(alt((alphanumeric1, tag("_")))),
605    ))(input)
606}
607
/// Characters permitted in kernel "encoded" strings that would
/// otherwise be hex-encoded.
///
/// That is `!` and the range `#`..=`~`, i.e. printable ASCII without
/// the space and the double quote.
#[inline(always)]
fn is_safe_chr(c: u8) -> bool {
    matches!(c, b'!' | b'#'..=b'~')
}
614
/// Characters permitted in kernel "encoded" strings, minus
/// single-quotes, braces
#[inline(always)]
fn is_safe_unquoted_chr(c: u8) -> bool {
    matches!(c, b'!' | b'#'..=b'&' | b'('..=b'z' | b'|' | b'~')
}
621
/// Separator characters: space, 0x1d and newline
#[inline(always)]
fn is_sep(c: u8) -> bool {
    matches!(c, b' ' | b'\x1d' | b'\n')
}