1use std::convert::{From, TryFrom};
2use std::str;
3
4use nom::{
5 branch::*, bytes::complete::*, character::complete::*, character::*, combinator::*, multi::*,
6 sequence::*, IResult,
7};
8
9use nom::character::complete::{i64 as dec_i64, u16 as dec_u16, u32 as dec_u32, u64 as dec_u64};
10
11use thiserror::Error;
12
13use crate::constants::*;
14use crate::*;
15
/// Configuration for parsing audit log lines.
///
/// Both options are enabled by the [`Default`] implementation.
#[derive(Debug)]
pub struct Parser {
    /// When `true`, fields that follow a `\x1d` byte are parsed like any
    /// other key/value pair; when `false`, everything from the first `\x1d`
    /// up to the end of the line is discarded.
    pub enriched: bool,
    /// When `true`, a single-quoted `msg='…'` value is split into a map of
    /// key/value pairs; when `false`, it is treated like any other encoded
    /// or quoted value.
    pub split_msg: bool,
}
24
25impl Default for Parser {
26 fn default() -> Self {
27 Self {
28 enriched: true,
29 split_msg: true,
30 }
31 }
32}
33
34#[derive(Debug, Error)]
36pub enum ParseError {
37 #[error("cannot parse header: {}", String::from_utf8_lossy(.0))]
39 MalformedHeader(Vec<u8>),
40 #[error("cannot parse body: {}", String::from_utf8_lossy(.0))]
42 MalformedBody(Vec<u8>),
43 #[error("garbage at end of message: {}", String::from_utf8_lossy(.0))]
45 TrailingGarbage(Vec<u8>),
46 #[error("{id} ({ty}) can't hex-decode {}", String::from_utf8_lossy(.hex_str))]
48 HexDecodeError {
49 ty: MessageType,
50 id: EventID,
51 hex_str: Vec<u8>,
52 },
53}
54
55pub fn parse<'a>(raw: &[u8], skip_enriched: bool) -> Result<Message<'a>, ParseError> {
65 Parser {
66 enriched: !skip_enriched,
67 ..Parser::default()
68 }
69 .parse(raw)
70}
71
72impl Parser {
73 pub fn parse<'a, 'b>(&'a self, raw: &'a [u8]) -> Result<Message<'b>, ParseError> {
75 let (rest, (node, ty, id)) =
76 parse_header(raw).map_err(|_| ParseError::MalformedHeader(raw.to_vec()))?;
77
78 let (rest, kv) = self
79 .parse_body(rest, ty)
80 .map_err(|_| ParseError::MalformedBody(rest.to_vec()))?;
81
82 if !rest.is_empty() {
83 return Err(ParseError::TrailingGarbage(rest.to_vec()));
84 }
85
86 let node = node.map(|s| s.to_vec());
87
88 let mut body = Body::new();
89 for (k, v) in kv {
90 body.push((k, v));
91 }
92
93 Ok(Message { id, node, ty, body })
94 }
95
96 #[inline(always)]
99 fn parse_body<'a>(
100 &'a self,
101 input: &'a [u8],
102 ty: MessageType,
103 ) -> IResult<&'a [u8], Vec<(Key, Value<'a>)>> {
104 let (input, special) = match ty {
107 MessageType::AVC => opt(map(
108 tuple((
109 preceded(
110 pair(tag("avc:"), space0),
111 alt((tag("granted"), tag("denied"))),
112 ),
113 delimited(
114 tuple((space0, tag("{"), space0)),
115 many1(terminated(parse_identifier, space0)),
116 tuple((tag("}"), space0, tag("for"), space0)),
117 ),
118 )),
119 |(k, v)| {
120 (
121 Key::Name(NVec::from(k)),
122 Value::List(
123 v.iter()
124 .map(|e| Value::Str(e, Quote::None))
125 .collect::<Vec<_>>(),
126 ),
127 )
128 },
129 ))(input)?,
130 MessageType::TTY => {
131 let (input, _) = opt(tag("tty "))(input)?;
132 (input, None)
133 }
134 MessageType::MAC_POLICY_LOAD => {
135 let (input, _) = opt(tag("policy loaded "))(input)?;
136 (input, None)
137 }
138 _ => opt(map(
139 terminated(tag("netlabel"), pair(tag(":"), space0)),
140 |s| (Key::Name(NVec::from(s)), Value::Empty),
141 ))(input)?,
142 };
143
144 let (input, mut kv) = if !self.enriched {
145 terminated(
146 separated_list0(take_while1(|c| c == b' '), |input| self.parse_kv(input, ty)),
147 alt((
148 value((), tuple((tag("\x1d"), is_not("\n"), tag("\n")))),
149 value((), tag("\n")),
150 )),
151 )(input)?
152 } else {
153 terminated(
154 separated_list0(take_while1(|c| c == b' ' || c == b'\x1d'), |input| {
155 self.parse_kv(input, ty)
156 }),
157 newline,
158 )(input)?
159 };
160
161 if let Some(s) = special {
162 kv.push(s)
163 }
164
165 Ok((input, kv))
166 }
167
168 #[inline(always)]
170 fn parse_kv<'a>(
171 &'a self,
172 input: &'a [u8],
173 ty: MessageType,
174 ) -> IResult<&'a [u8], (Key, Value<'a>)> {
175 let (input, key) = match ty {
176 MessageType::EXECVE
178 if !input.is_empty() && input[0] == b'a' && !input.starts_with(b"argc") =>
179 {
180 terminated(
181 alt((parse_key_a_x_len, parse_key_a_xy, parse_key_a_x)),
182 tag("="),
183 )(input)
184 }
185 MessageType::SYSCALL => terminated(alt((parse_key_a_x, parse_key)), tag("="))(input),
187 _ => terminated(parse_key, tag("="))(input),
188 }?;
189
190 let (input, value) = match (ty, &key) {
191 (MessageType::SYSCALL, Key::Arg(_, None)) => map(
192 recognize(terminated(
193 many1_count(take_while1(is_hex_digit)),
194 peek(take_while1(is_sep)),
195 )),
196 |s| {
197 let ps = unsafe { str::from_utf8_unchecked(s) };
198 match u64::from_str_radix(ps, 16) {
199 Ok(n) => Value::Number(Number::Hex(n)),
200 Err(_) => Value::Str(s, Quote::None),
201 }
202 },
203 )(input)?,
204 (MessageType::SYSCALL, Key::Common(c)) => self.parse_common(input, ty, *c)?,
205 (MessageType::EXECVE, Key::Arg(_, _)) => parse_encoded(input)?,
206 (MessageType::EXECVE, Key::ArgLen(_)) => parse_dec(input)?,
207 (_, Key::Name(name)) => parse_named(input, ty, name)?,
208 (_, Key::Common(c)) => self.parse_common(input, ty, *c)?,
209 (_, Key::NameUID(name)) | (_, Key::NameGID(name)) => {
210 alt((parse_dec, |input| parse_unspec_value(input, ty, name)))(input)?
211 }
212 _ => parse_encoded(input)?,
213 };
214
215 Ok((input, (key, value)))
216 }
217
218 #[inline(always)]
219 fn parse_common<'a>(
220 &'a self,
221 input: &'a [u8],
222 ty: MessageType,
223 c: Common,
224 ) -> IResult<&'a [u8], Value<'a>> {
225 let name = <&str>::from(c).as_bytes();
226 match c {
227 Common::Arch | Common::CapFi | Common::CapFp | Common::CapFver => {
228 alt((parse_hex, |input| parse_unspec_value(input, ty, name)))(input)
229 }
230 Common::Argc
231 | Common::Exit
232 | Common::CapFe
233 | Common::Inode
234 | Common::Item
235 | Common::Items
236 | Common::Pid
237 | Common::PPid
238 | Common::Ses
239 | Common::Syscall => {
240 alt((parse_dec, |input| parse_unspec_value(input, ty, name)))(input)
241 }
242 Common::Success
243 | Common::Cwd
244 | Common::Dev
245 | Common::Tty
246 | Common::Comm
247 | Common::Exe
248 | Common::Name
249 | Common::Nametype
250 | Common::Subj
251 | Common::Key => {
252 alt((parse_encoded, |input| parse_unspec_value(input, ty, name)))(input)
253 }
254 Common::Mode => alt((parse_oct, |input| parse_unspec_value(input, ty, name)))(input),
255 Common::Msg => {
256 if self.split_msg {
257 alt((parse_kv_sq_as_map, |input| {
258 parse_unspec_value(input, ty, name)
259 }))(input)
260 } else {
261 alt((parse_encoded, |input| parse_unspec_value(input, ty, name)))(input)
262 }
263 }
264 }
265 }
266}
267
268#[inline(always)]
270#[allow(clippy::type_complexity)]
271fn parse_header(input: &[u8]) -> IResult<&[u8], (Option<&[u8]>, MessageType, EventID)> {
272 tuple((
273 opt(terminated(parse_node, is_a(" "))),
274 terminated(parse_type, is_a(" ")),
275 parse_msgid,
276 ))(input)
277}
278
279#[inline(always)]
281fn parse_node(input: &[u8]) -> IResult<&[u8], &[u8]> {
282 preceded(tag("node="), is_not(" \t\r\n"))(input)
283}
284
285#[inline(always)]
287fn parse_type(input: &[u8]) -> IResult<&[u8], MessageType> {
288 preceded(
289 tag("type="),
290 alt((
291 map_res(
292 recognize(many1_count(alt((alphanumeric1, tag("_"))))),
293 |s| {
294 EVENT_IDS
295 .get(s)
296 .ok_or(format!("unknown event id {}", String::from_utf8_lossy(s)))
297 .map(|n| MessageType(*n))
298 },
299 ),
300 map(delimited(tag("UNKNOWN["), dec_u32, tag("]")), MessageType),
301 )),
302 )(input)
303}
304
305#[inline(always)]
307fn parse_msgid(input: &[u8]) -> IResult<&[u8], EventID> {
308 map(
309 tuple((
310 preceded(tag("msg=audit("), dec_u64),
311 delimited(tag("."), dec_u64, tag(":")),
312 terminated(dec_u32, pair(tag("):"), space0)),
313 )),
314 |(sec, msec, sequence)| EventID {
315 timestamp: 1000 * sec + msec,
316 sequence,
317 },
318 )(input)
319}
320
321#[inline(always)]
322fn parse_named<'a>(input: &'a [u8], ty: MessageType, name: &[u8]) -> IResult<&'a [u8], Value<'a>> {
323 match FIELD_TYPES.get(name) {
324 Some(&FieldType::Encoded) => {
325 alt((parse_encoded, |input| parse_unspec_value(input, ty, name)))(input)
326 }
327 Some(&FieldType::NumericHex) => {
328 alt((parse_hex, |input| parse_unspec_value(input, ty, name)))(input)
329 }
330 Some(&FieldType::NumericDec) => {
331 alt((parse_dec, |input| parse_unspec_value(input, ty, name)))(input)
332 }
333 Some(&FieldType::NumericOct) => {
334 alt((parse_oct, |input| parse_unspec_value(input, ty, name)))(input)
335 }
336 _ => alt((parse_encoded, |input| parse_unspec_value(input, ty, name)))(input),
338 }
339}
340
341#[inline(always)]
345fn parse_encoded(input: &[u8]) -> IResult<&[u8], Value> {
346 alt((
347 map(parse_str_dq_safe, |s| Value::Str(s, Quote::Double)),
348 terminated(
349 map(
350 recognize(many1_count(take_while_m_n(2, 2, is_hex_digit))),
351 |hexstr: &[u8]| {
352 let mut recoded = Vec::with_capacity(hexstr.len() / 2);
353 for i in 0..hexstr.len() / 2 {
354 let d = unsafe { str::from_utf8_unchecked(&hexstr[2 * i..2 * i + 2]) };
355 recoded.push(u8::from_str_radix(d, 16).unwrap());
356 }
357 Value::Owned(recoded)
358 },
359 ),
360 peek(take_while1(is_sep)),
361 ),
362 terminated(
363 value(Value::Empty, alt((tag("(null)"), tag("?")))),
364 peek(take_while1(is_sep)),
365 ),
366 ))(input)
367}
368
369#[inline(always)]
371fn parse_hex(input: &[u8]) -> IResult<&[u8], Value> {
372 map_res(
373 terminated(take_while1(is_hex_digit), peek(take_while1(is_sep))),
374 |digits| -> Result<_, std::num::ParseIntError> {
375 let digits = unsafe { str::from_utf8_unchecked(digits) };
376 Ok(Value::Number(Number::Hex(u64::from_str_radix(digits, 16)?)))
377 },
378 )(input)
379}
380
381#[inline(always)]
383fn parse_dec(input: &[u8]) -> IResult<&[u8], Value> {
384 map(terminated(dec_i64, peek(take_while1(is_sep))), |n| {
385 Value::Number(Number::Dec(n))
386 })(input)
387}
388
389#[inline(always)]
391fn parse_oct(input: &[u8]) -> IResult<&[u8], Value> {
392 map_res(
393 terminated(take_while1(is_oct_digit), peek(take_while1(is_sep))),
394 |digits| -> Result<_, std::num::ParseIntError> {
395 let digits = unsafe { str::from_utf8_unchecked(digits) };
396 Ok(Value::Number(Number::Oct(u64::from_str_radix(digits, 8)?)))
397 },
398 )(input)
399}
400
401#[inline(always)]
402fn parse_unspec_value<'a>(
403 input: &'a [u8],
404 ty: MessageType,
405 name: &[u8],
406) -> IResult<&'a [u8], Value<'a>> {
407 match (ty, name) {
409 (_, b"subj") => {
410 if let Ok((input, s)) = recognize(tuple((
411 opt(tag("=")),
412 parse_str_unq,
413 opt(delimited(tag(" ("), parse_identifier, tag(")"))),
414 )))(input)
415 {
416 return Ok((input, Value::Str(s, Quote::None)));
417 }
418 }
419 (MessageType::AVC, b"info") => {
420 if let Ok((input, s)) = parse_str_dq(input) {
421 return Ok((input, Value::Str(s, Quote::None)));
422 }
423 }
424 (MessageType::SOCKADDR, b"SADDR") => {
425 let broken_string: IResult<&[u8], &[u8]> =
426 recognize(pair(tag("unknown family"), opt(take_till(is_sep))))(input);
427 if let Ok((input, s)) = broken_string {
428 return Ok((input, Value::Str(s, Quote::None)));
429 }
430 }
431 _ => (),
432 };
433
434 alt((
435 terminated(
436 map(take_while1(is_safe_unquoted_chr), |s| {
437 Value::Str(s, Quote::None)
438 }),
439 peek(take_while1(is_sep)),
440 ),
441 map(parse_kv_sq, |s| Value::Str(s, Quote::Single)),
442 map(parse_str_sq, |s| Value::Str(s, Quote::Single)),
443 map(parse_str_dq, |s| Value::Str(s, Quote::Double)),
444 map(parse_kv_braced, |s| Value::Str(s, Quote::Braces)),
445 map(parse_str_braced, |s| Value::Str(s, Quote::Braces)),
446 value(Value::Empty, peek(take_while1(is_sep))),
447 ))(input)
448}
449
450#[inline(always)]
451fn parse_str_sq(input: &[u8]) -> IResult<&[u8], &[u8]> {
452 delimited(tag("'"), take_while(|c| c != b'\''), tag("'"))(input)
453}
454
455#[inline(always)]
456fn parse_str_dq_safe(input: &[u8]) -> IResult<&[u8], &[u8]> {
457 delimited(tag("\""), take_while(is_safe_chr), tag("\""))(input)
458}
459
460#[inline(always)]
461fn parse_str_dq(input: &[u8]) -> IResult<&[u8], &[u8]> {
462 delimited(tag("\""), take_while(|c| c != b'"'), tag("\""))(input)
463}
464
465#[inline(always)]
466fn parse_str_braced(input: &[u8]) -> IResult<&[u8], &[u8]> {
467 delimited(tag("{ "), take_until(" }"), tag(" }"))(input)
468}
469
470#[inline(always)]
471fn parse_str_unq(input: &[u8]) -> IResult<&[u8], &[u8]> {
472 take_while(is_safe_chr)(input)
473}
474
475#[inline(always)]
476fn parse_str_unq_inside_sq(input: &[u8]) -> IResult<&[u8], &[u8]> {
477 take_while(|c| is_safe_chr(c) && c != b'\'')(input)
478}
479
480#[inline(always)]
481fn parse_str_words_inside_sq(input: &[u8]) -> IResult<&[u8], &[u8]> {
482 let mut rest = input;
483 loop {
484 (rest, _) = take_while(|c| !b"' ".contains(&c))(rest)?;
485 if alt((recognize(tuple((space1, parse_key, tag("=")))), tag("'")))(rest).is_ok() {
486 break;
487 }
488 (rest, _) = space1(rest)?;
489 }
490 let l = input.len() - rest.len();
491 Ok((rest, &input[..l]))
492}
493
494#[inline(always)]
496fn parse_kv_sq(input: &[u8]) -> IResult<&[u8], &[u8]> {
497 delimited(
498 tag("'"),
499 recognize(separated_list0(
500 tag(" "),
501 tuple((
502 recognize(pair(alpha1, many0_count(alt((alphanumeric1, is_a("-_")))))),
503 tag("="),
504 alt((parse_str_dq, parse_str_braced, parse_str_unq_inside_sq)),
505 )),
506 )),
507 tag("'"),
508 )(input)
509}
510
511#[inline(always)]
513fn parse_kv_sq_as_map(input: &[u8]) -> IResult<&[u8], Value> {
514 map(
515 delimited(
516 tag("'"),
517 separated_list0(
518 space1,
519 alt((separated_pair(
520 parse_key,
521 alt((
522 tag("="),
523 recognize(tuple((tag(":"), space0))), )),
525 alt((
526 parse_encoded,
527 map(parse_str_words_inside_sq, |v| Value::Str(v, Quote::None)),
528 map(parse_str_unq_inside_sq, |v| Value::Str(v, Quote::None)),
529 )),
530 ),)),
531 ),
532 tag("'"),
533 ),
534 Value::Map,
535 )(input)
536}
537
538#[inline(always)]
540fn parse_kv_braced(input: &[u8]) -> IResult<&[u8], &[u8]> {
541 delimited(
542 tag("{ "),
543 recognize(separated_list0(
544 tag(" "),
545 tuple((
546 recognize(pair(alpha1, many0_count(alt((alphanumeric1, is_a("-_")))))),
547 tag("="),
548 alt((parse_str_sq, parse_str_dq, parse_str_unq)),
549 )),
550 )),
551 tag(" }"),
552 )(input)
553}
554
555#[inline(always)]
557fn parse_key(input: &[u8]) -> IResult<&[u8], Key> {
558 map(
559 recognize(pair(alpha1, many0_count(alt((alphanumeric1, is_a("-_")))))),
560 |s: &[u8]| {
561 if let Ok(c) = Common::try_from(s) {
562 Key::Common(c)
563 } else if s.ends_with(b"uid") {
564 Key::NameUID(NVec::from(s))
565 } else if s.ends_with(b"gid") {
566 Key::NameGID(NVec::from(s))
567 } else {
568 Key::Name(NVec::from(s))
569 }
570 },
571 )(input)
572}
573
574#[inline(always)]
576fn parse_key_a_x_len(input: &[u8]) -> IResult<&[u8], Key> {
577 map(delimited(tag("a"), dec_u32, tag("_len")), Key::ArgLen)(input)
578}
579
580#[inline(always)]
582fn parse_key_a_xy(input: &[u8]) -> IResult<&[u8], Key> {
583 map(
584 pair(
585 preceded(tag("a"), dec_u32),
586 delimited(tag("["), dec_u16, tag("]")),
587 ),
588 |(x, y)| Key::Arg(x, Some(y)),
589 )(input)
590}
591
592#[inline(always)]
594fn parse_key_a_x(input: &[u8]) -> IResult<&[u8], Key> {
595 map(preceded(tag("a"), u32), |x| Key::Arg(x, None))(input)
596}
597
598#[inline(always)]
601fn parse_identifier(input: &[u8]) -> IResult<&[u8], &[u8]> {
602 recognize(pair(
603 alt((alpha1, tag("_"))),
604 many0_count(alt((alphanumeric1, tag("_")))),
605 ))(input)
606}
607
/// Returns `true` for `!` and for every byte from `#` through `~`, i.e. for
/// all printable, non-space ASCII characters except the double quote.
#[inline(always)]
fn is_safe_chr(c: u8) -> bool {
    matches!(c, b'!' | b'#'..=b'~')
}
614
/// Returns `true` for every printable, non-space ASCII character except the
/// double quote, the single quote and the curly braces.
#[inline(always)]
fn is_safe_unquoted_chr(c: u8) -> bool {
    matches!(c, b'!' | b'#'..=b'&' | b'('..=b'z' | b'|' | b'~')
}
621
/// Returns `true` for the bytes that terminate a value: space, the ASCII
/// group separator (`0x1d`) and newline.
#[inline(always)]
fn is_sep(c: u8) -> bool {
    matches!(c, b' ' | b'\x1d' | b'\n')
}