muninn_query 0.5.0

Query language for the muninn logging stack
Documentation
use crate::parser::utils::{IResult, LocatedSpan};
use nom::{
  branch::alt,
  bytes::complete::{is_not, take_while_m_n},
  character::complete::{char, multispace1},
  combinator::{map, map_opt, map_res, value, verify},
  multi::fold_many0,
  sequence::{delimited, preceded},
};

/// Parse the `u{XXXX}` part of a unicode escape, where `XXXX` is between one
/// and six hexadecimal digits, and return the character it denotes.
///
/// The leading backslash is not handled here; `escaped_char` consumes it
/// before delegating, so that the full accepted form is e.g. `\u{00AC}`.
///
/// The parser fails when the braces or digits are missing, or when the number
/// is not a valid unicode scalar value.
fn unicode(input: LocatedSpan) -> IResult<char> {
  map_opt(
    // Turn the digits between the braces into a number.
    map_res(
      preceded(
        char('u'),
        delimited(
          char('{'),
          take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit()),
          char('}'),
        ),
      ),
      |digits: LocatedSpan| u32::from_str_radix(*digits, 16),
    ),
    // Not every u32 is a code point, so the conversion is fallible and a
    // `None` is surfaced as a parse error by `map_opt`.
    std::char::from_u32,
  )(input)
}

/// Parse a single backslash escape and return the fragment it stands for.
///
/// Recognised escapes:
/// - `\u{XXXX}` yields the given unicode character
/// - `\n`, `\r`, `\t` yield newline, carriage return and tab
/// - `\\` and `\"` yield a literal backslash and double quote
/// - `\*` and `\?` yield a two-character literal: U+001B (ESC) followed by
///   the `*` or `?` itself
///
/// NOTE(review): the ESC prefix presumably lets a later stage tell an escaped
/// wildcard from an unescaped one; confirm against the consumer of the
/// parsed string.
fn escaped_char(input: LocatedSpan) -> IResult<StringFragment> {
  // Everything that may follow the backslash. The branch order is kept as-is
  // because it determines which error is reported when nothing matches.
  let after_backslash = alt((
    map(unicode, StringFragment::EscapedChar),
    value(StringFragment::EscapedChar('\n'), char('n')),
    value(StringFragment::EscapedChar('\r'), char('r')),
    value(StringFragment::EscapedChar('\t'), char('t')),
    value(StringFragment::EscapedChar('\\'), char('\\')),
    value(StringFragment::EscapedChar('"'), char('"')),
    value(StringFragment::Literal("\u{1B}*"), char('*')),
    value(StringFragment::Literal("\u{1B}?"), char('?')),
  ));

  preceded(char('\\'), after_backslash)(input)
}

/// Parse a backslash, followed by any amount of whitespace. This is used later
/// to discard any escaped whitespace.
fn escaped_whitespace(input: LocatedSpan) -> IResult<LocatedSpan> {
  preceded(char('\\'), multispace1)(input)
}

/// Parse a non-empty run of characters that contains neither a backslash
/// nor a double quote, i.e. text that needs no escape processing.
fn literal(input: LocatedSpan) -> IResult<LocatedSpan> {
  // Stops at the first `"` or `\`.
  let plain_run = is_not("\"\\");

  // Reject an empty match so that callers looping over this parser always
  // make progress.
  verify(plain_run, |run: &LocatedSpan| !run.is_empty())(input)
}

/// A string fragment contains a fragment of a string being parsed: either
/// a non-empty Literal (a series of non-escaped characters), a single
/// parsed escaped character, or a block of escaped whitespace.
///
/// `string` folds a sequence of these fragments into the final `String`.
#[derive(Debug, Clone, Copy, PartialEq)]
enum StringFragment<'a> {
  /// A run of text that is appended to the output verbatim. Produced by
  /// `literal`, and by `escaped_char` for the `\*` and `\?` escapes.
  Literal(&'a str),
  /// A single character produced by an escape sequence; appended to the
  /// output as-is.
  EscapedChar(char),
  /// A backslash followed by whitespace; contributes nothing to the output.
  EscapedWS,
}

/// Parse the next piece of a string body as a `StringFragment`.
///
/// Tries, in order: a run of plain text (`literal`), a backslash escape
/// (`escaped_char`), and escaped whitespace (`escaped_whitespace`).
fn fragment(input: LocatedSpan) -> IResult<StringFragment> {
  // Plain text is borrowed straight from the input.
  let plain_text = map(literal, |text: LocatedSpan| StringFragment::Literal(*text));

  // The matched whitespace itself is irrelevant; only the marker is kept.
  let skipped_whitespace = value(StringFragment::EscapedWS, escaped_whitespace);

  alt((plain_text, escaped_char, skipped_whitespace))(input)
}

/// Parse a string. Use a loop of fragment and push all of the fragments
/// into an output string.
pub(crate) fn string(input: LocatedSpan) -> IResult<String> {
  let build_string = fold_many0(fragment, String::new, |mut string, fragment| {
    match fragment {
      StringFragment::Literal(s) => string.push_str(s),
      StringFragment::EscapedChar(c) => string.push(c),
      StringFragment::EscapedWS => {}
    }
    string
  });

  // Finally, parse the string. Note that, if `build_string` could accept a raw
  // " character, the closing delimiter " would never match. When using
  // `delimited` with a looping parser (like fold_many0), be sure that the
  // loop won't accidentally match your closing delimiter!
  delimited(char('"'), build_string, char('"'))(input)
}

#[cfg(test)]
mod tests {
  use super::string;
  use crate::parser::utils::{span, unwrap_span};

  /// Checks plain strings, escaped quotes and backslashes, a newline escape,
  /// and that input after the closing quote is left unparsed.
  #[test]
  fn test_string() {
    // Each case is (input, unparsed remainder, decoded string).
    let cases = [
      ("\"test\"", "", "test"),
      ("\"a b\"", "", "a b"),
      // An escaped quote does not terminate the string.
      ("\"\\\"\"", "", "\""),
      // An escaped backslash followed by the closing quote; the extra
      // trailing quote is left over.
      ("\"\\\\\"\"", "\"", "\\"),
      ("\"\\n\"", "", "\n"),
    ];

    for &(input, remainder, decoded) in cases.iter() {
      assert_eq!(
        string(span(input)).map(unwrap_span),
        Ok((remainder, decoded.to_string()))
      );
    }
  }
}