// logparse 0.2.0
//
// Parse arbitrary messages containing Rust-like debug output in order to syntax-highlight them.
// Documentation
use proptest_derive::Arbitrary;
use std::borrow::Cow;

/// The punctuation that joins the two halves of a [`Token::Separated`].
///
/// This is a plain, field-less tag, so besides `PartialEq` it also implements
/// `Eq` and `Hash`, which lets callers use it as a key in sets and maps.
///
/// The symbol given for each variant is inferred from the variant name and the
/// examples on [`Token::Separated`] (`a = 3`, `a: 5`, `a::b::c`).
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum Separator {
    /// An equals sign, `=`.
    Eq,
    /// A single colon, `:`.
    Colon,
    /// A double colon, `::`.
    DoubleColon,
}

/// The quote character that delimits a string. See [`Token::String`].
///
/// This is a plain, field-less tag, so besides `PartialEq` it also implements
/// `Eq` and `Hash`, which lets callers use it as a key in sets and maps.
///
/// The character given for each variant is inferred from the variant name and
/// the examples on [`Token::String`].
#[derive(Copy, Clone, Debug, Arbitrary, PartialEq, Eq, Hash)]
pub enum QuoteType {
    /// A single quote, `'`.
    Single,
    /// A double quote, `"`.
    Double,
    /// A backtick, `` ` ``.
    Backtick,
}

/// The kind of bracket pair around a delimited group. See [`Token::Delimited`].
///
/// Like the other field-less tag enums in this module it is `Copy`, and it
/// implements `Eq` and `Hash` so it can be used as a key in sets and maps.
///
/// The bracket pair given for each variant is inferred from the variant name
/// and the examples on [`Token::Delimited`].
#[derive(Copy, Clone, Debug, Arbitrary, PartialEq, Eq, Hash)]
pub enum Delimiter {
    /// Parentheses, `(` and `)`.
    Paren,
    /// Square brackets, `[` and `]`.
    Bracket,
    /// Curly braces, `{` and `}`.
    Brace,
    /// Presumably angle brackets, `<` and `>`.
    // NOTE(review): the docs on `Token::Delimited` only mention parentheses,
    // braces and brackets - confirm how this variant is produced.
    Angle,
}

/// A string together with its prefix, suffix and hashtags. See [`Token::String`].
#[derive(Debug, Clone, PartialEq)]
pub struct AnyString<'a> {
    /// Text directly in front of the string, such as the `f` prefix in `f"mrrp"`.
    pub prefix: Cow<'a, str>,
    /// Which kind of quote the string uses.
    pub ty: QuoteType,
    /// The text of the string.
    pub contents: Cow<'a, str>,
    /// How many hashtags were used around the string.
    /// Strings have to open and close with an equal number of hashtags.
    pub num_hashtags: usize,
    /// Text directly following the string.
    pub suffix: Cow<'a, str>,
}

/// A run of zero or more spaces.
#[derive(Debug, Clone, PartialEq)]
pub struct Space<'a>(pub Cow<'a, str>);

/// The separator in front of a path segment. See [`Token::Path`].
///
/// This is a plain, field-less tag, so besides `PartialEq` it also implements
/// `Eq` and `Hash`, which lets callers use it as a key in sets and maps.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Arbitrary)]
pub enum PathSep {
    /// Happens at the start of paths, for the no leading / case
    None,
    /// A forward slash, `/` (inferred from the variant name and the
    /// `C:/Users` example on [`Token::Path`]).
    Slash,
    /// A backslash, `\` (inferred from the variant name and the
    /// `C:\Users` example on [`Token::Path`]).
    Backslash,
}

/// One path segment, stored together with the separator in front of it.
/// See [`Token::Path`].
#[allow(missing_docs)]
#[derive(Debug, Clone, PartialEq)]
pub struct PathSegment<'a> {
    /// The separator leading this segment.
    pub leading_separator: PathSep,
    /// The text of the segment.
    pub segment: Cow<'a, str>,
}

/// The location suffix of a file name, i.e. the `:3:4` part in `mrrp.rs:3:4`.
/// See [`Path`].
#[derive(Debug, Clone, PartialEq)]
pub struct FileLocation<'a> {
    /// The line number, kept as text.
    pub line: Cow<'a, str>,
    /// The column offset, kept as text, when one is present.
    pub offset: Option<Cow<'a, str>>,
}

/// The file name at the end of a [`Path`].
#[allow(missing_docs)]
#[derive(Debug, Clone, PartialEq)]
pub struct FileName<'a> {
    /// The separator leading the file name.
    pub leading_separator: PathSep,
    /// The text of this last path segment.
    pub segment: Cow<'a, str>,
    /// The extension, without its dot, when there is one.
    pub ext_excluding_dot: Option<Cow<'a, str>>,
    /// The line (and optionally column) suffix, when there is one.
    pub location: Option<FileLocation<'a>>,
}

/// Anything path-like that was recognized. See [`Token::Path`].
#[allow(missing_docs)]
#[derive(Debug, Clone, PartialEq)]
pub struct Path<'a> {
    /// The drive character without its colon, when there is one
    /// (presumably the `C` of `C:/Users`).
    pub drive_excluding_colon: Option<char>,
    /// The path segments in front of the file name.
    pub segments: Vec<PathSegment<'a>>,

    /// The file name, i.e. the last path segment, optionally with an extension and
    /// file/line number.
    pub filename: FileName<'a>,
}

/// A number with an optional suffix. See [`Token::Number`].
#[allow(missing_docs)]
#[derive(Debug, Clone, PartialEq)]
pub struct Number<'a> {
    /// The number itself, kept as text.
    pub number: Cow<'a, str>,
    /// The suffix without the underscore in front of it, when there is one
    /// (presumably the `usize` of `10_usize`).
    pub suffix_without_underscore: Option<Cow<'a, str>>,
}

/// Anything without spaces in it that can act as the prefix of a `Delimited`,
/// i.e. an English word, or a Rust `::`-separated path.
#[derive(Debug, Clone, PartialEq)]
pub enum Atom<'a> {
    /// Plain, uninterpreted text.
    Text(Cow<'a, str>),
}

/// The main AST node: each of these is treated as one unit, and units may be separated by spaces.
#[derive(Clone, Debug, PartialEq)]
pub enum Token<'a> {
    /// A literal `true`.
    /// For example:
    ///
    /// ```rust
    /// # logparse::generate_ast_recognizer!(is_true, Token::True);
    /// assert!(is_true("true"));
    ///
    /// // some counterexamples
    /// assert!(!is_true("14"));
    /// assert!(!is_true("`mrow!`"));
    /// assert!(!is_true("false"));
    /// ```
    True,
    /// A literal `false`.
    /// For example:
    ///
    /// ```rust
    /// # logparse::generate_ast_recognizer!(is_false, Token::False);
    /// assert!(is_false("false"));
    ///
    /// // some counterexamples
    /// assert!(!is_false("14"));
    /// assert!(!is_false("`mrow!`"));
    /// assert!(!is_false("true"));
    /// ```
    False,
    /// A literal `None`.
    /// For example:
    ///
    /// ```rust
    /// # logparse::generate_ast_recognizer!(is_none, Token::None);
    /// assert!(is_none("None"));
    ///
    /// // some counterexamples
    /// assert!(!is_none("14"));
    /// assert!(!is_none("`mrow!`"));
    /// assert!(!is_none("true"));
    /// ```
    None,

    /// A lifetime, a la Rust. The rules for this are pretty restrictive.
    /// A lifetime must be a single quote, followed by no more than 3 ASCII lowercase alphabetic
    /// characters, followed by anything nonalphabetic that is *not* a closing quote, like a comma,
    /// whitespace, eof, etc.
    ///
    /// ```rust
    /// # logparse::generate_ast_recognizer!(is_lt, Token::Lifetime(_));
    ///
    /// assert!(is_lt("'a"));
    /// assert!(is_lt("'tcx"));
    ///
    /// // some counterexamples
    ///
    /// assert!(!is_lt("'verylong"));
    /// assert!(!is_lt("'foo'"));
    /// assert!(!is_lt("'a'"));
    /// assert!(!is_lt("'a longer single-quoted string'"));
    /// assert!(!is_lt("a"));
    /// assert!(!is_lt("13"));
    /// ```
    Lifetime(Cow<'a, str>),

    /// A path, anything that looks vaguely path-like.
    /// For example:
    ///
    /// ```rust
    /// # logparse::generate_ast_recognizer!(is_path, Token::Path(_));
    /// assert!(is_path("/"));
    /// assert!(is_path("/home"));
    /// assert!(is_path("C:/Users"));
    /// assert!(is_path("C:\\Users"));
    /// assert!(is_path("/home/some_path/file-some_where.rs"));
    /// assert!(is_path("/home/some_path/file-and-line.rs:3"));
    /// assert!(is_path("/home/some_path/file-and-line.rs:3:4"));
    ///
    /// // some counterexamples
    /// assert!(!is_path(":14"));
    /// assert!(!is_path(":14:15"));
    /// assert!(!is_path("14"));
    /// assert!(!is_path("`mrow!`"));
    /// assert!(!is_path("true"));
    /// ```
    Path(Path<'a>),
    /// A string literal, with quite flexible rules on what's considered a string.
    /// Any Rust string literal parses, and a little more.
    /// For example:
    ///
    /// ```rust
    /// # logparse::generate_ast_recognizer!(is_string, Token::String(_));
    /// assert!(is_string(r##""mrrp""##));
    /// assert!(is_string(r##"'mrrp'"##));
    /// assert!(is_string(r##"`mrrp`"##));
    /// assert!(is_string(r##"b"mrrp""##));
    /// assert!(is_string(r##"f"mrrp""##));
    /// assert!(is_string(r##"c"mrrp""##));
    /// assert!(is_string(r##"r"mrrp""##));
    /// assert!(is_string(r##"r#"mrrp"#"##));
    /// assert!(is_string(r##"b#"mrrp"#"##));
    /// assert!(is_string(r####"anyprefix###"mrrp"###anysuffix"####));
    ///
    /// // some counterexamples
    /// assert!(!is_string("14"));
    /// assert!(!is_string("true"));
    /// assert!(!is_string("/home"));
    ///
    /// // non-matching hashtags
    /// assert!(!is_string(r#"r#"mrrp""#));
    /// assert!(!is_string(r###"r#"mrrp"##"###));
    /// assert!(!is_string(r##"r"mrrp"#"##));
    ///
    /// // non-matching quotes
    /// assert!(!is_string(r#"`mrrp""#));
    /// assert!(!is_string(r#"`mrrp'"#));
    /// ```
    String(AnyString<'a>),
    /// A number, float or int.
    /// There must not be any alphabetic character after the number without a space in between.
    /// That's to guard against finding numbers inside hashes, for example.
    ///
    /// A suffix is allowed, however, as long as it is separated by an underscore.
    ///
    /// For example:
    ///
    /// ```rust
    /// # logparse::generate_ast_recognizer!(is_number, Token::Number(_));
    /// assert!(is_number("1"));
    /// assert!(is_number("99999999"));
    /// assert!(is_number("1.5"));
    /// assert!(is_number("1e10"));
    /// assert!(is_number("-1"));
    /// assert!(is_number("-1.5"));
    ///
    /// // with suffix
    /// assert!(is_number("10_usize"));
    ///
    /// // some counterexamples
    /// assert!(!is_number("`mrow!`"));
    /// assert!(!is_number("true"));
    ///
    /// // suffix
    /// assert!(!is_number("14a"));
    /// ```
    Number(Number<'a>),

    // TODO: RustPath
    /// Any token, followed by a [`Separator`], followed by another segment.
    /// For example:
    ///
    /// ```rust
    /// # logparse::generate_ast_recognizer!(is_sep, Token::Separated {..});
    /// assert!(is_sep("a = 3"));
    /// assert!(is_sep("a::b::c"));
    /// assert!(is_sep("a: 5"));
    ///
    /// // some counterexamples
    /// assert!(!is_sep("`mrow!`"));
    /// assert!(!is_sep("true"));
    /// assert!(!is_sep("14"));
    /// ```
    Separated {
        /// The part before the separator.
        before: Box<Token<'a>>,
        /// The space between the `before` part and the separator.
        space_before: Space<'a>,
        /// The separator itself.
        separator: Separator,
        /// The segment after the separator.
        after: Box<Segment<'a>>,
    },
    // NOTE(review): `Delimiter` also has an `Angle` variant that the text below
    // does not mention - confirm whether angle brackets are recognized too.
    /// A segment, delimited by parentheses, braces or brackets, with an optional prefix.
    /// The prefix will be classified as a constructor if the delimiter is braces or parentheses.
    ///
    /// For example:
    ///
    /// ```rust
    /// # logparse::generate_ast_recognizer!(is_delim, Token::Delimited(_));
    /// assert!(is_delim("()"));
    /// assert!(is_delim("[]"));
    /// assert!(is_delim("{}"));
    /// assert!(is_delim("(1)"));
    /// assert!(is_delim("[1]"));
    /// assert!(is_delim("{1}"));
    /// assert!(is_delim("{/home/mrrp.rs}"));
    /// assert!(is_delim("constructor{/home/mrrp.rs}"));
    /// assert!(is_delim("constructor {/home/mrrp.rs}"));
    ///
    /// // some counterexamples
    /// assert!(!is_delim("`mrow!`"));
    /// assert!(!is_delim("true"));
    /// assert!(!is_delim("14"));
    /// ```
    Delimited(Delimited<'a>),

    /// Any other text that couldn't otherwise be categorized.
    /// For example:
    ///
    /// ```rust
    /// # logparse::generate_ast_recognizer!(is_atom, Token::Atom(_));
    /// assert!(is_atom("mrrp"));
    ///
    /// // some counterexamples
    /// assert!(!is_atom("`mrow!`"));
    /// assert!(!is_atom("true"));
    /// assert!(!is_atom("14"));
    /// ```
    Atom(Atom<'a>),
}

/// A delimited group with an optional prefix. See [`Token::Delimited`].
#[derive(Debug, Clone, PartialEq)]
pub struct Delimited<'a> {
    /// The prefix of the delimiter, i.e. the constructor part in `Some(mrrp)`,
    /// paired with a [`Space`] (presumably the space between the prefix and the
    /// opening delimiter).
    pub prefix: Option<(Atom<'a>, Space<'a>)>,
    /// Which kind of delimiter surrounds the contents.
    pub delimiter: Delimiter,
    /// What the delimited segment contains.
    pub contents: Segments<'a>,
}

/// A single token together with the (optional) space in front of it.
#[derive(Debug, Clone, PartialEq)]
pub struct Segment<'a> {
    /// The spaces in front of this segment; may be empty.
    pub leading_space: Space<'a>,
    /// The contents of the segment.
    pub token: Token<'a>,
}

/// A sequence of segments, with possible trailing space.
///
/// Any input is split up into many segments, along with the spaces around them.
/// A segment may be a meaningless atom, or it may be recognized as something with more meaning.
#[derive(Debug, Clone, PartialEq)]
pub struct Segments<'a> {
    /// The segments, in order.
    pub segments: Vec<Segment<'a>>,
    /// The spaces after the last segment; may be empty.
    pub trailing_space: Space<'a>,
}