Skip to main content

rsigma_parser/
fieldpath.rs

//! Field-path helpers shared by the parser, evaluator, and converters.
//!
//! Array selectors (`[any]`, `[all]`, `[N]`, ...) are written in brackets on a
//! field path. To keep a literal bracket expressible in a field name, a literal
//! `[` or `]` is escaped as `\[` / `\]` (mirroring Sigma's `\*` / `\?` wildcard
//! escaping). Only an *unescaped* bracket opens a selector; an escaped one is a
//! literal part of the field name and is unescaped before the field is resolved.
8
9use std::borrow::Cow;
10
/// Turn the escape sequences `\[` and `\]` back into plain `[` and `]`.
///
/// A backslash that is not directly followed by a bracket is copied through
/// unchanged. Input containing no backslash at all is handed back as a borrow,
/// so the common case performs no allocation.
pub fn unescape_brackets(s: &str) -> Cow<'_, str> {
    if s.find('\\').is_none() {
        return Cow::Borrowed(s);
    }
    let mut unescaped = String::with_capacity(s.len());
    let mut rest = s;
    // Copy the text chunk by chunk, pausing at each backslash to decide whether
    // it is an escape (dropped) or an ordinary character (kept).
    while let Some(pos) = rest.find('\\') {
        unescaped.push_str(&rest[..pos]);
        // A backslash is a single byte, so `pos + 1` is a valid char boundary.
        let after = &rest[pos + 1..];
        let next = after.as_bytes().first().copied();
        if !matches!(next, Some(b'[') | Some(b']')) {
            unescaped.push('\\');
        }
        // When a bracket follows, it leads `after` and is copied with the next chunk.
        rest = after;
    }
    unescaped.push_str(rest);
    Cow::Owned(unescaped)
}
29
/// Prefix each `[` or `]` that is not already preceded by a backslash with one,
/// producing `\[` / `\]`; brackets that are already escaped, and all other
/// characters, pass through unchanged. This is the inverse of
/// [`unescape_brackets`].
///
/// Intended for rendering a field name whose brackets must be read literally
/// (for example below the array-matching spec version), so that the
/// escape-aware field resolver does not take a trailing `[...]` for a selector.
/// When nothing needs escaping (the common case) the input is returned as a
/// borrow.
pub fn escape_brackets(s: &str) -> Cow<'_, str> {
    let raw = s.as_bytes();
    // A bracket needs escaping unless the byte right before it is a backslash.
    let needs_escape = |idx: usize| -> bool {
        let after_backslash = idx > 0 && raw[idx - 1] == b'\\';
        matches!(raw[idx], b'[' | b']') && !after_backslash
    };
    if (0..raw.len()).all(|idx| !needs_escape(idx)) {
        return Cow::Borrowed(s);
    }
    let mut escaped = String::with_capacity(s.len() + 4);
    let mut copied_upto = 0;
    // Brackets are ASCII, so every matching index is a valid char boundary and
    // the text between matches can be copied as whole slices.
    for idx in (0..raw.len()).filter(|&idx| needs_escape(idx)) {
        escaped.push_str(&s[copied_upto..idx]);
        escaped.push('\\');
        copied_upto = idx;
    }
    escaped.push_str(&s[copied_upto..]);
    Cow::Owned(escaped)
}
52
/// Byte offset of the first `ch` in `s` that does not directly follow a
/// backslash, or `None` when there is no such byte.
///
/// `ch` must be an ASCII byte (`[` or `]` in this module). Scanning raw bytes
/// is sound for ASCII needles because an ASCII byte never occurs inside a
/// UTF-8 multibyte sequence.
pub fn first_unescaped(s: &str, ch: u8) -> Option<usize> {
    // Tracks whether the byte seen on the previous step was a backslash; it
    // starts out false so a match at offset 0 always counts as unescaped.
    let mut prev_is_backslash = false;
    s.bytes().position(|byte| {
        let hit = byte == ch && !prev_is_backslash;
        prev_is_backslash = byte == b'\\';
        hit
    })
}
68
/// Reports whether the final byte of `s` is the ASCII byte `ch` and the byte
/// just before it (if any) is not a backslash.
pub fn ends_with_unescaped(s: &str, ch: u8) -> bool {
    match s.as_bytes() {
        // Nothing to end with.
        [] => false,
        // A lone byte has no predecessor that could escape it.
        [only] => *only == ch,
        // Otherwise only the last two bytes matter.
        [.., before, last] => *last == ch && *before != b'\\',
    }
}
78
#[cfg(test)]
mod tests {
    use super::*;

    /// Only `\[` and `\]` are unescaped; other backslashes are kept.
    #[test]
    fn unescape_only_brackets() {
        let cases = [
            ("plain", "plain"),
            ("args\\[0\\]", "args[0]"),
            ("a\\[b\\]c", "a[b]c"),
            // A backslash not before a bracket is preserved.
            ("a\\b", "a\\b"),
        ];
        for &(input, expected) in &cases {
            assert_eq!(unescape_brackets(input), expected);
        }
    }

    /// Only brackets that are not yet escaped gain a backslash.
    #[test]
    fn escape_only_unescaped_brackets() {
        let cases = [
            ("plain", "plain"),
            ("args[0]", "args\\[0\\]"),
            ("connections[any]", "connections\\[any\\]"),
            // Already-escaped brackets are left as-is (no double escaping).
            ("args\\[0\\]", "args\\[0\\]"),
        ];
        for &(input, expected) in &cases {
            assert_eq!(escape_brackets(input), expected);
        }

        // Round-trips with unescape_brackets.
        let escaped = escape_brackets("a[b]c");
        assert_eq!(unescape_brackets(&escaped), "a[b]c");
    }

    /// Escaped brackets are skipped when searching for the first match.
    #[test]
    fn finds_first_unescaped_bracket() {
        let cases = [
            ("args[0]", Some(4)),
            ("args\\[0\\]", None),
            ("a\\[b[any]", Some(4)),
        ];
        for &(input, expected) in &cases {
            assert_eq!(first_unescaped(input, b'['), expected);
        }
    }

    /// A trailing `]` counts only when it is not escaped.
    #[test]
    fn unescaped_trailing_close() {
        let cases = [
            ("args[0]", true),
            ("args\\[0\\]", false),
            ("args", false),
        ];
        for &(input, expected) in &cases {
            assert_eq!(ends_with_unescaped(input, b']'), expected);
        }
    }
}