Skip to main content

rsigma_eval/event/
json.rs

1use std::borrow::Cow;
2
3use rsigma_parser::fieldpath::{first_unescaped, unescape_brackets};
4use serde_json::Value;
5
6use super::{Event, EventValue};
7
/// Maximum nesting depth for recursive JSON traversal.
///
/// Passed as the initial depth budget to the recursive walkers in this file
/// (`any_string_value_json`, `collect_string_values_json` and
/// `collect_field_keys`); each of them stops descending once the budget
/// reaches zero.
const MAX_NESTING_DEPTH: usize = 64;
10
/// Zero-copy event backed by `serde_json::Value`.
///
/// Supports both borrowed (`&Value`) and owned (`Value`) backing via `Cow`.
/// This is the primary implementation for JSON/NDJSON input.
///
/// Flat keys are checked first: `"actor.id"` as a single key takes precedence
/// over `{"actor": {"id": ...}}` nested traversal.
#[derive(Debug)]
pub struct JsonEvent<'a> {
    /// The wrapped JSON document, either borrowed for `'a` or owned.
    inner: Cow<'a, Value>,
}
22
23impl<'a> JsonEvent<'a> {
24    /// Wrap a borrowed JSON value as an event.
25    pub fn borrow(v: &'a Value) -> Self {
26        Self {
27            inner: Cow::Borrowed(v),
28        }
29    }
30
31    /// Wrap an owned JSON value as an event.
32    pub fn owned(v: Value) -> Self {
33        Self {
34            inner: Cow::Owned(v),
35        }
36    }
37}
38
39impl<'a> From<&'a Value> for JsonEvent<'a> {
40    fn from(v: &'a Value) -> Self {
41        Self::borrow(v)
42    }
43}
44
45impl From<Value> for JsonEvent<'static> {
46    fn from(v: Value) -> Self {
47        Self::owned(v)
48    }
49}
50
51impl<'a> Event for JsonEvent<'a> {
52    /// Get a field value by name, supporting dot-notation for nested access.
53    ///
54    /// Checks for a flat key first (exact match), then falls back to
55    /// dot-separated traversal. When a path segment crosses an array, every
56    /// element is followed and all terminal values are collected: a single
57    /// hit is returned as-is, multiple hits are returned as an
58    /// [`EventValue::Array`] so the matcher applies any-member semantics
59    /// (rather than only testing the first element).
60    fn get_field(&self, path: &str) -> Option<EventValue<'_>> {
61        let value: &Value = &self.inner;
62
63        if let Some(obj) = value.as_object()
64            && let Some(v) = obj.get(path)
65        {
66            return Some(EventValue::from(v));
67        }
68
69        if path.contains('.') || path.contains('[') || path.contains('\\') {
70            let ops = parse_path_ops(path);
71            let mut collected: Vec<EventValue<'_>> = Vec::new();
72            collect_by_ops(value, &ops, &mut collected);
73            return match collected.len() {
74                0 => None,
75                1 => collected.pop(),
76                _ => Some(EventValue::Array(collected)),
77            };
78        }
79
80        None
81    }
82
83    /// Check if any string value in the event satisfies a predicate.
84    ///
85    /// Short-circuits on the first match, avoiding the allocation of
86    /// collecting all string values into a `Vec`.
87    fn any_string_value(&self, pred: &dyn Fn(&str) -> bool) -> bool {
88        any_string_value_json(&self.inner, pred, MAX_NESTING_DEPTH)
89    }
90
91    /// Iterate over all string values in the event (for keyword detection).
92    ///
93    /// Recursively walks the entire event object and yields every string
94    /// value found, including inside nested objects and arrays. Traversal
95    /// is capped at 64 levels of nesting to prevent stack overflow.
96    fn all_string_values(&self) -> Vec<Cow<'_, str>> {
97        let mut values = Vec::new();
98        collect_string_values_json(&self.inner, &mut values, MAX_NESTING_DEPTH);
99        values
100    }
101
102    fn to_json(&self) -> Value {
103        self.inner.as_ref().clone()
104    }
105
106    /// Walk every leaf field in the event and yield dot-joined paths.
107    /// Intermediate object names (`actor` for `{"actor":{"id":"x"}}`)
108    /// are NOT emitted; only the leaves (`actor.id`) appear. This
109    /// matches typical Sigma rules, which reference nested values via
110    /// dot-notation; emitting the intermediate name would falsely flag
111    /// every parent object as "unknown" in the gap signal even when
112    /// the rule references a child path. Top-level scalar fields
113    /// (`{"actor":"alice"}`) emit `actor` because they ARE leaves.
114    /// Arrays contribute their parent path once; per-index suffixes
115    /// are not emitted.
116    fn field_keys(&self) -> Vec<Cow<'_, str>> {
117        let mut out = Vec::new();
118        collect_field_keys(&self.inner, "", &mut out, MAX_NESTING_DEPTH);
119        out
120    }
121}
122
/// A single field-path navigation step.
///
/// Produced by `parse_path_ops` and consumed by `collect_by_ops`.
enum PathOp<'a> {
    /// Object key lookup (bracket-unescaped). Distributes over arrays (implicit
    /// any-member).
    Key(Cow<'a, str>),
    /// Positional array index, possibly negative. Selects one element; never
    /// fans out.
    Index(i64),
}
132
/// Translate a possibly-negative positional index into a concrete offset
/// within an array of `len` elements.
///
/// Non-negative indices are used as-is. Negative indices count back from the
/// end, so `-1` is the last element and `-len` is the first. Any index that
/// falls outside `0..len` yields `None`.
pub(crate) fn resolve_array_index(index: i64, len: usize) -> Option<usize> {
    if index < 0 {
        // `unsigned_abs` cannot overflow, even for `i64::MIN`.
        let from_end = usize::try_from(index.unsigned_abs()).ok()?;
        return len.checked_sub(from_end);
    }
    let pos = usize::try_from(index).ok()?;
    (pos < len).then_some(pos)
}
144
145/// Parse a dot path into navigation ops, recognizing positional `name[N]`
146/// (and chained `name[N][M]`, with negative indices counting from the end). A
147/// bracket group that is not an integer degrades to a literal object key so it
148/// simply fails to match.
149fn parse_path_ops(path: &str) -> Vec<PathOp<'_>> {
150    let mut ops = Vec::new();
151    for part in path.split('.') {
152        match first_unescaped(part, b'[') {
153            Some(bpos) if parse_index_groups(&part[bpos..]).is_some() => {
154                let name = &part[..bpos];
155                if !name.is_empty() {
156                    ops.push(PathOp::Key(unescape_brackets(name)));
157                }
158                for idx in parse_index_groups(&part[bpos..]).expect("checked") {
159                    ops.push(PathOp::Index(idx));
160                }
161            }
162            // No unescaped index group: the whole segment is a literal key,
163            // with `\[` / `\]` unescaped to match the event's actual key.
164            _ => ops.push(PathOp::Key(unescape_brackets(part))),
165        }
166    }
167    ops
168}
169
/// Parse a run of bracketed integers (`[N]`, `[N][M]`, ...) into the list of
/// indices it contains. Negative values are accepted.
///
/// Returns `None` as soon as the input deviates from that shape: text that is
/// not inside brackets, a missing `]`, or bracket content that does not parse
/// as an `i64`. An empty input yields an empty list.
fn parse_index_groups(s: &str) -> Option<Vec<i64>> {
    let mut indices = Vec::new();
    let mut tail = s;
    loop {
        if tail.is_empty() {
            return Some(indices);
        }
        // Peel one `[...]` group off the front; the rest is handled next turn.
        let (digits, after) = tail.strip_prefix('[')?.split_once(']')?;
        indices.push(digits.parse::<i64>().ok()?);
        tail = after;
    }
}
184
185/// Follow navigation ops, collecting every terminal value into `out`.
186///
187/// A `Key` op distributes over arrays (implicit any-member): the remaining ops
188/// are applied to every element, so a path crossing an array of objects yields
189/// one value per element. An `Index` op selects a single element and never
190/// fans out, giving deterministic positional access.
191fn collect_by_ops<'a>(current: &'a Value, ops: &[PathOp<'_>], out: &mut Vec<EventValue<'a>>) {
192    let Some((op, rest)) = ops.split_first() else {
193        out.push(EventValue::from(current));
194        return;
195    };
196
197    match op {
198        PathOp::Key(key) => match current {
199            Value::Object(map) => {
200                if let Some(next) = map.get(key.as_ref()) {
201                    collect_by_ops(next, rest, out);
202                }
203            }
204            Value::Array(arr) => {
205                for item in arr {
206                    collect_by_ops(item, ops, out);
207                }
208            }
209            _ => {}
210        },
211        PathOp::Index(i) => {
212            if let Value::Array(arr) = current
213                && let Some(idx) = resolve_array_index(*i, arr.len())
214                && let Some(next) = arr.get(idx)
215            {
216                collect_by_ops(next, rest, out);
217            }
218        }
219    }
220}
221
222fn any_string_value_json(v: &Value, pred: &dyn Fn(&str) -> bool, depth: usize) -> bool {
223    if depth == 0 {
224        return false;
225    }
226    match v {
227        Value::String(s) => pred(s.as_str()),
228        Value::Object(map) => map
229            .values()
230            .any(|val| any_string_value_json(val, pred, depth - 1)),
231        Value::Array(arr) => arr
232            .iter()
233            .any(|val| any_string_value_json(val, pred, depth - 1)),
234        _ => false,
235    }
236}
237
238fn collect_field_keys<'a>(v: &'a Value, prefix: &str, out: &mut Vec<Cow<'a, str>>, depth: usize) {
239    if depth == 0 {
240        return;
241    }
242    if let Value::Object(map) = v {
243        for (k, child) in map {
244            let path = if prefix.is_empty() {
245                k.clone()
246            } else {
247                format!("{prefix}.{k}")
248            };
249            match child {
250                // Recurse into nested objects but do NOT emit the
251                // intermediate path; only the leaf descendants count.
252                // Sigma rules normally reference leaves via
253                // dot-notation, so emitting `actor` alongside
254                // `actor.id` would falsely flag the parent as
255                // "unknown" in the gap signal.
256                Value::Object(_) => collect_field_keys(child, &path, out, depth - 1),
257                _ => out.push(Cow::Owned(path)),
258            }
259        }
260    }
261}
262
263fn collect_string_values_json<'a>(v: &'a Value, out: &mut Vec<Cow<'a, str>>, depth: usize) {
264    if depth == 0 {
265        return;
266    }
267    match v {
268        Value::String(s) => out.push(Cow::Borrowed(s.as_str())),
269        Value::Object(map) => {
270            for val in map.values() {
271                collect_string_values_json(val, out, depth - 1);
272            }
273        }
274        Value::Array(arr) => {
275            for val in arr {
276                collect_string_values_json(val, out, depth - 1);
277            }
278        }
279        _ => {}
280    }
281}
282
// Unit tests for `JsonEvent`: field lookup (flat, nested, through arrays),
// string-value collection, JSON round-tripping and leaf-key enumeration.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn json_flat_field() {
        let v = json!({"CommandLine": "whoami", "User": "admin"});
        let event = JsonEvent::borrow(&v);
        assert_eq!(
            event.get_field("CommandLine"),
            Some(EventValue::Str(Cow::Borrowed("whoami")))
        );
    }

    #[test]
    fn json_nested_field() {
        let v = json!({"actor": {"id": "user123", "type": "User"}});
        let event = JsonEvent::borrow(&v);
        assert_eq!(
            event.get_field("actor.id"),
            Some(EventValue::Str(Cow::Borrowed("user123")))
        );
    }

    #[test]
    fn json_flat_key_precedence() {
        // A literal `actor.id` key must win over the nested `actor` -> `id`.
        let v = json!({"actor.id": "flat_value", "actor": {"id": "nested_value"}});
        let event = JsonEvent::borrow(&v);
        assert_eq!(
            event.get_field("actor.id"),
            Some(EventValue::Str(Cow::Borrowed("flat_value")))
        );
    }

    #[test]
    fn json_missing_field() {
        let v = json!({"foo": "bar"});
        let event = JsonEvent::borrow(&v);
        assert_eq!(event.get_field("missing"), None);
    }

    #[test]
    fn json_null_field() {
        // A present-but-null field is distinct from a missing field.
        let v = json!({"foo": null});
        let event = JsonEvent::borrow(&v);
        assert_eq!(event.get_field("foo"), Some(EventValue::Null));
    }

    #[test]
    fn json_array_traversal() {
        // A path crossing an array of objects collects every element's
        // leaf value (any-member semantics), not just the first.
        let v = json!({"a": {"b": [{"c": "found"}, {"c": "other"}]}});
        let event = JsonEvent::borrow(&v);
        assert_eq!(
            event.get_field("a.b.c"),
            Some(EventValue::Array(vec![
                EventValue::Str(Cow::Borrowed("found")),
                EventValue::Str(Cow::Borrowed("other")),
            ]))
        );
    }

    #[test]
    fn json_array_traversal_no_match() {
        let v = json!({"a": {"b": [{"x": 1}, {"y": 2}]}});
        let event = JsonEvent::borrow(&v);
        assert_eq!(event.get_field("a.b.c"), None);
    }

    #[test]
    fn json_array_traversal_deep() {
        let v = json!({
            "events": [
                {"actors": [{"name": "alice"}, {"name": "bob"}]},
                {"actors": [{"name": "charlie"}]}
            ]
        });
        let event = JsonEvent::borrow(&v);
        // Nested arrays flatten to every leaf value.
        assert_eq!(
            event.get_field("events.actors.name"),
            Some(EventValue::Array(vec![
                EventValue::Str(Cow::Borrowed("alice")),
                EventValue::Str(Cow::Borrowed("bob")),
                EventValue::Str(Cow::Borrowed("charlie")),
            ]))
        );
    }

    #[test]
    fn json_array_at_root_level() {
        let v = json!({"process": [{"command_line": "whoami"}, {"command_line": "id"}]});
        let event = JsonEvent::borrow(&v);
        assert_eq!(
            event.get_field("process.command_line"),
            Some(EventValue::Array(vec![
                EventValue::Str(Cow::Borrowed("whoami")),
                EventValue::Str(Cow::Borrowed("id")),
            ]))
        );
    }

    #[test]
    fn json_array_returns_array_value() {
        // A path that ends ON an array returns the array itself.
        let v = json!({"a": {"tags": ["t1", "t2"]}});
        let event = JsonEvent::borrow(&v);
        let result = event.get_field("a.tags");
        assert!(matches!(result, Some(EventValue::Array(_))));
    }

    #[test]
    fn json_flat_key_wins_over_array_traversal() {
        let v = json!({"a.b.c": "flat", "a": {"b": [{"c": "nested"}]}});
        let event = JsonEvent::borrow(&v);
        assert_eq!(
            event.get_field("a.b.c"),
            Some(EventValue::Str(Cow::Borrowed("flat")))
        );
    }

    #[test]
    fn json_all_string_values() {
        let v = json!({
            "a": "hello",
            "b": 42,
            "c": {"d": "world", "e": true},
            "f": ["one", "two"]
        });
        let event = JsonEvent::borrow(&v);
        let values = event.all_string_values();
        let strs: Vec<&str> = values.iter().map(|c| c.as_ref()).collect();
        assert!(strs.contains(&"hello"));
        assert!(strs.contains(&"world"));
        assert!(strs.contains(&"one"));
        assert!(strs.contains(&"two"));
        // Non-string scalars (`42`, `true`) must not be collected.
        assert_eq!(values.len(), 4);
    }

    #[test]
    fn json_to_json_roundtrip() {
        let v = json!({"a": 1, "b": "hello", "c": [1, 2]});
        let event = JsonEvent::borrow(&v);
        assert_eq!(event.to_json(), v);
    }

    #[test]
    fn json_owned_works() {
        let v = json!({"key": "value"});
        let event = JsonEvent::owned(v.clone());
        assert_eq!(
            event.get_field("key"),
            Some(EventValue::Str(Cow::Borrowed("value")))
        );
        assert_eq!(event.to_json(), v);
    }

    #[test]
    fn json_field_keys_flat() {
        let v = json!({"CommandLine": "x", "User": "y"});
        let event = JsonEvent::borrow(&v);
        let mut keys: Vec<String> = event.field_keys().iter().map(|c| c.to_string()).collect();
        keys.sort();
        assert_eq!(keys, vec!["CommandLine", "User"]);
    }

    #[test]
    fn json_field_keys_nested_leaves_only() {
        // Intermediate object names like `actor` are NOT emitted; only
        // leaves (`actor.id`, `actor.type`) and top-level scalars
        // (`verb`) appear.
        let v = json!({"actor": {"id": "u1", "type": "User"}, "verb": "login"});
        let event = JsonEvent::borrow(&v);
        let mut keys: Vec<String> = event.field_keys().iter().map(|c| c.to_string()).collect();
        keys.sort();
        assert_eq!(keys, vec!["actor.id", "actor.type", "verb"]);
    }

    #[test]
    fn json_field_keys_deeply_nested_leaves_only() {
        let v = json!({"a": {"b": {"c": 1}}, "flat": "x"});
        let event = JsonEvent::borrow(&v);
        let mut keys: Vec<String> = event.field_keys().iter().map(|c| c.to_string()).collect();
        keys.sort();
        assert_eq!(keys, vec!["a.b.c", "flat"]);
    }

    #[test]
    fn json_field_keys_array_parent_only() {
        let v = json!({"events": [{"id": 1}, {"id": 2}]});
        let event = JsonEvent::borrow(&v);
        let keys: Vec<String> = event.field_keys().iter().map(|c| c.to_string()).collect();
        // Arrays contribute their parent key only; array indices are not enumerated.
        assert_eq!(keys, vec!["events"]);
    }

    #[test]
    fn json_field_keys_top_level_non_object_empty() {
        let v = json!("just a string");
        let event = JsonEvent::owned(v);
        assert!(event.field_keys().is_empty());
    }

    #[test]
    fn json_traversal_with_consecutive_dots_does_not_panic() {
        // Pathological input: `parse_path_ops` splits `a..b` on `.` into
        // the segments `a`, `` and `b`. The empty middle segment is then
        // looked up as a key inside `{"b": "x"}` and misses, so the
        // lookup must yield `None` rather than panic or accidentally
        // match `a.b`. (Assumes the `rsigma_parser` path helpers pass an
        // empty segment through unchanged.)
        let v = json!({"a": {"b": "x"}});
        let event = JsonEvent::borrow(&v);
        assert_eq!(event.get_field("a..b"), None);
    }

    #[test]
    fn json_traversal_with_trailing_dot_does_not_panic() {
        // A trailing dot leaves an empty final segment. By the time it
        // is applied the walker is standing on the string `"x"`, which
        // is neither an object nor an array, so nothing is collected and
        // the lookup misses without panicking.
        let v = json!({"a": {"b": "x"}});
        let event = JsonEvent::borrow(&v);
        assert_eq!(event.get_field("a.b."), None);
    }
}