Skip to main content

rsigma_eval/event/
json.rs

1use std::borrow::Cow;
2
3use serde_json::Value;
4
5use super::{Event, EventValue};
6
/// Maximum nesting depth for recursive JSON traversal.
///
/// Passed as the starting depth budget to the recursive walkers in this file
/// (`any_string_value_json`, `collect_string_values_json`,
/// `collect_field_keys`). Each walker hands `depth - 1` to its children and
/// stops descending once the budget reaches zero.
const MAX_NESTING_DEPTH: usize = 64;
9
10/// Zero-copy event backed by `serde_json::Value`.
11///
12/// Supports both borrowed (`&Value`) and owned (`Value`) backing via `Cow`.
13/// This is the primary implementation for JSON/NDJSON input.
14///
15/// Flat keys are checked first: `"actor.id"` as a single key takes precedence
16/// over `{"actor": {"id": ...}}` nested traversal.
17#[derive(Debug)]
18pub struct JsonEvent<'a> {
19    inner: Cow<'a, Value>,
20}
21
22impl<'a> JsonEvent<'a> {
23    /// Wrap a borrowed JSON value as an event.
24    pub fn borrow(v: &'a Value) -> Self {
25        Self {
26            inner: Cow::Borrowed(v),
27        }
28    }
29
30    /// Wrap an owned JSON value as an event.
31    pub fn owned(v: Value) -> Self {
32        Self {
33            inner: Cow::Owned(v),
34        }
35    }
36}
37
38impl<'a> From<&'a Value> for JsonEvent<'a> {
39    fn from(v: &'a Value) -> Self {
40        Self::borrow(v)
41    }
42}
43
44impl From<Value> for JsonEvent<'static> {
45    fn from(v: Value) -> Self {
46        Self::owned(v)
47    }
48}
49
50impl<'a> Event for JsonEvent<'a> {
51    /// Get a field value by name, supporting dot-notation for nested access.
52    ///
53    /// Checks for a flat key first (exact match), then falls back to
54    /// dot-separated traversal. When a path segment yields an array,
55    /// each element is tried and the first match is returned (OR semantics).
56    fn get_field(&self, path: &str) -> Option<EventValue<'_>> {
57        let value: &Value = &self.inner;
58
59        if let Some(obj) = value.as_object()
60            && let Some(v) = obj.get(path)
61        {
62            return Some(EventValue::from(v));
63        }
64
65        if path.contains('.') {
66            return traverse_json(value, path).map(EventValue::from);
67        }
68
69        None
70    }
71
72    /// Check if any string value in the event satisfies a predicate.
73    ///
74    /// Short-circuits on the first match, avoiding the allocation of
75    /// collecting all string values into a `Vec`.
76    fn any_string_value(&self, pred: &dyn Fn(&str) -> bool) -> bool {
77        any_string_value_json(&self.inner, pred, MAX_NESTING_DEPTH)
78    }
79
80    /// Iterate over all string values in the event (for keyword detection).
81    ///
82    /// Recursively walks the entire event object and yields every string
83    /// value found, including inside nested objects and arrays. Traversal
84    /// is capped at 64 levels of nesting to prevent stack overflow.
85    fn all_string_values(&self) -> Vec<Cow<'_, str>> {
86        let mut values = Vec::new();
87        collect_string_values_json(&self.inner, &mut values, MAX_NESTING_DEPTH);
88        values
89    }
90
91    fn to_json(&self) -> Value {
92        self.inner.as_ref().clone()
93    }
94
95    /// Walk every leaf field in the event and yield dot-joined paths.
96    /// Intermediate object names (`actor` for `{"actor":{"id":"x"}}`)
97    /// are NOT emitted; only the leaves (`actor.id`) appear. This
98    /// matches typical Sigma rules, which reference nested values via
99    /// dot-notation; emitting the intermediate name would falsely flag
100    /// every parent object as "unknown" in the gap signal even when
101    /// the rule references a child path. Top-level scalar fields
102    /// (`{"actor":"alice"}`) emit `actor` because they ARE leaves.
103    /// Arrays contribute their parent path once; per-index suffixes
104    /// are not emitted.
105    fn field_keys(&self) -> Vec<Cow<'_, str>> {
106        let mut out = Vec::new();
107        collect_field_keys(&self.inner, "", &mut out, MAX_NESTING_DEPTH);
108        out
109    }
110}
111
112/// Recursively traverse a JSON value following dot-notation path segments.
113///
114/// `path` is the remaining dot-joined path (e.g. `"actor.id.value"`); the
115/// function splits the leading segment on each recursion via
116/// [`str::split_once`] so no `Vec<&str>` is allocated. Each lookup was a
117/// hot path under `get_field`, called once per detection item per event;
118/// the previous `path.split('.').collect::<Vec<_>>()` allocated on every
119/// nested lookup.
120///
121/// When a segment resolves to an array, each element is tried with the
122/// same (unconsumed) `path`, matching the OR semantics of the prior
123/// implementation: the array does not consume a path segment.
124fn traverse_json<'a>(current: &'a Value, path: &str) -> Option<&'a Value> {
125    match current {
126        Value::Object(map) => {
127            // `split_once` consumes a single segment per recursion; the
128            // `has_more` flag distinguishes "the last segment, return the
129            // looked-up value" from "more segments to walk into". Treating
130            // an `is_empty()` path as terminal would change the
131            // pathological "trailing dot" case (`"a.b."`) from a miss to
132            // a hit, because consuming `b` would leave an empty rest that
133            // the old `Vec<&str>` walker tried to apply to the leaf
134            // value and bailed on; preserve that miss semantics here.
135            let (head, rest, has_more) = match path.split_once('.') {
136                Some((h, r)) => (h, r, true),
137                None => (path, "", false),
138            };
139            let next = map.get(head)?;
140            if has_more {
141                traverse_json(next, rest)
142            } else {
143                Some(next)
144            }
145        }
146        Value::Array(arr) => {
147            // Arrays do not consume a path segment; each element is
148            // tried with the full remaining path, matching the OR
149            // semantics of the prior `traverse_json(item, parts)` call.
150            for item in arr {
151                if let Some(v) = traverse_json(item, path) {
152                    return Some(v);
153                }
154            }
155            None
156        }
157        _ => None,
158    }
159}
160
161fn any_string_value_json(v: &Value, pred: &dyn Fn(&str) -> bool, depth: usize) -> bool {
162    if depth == 0 {
163        return false;
164    }
165    match v {
166        Value::String(s) => pred(s.as_str()),
167        Value::Object(map) => map
168            .values()
169            .any(|val| any_string_value_json(val, pred, depth - 1)),
170        Value::Array(arr) => arr
171            .iter()
172            .any(|val| any_string_value_json(val, pred, depth - 1)),
173        _ => false,
174    }
175}
176
177fn collect_field_keys<'a>(v: &'a Value, prefix: &str, out: &mut Vec<Cow<'a, str>>, depth: usize) {
178    if depth == 0 {
179        return;
180    }
181    if let Value::Object(map) = v {
182        for (k, child) in map {
183            let path = if prefix.is_empty() {
184                k.clone()
185            } else {
186                format!("{prefix}.{k}")
187            };
188            match child {
189                // Recurse into nested objects but do NOT emit the
190                // intermediate path; only the leaf descendants count.
191                // Sigma rules normally reference leaves via
192                // dot-notation, so emitting `actor` alongside
193                // `actor.id` would falsely flag the parent as
194                // "unknown" in the gap signal.
195                Value::Object(_) => collect_field_keys(child, &path, out, depth - 1),
196                _ => out.push(Cow::Owned(path)),
197            }
198        }
199    }
200}
201
202fn collect_string_values_json<'a>(v: &'a Value, out: &mut Vec<Cow<'a, str>>, depth: usize) {
203    if depth == 0 {
204        return;
205    }
206    match v {
207        Value::String(s) => out.push(Cow::Borrowed(s.as_str())),
208        Value::Object(map) => {
209            for val in map.values() {
210                collect_string_values_json(val, out, depth - 1);
211            }
212        }
213        Value::Array(arr) => {
214            for val in arr {
215                collect_string_values_json(val, out, depth - 1);
216            }
217        }
218        _ => {}
219    }
220}
221
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Expected `get_field` result for a string-valued field.
    fn str_hit(text: &str) -> Option<EventValue<'_>> {
        Some(EventValue::Str(Cow::Borrowed(text)))
    }

    /// Field keys reported by `event`, as owned strings in emission order.
    fn keys_of(event: &JsonEvent<'_>) -> Vec<String> {
        event
            .field_keys()
            .iter()
            .map(|key| key.to_string())
            .collect()
    }

    #[test]
    fn json_flat_field() {
        let doc = json!({"CommandLine": "whoami", "User": "admin"});
        let event = JsonEvent::borrow(&doc);
        assert_eq!(event.get_field("CommandLine"), str_hit("whoami"));
    }

    #[test]
    fn json_nested_field() {
        let doc = json!({"actor": {"id": "user123", "type": "User"}});
        let event = JsonEvent::borrow(&doc);
        assert_eq!(event.get_field("actor.id"), str_hit("user123"));
    }

    #[test]
    fn json_flat_key_precedence() {
        let doc = json!({"actor.id": "flat_value", "actor": {"id": "nested_value"}});
        let event = JsonEvent::borrow(&doc);
        assert_eq!(event.get_field("actor.id"), str_hit("flat_value"));
    }

    #[test]
    fn json_missing_field() {
        let doc = json!({"foo": "bar"});
        let event = JsonEvent::borrow(&doc);
        assert_eq!(event.get_field("missing"), None);
    }

    #[test]
    fn json_null_field() {
        let doc = json!({"foo": null});
        let event = JsonEvent::borrow(&doc);
        assert_eq!(event.get_field("foo"), Some(EventValue::Null));
    }

    #[test]
    fn json_array_traversal() {
        let doc = json!({"a": {"b": [{"c": "found"}, {"c": "other"}]}});
        let event = JsonEvent::borrow(&doc);
        assert_eq!(event.get_field("a.b.c"), str_hit("found"));
    }

    #[test]
    fn json_array_traversal_no_match() {
        let doc = json!({"a": {"b": [{"x": 1}, {"y": 2}]}});
        let event = JsonEvent::borrow(&doc);
        assert_eq!(event.get_field("a.b.c"), None);
    }

    #[test]
    fn json_array_traversal_deep() {
        let doc = json!({
            "events": [
                {"actors": [{"name": "alice"}, {"name": "bob"}]},
                {"actors": [{"name": "charlie"}]}
            ]
        });
        let event = JsonEvent::borrow(&doc);
        assert_eq!(event.get_field("events.actors.name"), str_hit("alice"));
    }

    #[test]
    fn json_array_at_root_level() {
        let doc = json!({"process": [{"command_line": "whoami"}, {"command_line": "id"}]});
        let event = JsonEvent::borrow(&doc);
        assert_eq!(event.get_field("process.command_line"), str_hit("whoami"));
    }

    #[test]
    fn json_array_returns_array_value() {
        let doc = json!({"a": {"tags": ["t1", "t2"]}});
        let event = JsonEvent::borrow(&doc);
        assert!(matches!(
            event.get_field("a.tags"),
            Some(EventValue::Array(_))
        ));
    }

    #[test]
    fn json_flat_key_wins_over_array_traversal() {
        let doc = json!({"a.b.c": "flat", "a": {"b": [{"c": "nested"}]}});
        let event = JsonEvent::borrow(&doc);
        assert_eq!(event.get_field("a.b.c"), str_hit("flat"));
    }

    #[test]
    fn json_all_string_values() {
        let doc = json!({
            "a": "hello",
            "b": 42,
            "c": {"d": "world", "e": true},
            "f": ["one", "two"]
        });
        let event = JsonEvent::borrow(&doc);
        let values = event.all_string_values();
        let strs: Vec<&str> = values.iter().map(|s| s.as_ref()).collect();
        for expected in ["hello", "world", "one", "two"] {
            assert!(strs.contains(&expected));
        }
        assert_eq!(values.len(), 4);
    }

    #[test]
    fn json_to_json_roundtrip() {
        let doc = json!({"a": 1, "b": "hello", "c": [1, 2]});
        let event = JsonEvent::borrow(&doc);
        assert_eq!(event.to_json(), doc);
    }

    #[test]
    fn json_owned_works() {
        let doc = json!({"key": "value"});
        let event = JsonEvent::owned(doc.clone());
        assert_eq!(event.get_field("key"), str_hit("value"));
        assert_eq!(event.to_json(), doc);
    }

    #[test]
    fn json_field_keys_flat() {
        let doc = json!({"CommandLine": "x", "User": "y"});
        let event = JsonEvent::borrow(&doc);
        let mut keys = keys_of(&event);
        keys.sort();
        assert_eq!(keys, vec!["CommandLine", "User"]);
    }

    #[test]
    fn json_field_keys_nested_leaves_only() {
        // The intermediate object name `actor` must not show up: only its
        // leaves (`actor.id`, `actor.type`) and the top-level scalar `verb`.
        let doc = json!({"actor": {"id": "u1", "type": "User"}, "verb": "login"});
        let event = JsonEvent::borrow(&doc);
        let mut keys = keys_of(&event);
        keys.sort();
        assert_eq!(keys, vec!["actor.id", "actor.type", "verb"]);
    }

    #[test]
    fn json_field_keys_deeply_nested_leaves_only() {
        let doc = json!({"a": {"b": {"c": 1}}, "flat": "x"});
        let event = JsonEvent::borrow(&doc);
        let mut keys = keys_of(&event);
        keys.sort();
        assert_eq!(keys, vec!["a.b.c", "flat"]);
    }

    #[test]
    fn json_field_keys_array_parent_only() {
        let doc = json!({"events": [{"id": 1}, {"id": 2}]});
        let event = JsonEvent::borrow(&doc);
        // An array is reported once under its parent key; its elements and
        // indices are not enumerated.
        assert_eq!(keys_of(&event), vec!["events"]);
    }

    #[test]
    fn json_field_keys_top_level_non_object_empty() {
        let event = JsonEvent::owned(json!("just a string"));
        assert!(event.field_keys().is_empty());
    }

    #[test]
    fn json_traversal_with_consecutive_dots_does_not_panic() {
        // Pathological input: `a..b` has an empty middle segment. The
        // lookup must come back as `None` rather than panicking or
        // accidentally matching `a.b`.
        let doc = json!({"a": {"b": "x"}});
        let event = JsonEvent::borrow(&doc);
        assert_eq!(event.get_field("a..b"), None);
    }

    #[test]
    fn json_traversal_with_trailing_dot_does_not_panic() {
        // Pathological input: `a.b.` ends in an empty segment. Reaching the
        // leaf `b` with a remainder still pending must be a miss, not a hit.
        let doc = json!({"a": {"b": "x"}});
        let event = JsonEvent::borrow(&doc);
        assert_eq!(event.get_field("a.b."), None);
    }
}