Skip to main content

rgx/filter/
json_path.rs

1//! Minimal dotted/indexed path language for JSONL field extraction.
2//!
3//! Grammar:
4//!
5//! ```text
6//! path    := segment+
7//! segment := ('.' ident) | ('[' digits ']')
8//! ident   := [A-Za-z_][A-Za-z0-9_]*
9//! ```
10//!
11//! The grammar is deliberately small — no wildcards, no filters, no quoted
12//! keys. It is just enough to address a field inside a typical JSONL record
13//! (e.g. `.msg`, `.steps[0].text`). Anything more expressive than this is out
14//! of scope for the v1 `--json` flag.
15
16use serde_json::Value;
17
/// One step of a parsed path expression.
///
/// A path such as `.steps[0].text` is represented as the sequence
/// `Key("steps")`, `Index(0)`, `Key("text")`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Segment {
    /// Object member access, written `.ident` in the path syntax.
    Key(String),
    /// Array element access, written `[digits]` in the path syntax.
    Index(usize),
}
23
24/// Parse a dotted/indexed path expression into a list of segments.
25///
26/// Returns `Err` with a message pointing at the character offset on failure.
27pub fn parse_path(s: &str) -> Result<Vec<Segment>, String> {
28    if s.is_empty() {
29        return Err("empty path".to_string());
30    }
31
32    let bytes = s.as_bytes();
33    let mut segments = Vec::new();
34    let mut i = 0;
35
36    while i < bytes.len() {
37        match bytes[i] {
38            b'.' => {
39                i += 1;
40                let start = i;
41                if i >= bytes.len() {
42                    return Err(format!("expected identifier at position {i}"));
43                }
44                // First char of identifier must be [A-Za-z_].
45                if !is_ident_start(bytes[i]) {
46                    return Err(format!(
47                        "expected identifier start at position {i}, found {:?}",
48                        bytes[i] as char
49                    ));
50                }
51                i += 1;
52                while i < bytes.len() && is_ident_continue(bytes[i]) {
53                    i += 1;
54                }
55                // Safe to slice — identifier chars are ASCII.
56                let ident = &s[start..i];
57                segments.push(Segment::Key(ident.to_string()));
58            }
59            b'[' => {
60                i += 1;
61                let start = i;
62                while i < bytes.len() && bytes[i].is_ascii_digit() {
63                    i += 1;
64                }
65                if start == i {
66                    return Err(format!("expected digits at position {start}"));
67                }
68                let digits = &s[start..i];
69                if i >= bytes.len() || bytes[i] != b']' {
70                    return Err(format!("expected ']' at position {i}"));
71                }
72                let index: usize = digits
73                    .parse()
74                    .map_err(|e| format!("invalid index {digits:?}: {e}"))?;
75                i += 1; // consume ']'
76                segments.push(Segment::Index(index));
77            }
78            other => {
79                return Err(format!(
80                    "expected '.' or '[' at position {i}, found {:?}",
81                    other as char
82                ));
83            }
84        }
85    }
86
87    if segments.is_empty() {
88        return Err("empty path".to_string());
89    }
90    Ok(segments)
91}
92
93/// Walk a JSON `Value` along the given `path`. Returns `None` if any segment
94/// misses (wrong type, missing key, out-of-bounds index).
95pub fn extract<'a>(value: &'a Value, path: &[Segment]) -> Option<&'a Value> {
96    let mut cur = value;
97    for seg in path {
98        match seg {
99            Segment::Key(k) => cur = cur.as_object()?.get(k)?,
100            Segment::Index(i) => cur = cur.as_array()?.get(*i)?,
101        }
102    }
103    Some(cur)
104}
105
/// Returns `true` if `b` may begin an identifier: an ASCII letter or `_`.
fn is_ident_start(b: u8) -> bool {
    matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'_')
}
109
/// Returns `true` if `b` may appear after the first character of an
/// identifier: an ASCII letter, an ASCII digit, or `_`.
fn is_ident_continue(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_')
}