Skip to main content

rsigma_runtime/sources/
extract.rs

//! Expression-based data extraction for dynamic sources.
//!
//! Supports three extraction languages with dual syntax:
//! - Plain string: always jq (the common case)
//! - Structured object `{ expr, type }`: explicit language selection
//!
//! Supported types: `jq` (via jaq), `jsonpath` (via jsonpath-rust), `cel` (via the `cel` crate).
8
9use rsigma_eval::pipeline::sources::ExtractExpr;
10
11use super::{SourceError, SourceErrorKind};
12
13/// Apply a typed extract expression to parsed source data.
14pub fn apply_extract(
15    data: &serde_json::Value,
16    expr: &ExtractExpr,
17) -> Result<serde_json::Value, SourceError> {
18    match expr {
19        ExtractExpr::Jq(e) => apply_jq(data, e),
20        ExtractExpr::JsonPath(e) => apply_jsonpath(data, e),
21        ExtractExpr::Cel(e) => apply_cel(data, e),
22    }
23}
24
25/// Apply a jq expression using jaq.
26fn apply_jq(data: &serde_json::Value, expr: &str) -> Result<serde_json::Value, SourceError> {
27    use jaq_interpret::{Ctx, FilterT, RcIter, Val};
28
29    let mut defs = jaq_interpret::ParseCtx::new(vec![]);
30    let (filter, errs) = jaq_parse::parse(expr, jaq_parse::main());
31
32    if !errs.is_empty() || filter.is_none() {
33        return Err(SourceError {
34            source_id: String::new(),
35            kind: SourceErrorKind::Extract(format!("invalid jq expression: {expr}")),
36        });
37    }
38
39    let filter = defs.compile(filter.unwrap());
40    let inputs = RcIter::new(std::iter::empty());
41    let val = Val::from(data.clone());
42
43    let ctx = Ctx::new([], &inputs);
44    let results: Vec<Val> = filter
45        .run((ctx, val))
46        .collect::<Result<Vec<_>, _>>()
47        .map_err(|e| SourceError {
48            source_id: String::new(),
49            kind: SourceErrorKind::Extract(format!("jq execution error: {e}")),
50        })?;
51
52    match results.len() {
53        0 => Ok(serde_json::Value::Null),
54        1 => Ok(val_to_json(&results[0])),
55        _ => {
56            let arr: Vec<serde_json::Value> = results.iter().map(val_to_json).collect();
57            Ok(serde_json::Value::Array(arr))
58        }
59    }
60}
61
62/// Apply a JSONPath expression using jsonpath-rust.
63fn apply_jsonpath(data: &serde_json::Value, expr: &str) -> Result<serde_json::Value, SourceError> {
64    use jsonpath_rust::JsonPath;
65
66    let results = data.query(expr).map_err(|e| SourceError {
67        source_id: String::new(),
68        kind: SourceErrorKind::Extract(format!("invalid JSONPath expression: {e}")),
69    })?;
70
71    match results.len() {
72        0 => Ok(serde_json::Value::Null),
73        1 => Ok(results[0].clone()),
74        _ => {
75            let arr: Vec<serde_json::Value> = results.into_iter().cloned().collect();
76            Ok(serde_json::Value::Array(arr))
77        }
78    }
79}
80
81/// Apply a CEL expression using the `cel` crate (cel-rust).
82///
83/// The resolved source data is bound as the CEL variable `data`.
84fn apply_cel(data: &serde_json::Value, expr: &str) -> Result<serde_json::Value, SourceError> {
85    use cel::{Context, Program};
86
87    let program = Program::compile(expr).map_err(|e| SourceError {
88        source_id: String::new(),
89        kind: SourceErrorKind::Extract(format!("invalid CEL expression: {e}")),
90    })?;
91
92    let mut context = Context::default();
93    let cel_value = json_to_cel(data);
94    let _ = context.add_variable("data", cel_value);
95
96    let result = program.execute(&context).map_err(|e| SourceError {
97        source_id: String::new(),
98        kind: SourceErrorKind::Extract(format!("CEL execution error: {e}")),
99    })?;
100
101    Ok(cel_to_json(&result))
102}
103
104/// Convert a serde_json::Value to a cel::Value.
105fn json_to_cel(json: &serde_json::Value) -> cel::Value {
106    match json {
107        serde_json::Value::Null => cel::Value::Null,
108        serde_json::Value::Bool(b) => (*b).into(),
109        serde_json::Value::Number(n) => {
110            if let Some(i) = n.as_i64() {
111                i.into()
112            } else if let Some(u) = n.as_u64() {
113                u.into()
114            } else if let Some(f) = n.as_f64() {
115                f.into()
116            } else {
117                cel::Value::Null
118            }
119        }
120        serde_json::Value::String(s) => s.as_str().into(),
121        serde_json::Value::Array(arr) => {
122            let items: Vec<cel::Value> = arr.iter().map(json_to_cel).collect();
123            items.into()
124        }
125        serde_json::Value::Object(map) => {
126            let cel_map: std::collections::HashMap<cel::objects::Key, cel::Value> = map
127                .iter()
128                .map(|(k, v)| (k.as_str().into(), json_to_cel(v)))
129                .collect();
130            cel_map.into()
131        }
132    }
133}
134
135/// Convert a cel::Value back to serde_json::Value.
136fn cel_to_json(val: &cel::Value) -> serde_json::Value {
137    match val {
138        cel::Value::Null => serde_json::Value::Null,
139        cel::Value::Bool(b) => serde_json::Value::Bool(*b),
140        cel::Value::Int(i) => serde_json::json!(i),
141        cel::Value::UInt(u) => serde_json::json!(u),
142        cel::Value::Float(f) => serde_json::json!(f),
143        cel::Value::String(s) => serde_json::Value::String(s.to_string()),
144        cel::Value::List(list) => {
145            let arr: Vec<serde_json::Value> = list.iter().map(cel_to_json).collect();
146            serde_json::Value::Array(arr)
147        }
148        cel::Value::Map(map) => {
149            let mut obj = serde_json::Map::new();
150            for (k, v) in map.map.iter() {
151                let key = match k {
152                    cel::objects::Key::String(s) => s.to_string(),
153                    cel::objects::Key::Int(i) => i.to_string(),
154                    cel::objects::Key::Uint(u) => u.to_string(),
155                    cel::objects::Key::Bool(b) => b.to_string(),
156                };
157                obj.insert(key, cel_to_json(v));
158            }
159            serde_json::Value::Object(obj)
160        }
161        _ => serde_json::Value::String(format!("{val:?}")),
162    }
163}
164
165/// Convert a jaq `Val` to a `serde_json::Value`.
166fn val_to_json(val: &jaq_interpret::Val) -> serde_json::Value {
167    match val {
168        jaq_interpret::Val::Null => serde_json::Value::Null,
169        jaq_interpret::Val::Bool(b) => serde_json::Value::Bool(*b),
170        jaq_interpret::Val::Int(i) => serde_json::json!(i),
171        jaq_interpret::Val::Float(f) => serde_json::json!(f),
172        jaq_interpret::Val::Num(n) => {
173            if let Ok(i) = n.parse::<i64>() {
174                serde_json::json!(i)
175            } else if let Ok(f) = n.parse::<f64>() {
176                serde_json::json!(f)
177            } else {
178                serde_json::Value::String(n.to_string())
179            }
180        }
181        jaq_interpret::Val::Str(s) => serde_json::Value::String(s.to_string()),
182        jaq_interpret::Val::Arr(arr) => {
183            serde_json::Value::Array(arr.iter().map(val_to_json).collect())
184        }
185        jaq_interpret::Val::Obj(obj) => {
186            let map: serde_json::Map<String, serde_json::Value> = obj
187                .iter()
188                .map(|(k, v)| (k.to_string(), val_to_json(v)))
189                .collect();
190            serde_json::Value::Object(map)
191        }
192    }
193}