Skip to main content

camel_language_xpath/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use async_trait::async_trait;
4use camel_language_api::{Body, Exchange, Value};
5use camel_language_api::{Expression, Language, LanguageError, Predicate};
6use serde_json::Value as JsonValue;
7use sxd_document::parser;
8use sxd_xpath::{Context, Factory, Value as SxdValue};
9use tracing::{debug, warn};
10
11// TODO(XPH-002): sxd-xpath is unmaintained; replacement planned.
12// Input size is bounded to prevent resource exhaustion.
13
/// Tunables applied to every XPath evaluation performed by this crate.
///
/// # Security note (XPH-002)
/// `sxd-xpath`, the crate doing the actual XML parsing and XPath evaluation,
/// is unmaintained. The `max_input_bytes` guard caps how much XML is handed
/// to it so that oversized inputs are rejected before any parsing happens.
///
/// # TODO(XPH-001): Namespace support
/// Namespace declarations are not supported yet. Evaluating expressions that
/// use XML namespaces (e.g. `/soap:Envelope/soap:Body`) requires adding a
/// namespace map (`HashMap<String, String>` mapping prefix → URI) to this
/// config and registering it with the `sxd_xpath::Context` before evaluation.
#[derive(Debug, Clone)]
pub struct XPathConfig {
    /// Upper bound, in bytes, on the XML text accepted for evaluation.
    ///
    /// `Some(n)` rejects inputs longer than `n` bytes; `None` disables the
    /// check. The default configuration uses 1 MiB.
    pub max_input_bytes: Option<usize>,
}
31
32impl Default for XPathConfig {
33    fn default() -> Self {
34        Self {
35            max_input_bytes: Some(1_048_576), // 1 MiB
36        }
37    }
38}
39
40pub struct XPathLanguage {
41    config: XPathConfig,
42}
43
44struct XPathExpression {
45    query: String,
46    config: XPathConfig,
47}
48
49struct XPathPredicate {
50    query: String,
51    config: XPathConfig,
52}
53
54fn extract_xml(exchange: &Exchange) -> Result<String, LanguageError> {
55    match &exchange.input.body {
56        Body::Xml(s) => Ok(s.clone()),
57        other => other
58            .clone()
59            .try_into_xml()
60            .map_err(|e| {
61                LanguageError::EvalError(format!("body is not XML and cannot be coerced: {e}"))
62            })
63            .and_then(|b| match b {
64                Body::Xml(s) => Ok(s),
65                _ => Err(LanguageError::EvalError(
66                    "body coercion did not produce XML".into(),
67                )),
68            }),
69    }
70}
71
72fn compile_xpath(query: &str) -> Result<sxd_xpath::XPath, LanguageError> {
73    let factory = Factory::new();
74    factory
75        .build(query)
76        .map_err(|e| {
77            warn!(error = %e, "xpath expression compile failed");
78            LanguageError::ParseError {
79                expr: query.to_string(),
80                reason: e.to_string(),
81            }
82        })
83        .and_then(|opt| {
84            opt.ok_or_else(|| {
85                warn!("xpath expression compile failed");
86                LanguageError::ParseError {
87                    expr: query.to_string(),
88                    reason: "empty XPath expression".into(),
89                }
90            })
91        })
92}
93
94fn run_query(query: &str, xml: &str, config: &XPathConfig) -> Result<JsonValue, LanguageError> {
95    if let Some(max) = config.max_input_bytes
96        && xml.len() > max
97    {
98        return Err(LanguageError::EvalError(
99            "input exceeds maximum allowed size".into(),
100        ));
101    }
102    let package = parser::parse(xml).map_err(|_| {
103        // sxd parse errors can embed document-derived content (e.g. MismatchedTag
104        // includes tag names from the exchange body which may be sensitive).
105        // Return a generic message; do NOT include the raw error in logs or errors.
106        warn!("xpath: body XML could not be parsed");
107        LanguageError::EvalError("xml parse error: body is not valid XML".to_string())
108    })?;
109    let doc = package.as_document();
110    let xpath = compile_xpath(query)?;
111    // TODO(XPH-001): Namespace declarations are not yet supported. The context
112    // should be populated with namespace prefix → URI mappings from XPathConfig
113    // before calling xpath.evaluate().
114    let context = Context::new();
115    let result = xpath.evaluate(&context, doc.root()).map_err(|_| {
116        // sxd_xpath eval errors describe query structure issues (unknown variable/function,
117        // type mismatch). No document-derived values are embedded, but we follow the
118        // same conservative pattern: generic message, no raw external error strings.
119        warn!("xpath: expression evaluation failed");
120        LanguageError::EvalError(
121            "xpath query failed: expression could not be evaluated".to_string(),
122        )
123    })?;
124
125    Ok(match result {
126        SxdValue::Nodeset(ns) => {
127            let nodes: Vec<_> = ns.document_order();
128            match nodes.len() {
129                0 => JsonValue::Null,
130                1 => JsonValue::String(nodes[0].string_value()),
131                _ => JsonValue::Array(
132                    nodes
133                        .into_iter()
134                        .map(|n| JsonValue::String(n.string_value()))
135                        .collect(),
136                ),
137            }
138        }
139        SxdValue::Boolean(b) => JsonValue::Bool(b),
140        SxdValue::Number(n) => serde_json::Number::from_f64(n)
141            .map(JsonValue::Number)
142            .unwrap_or(JsonValue::Null),
143        SxdValue::String(s) => JsonValue::String(s),
144    })
145}
146
147#[async_trait]
148impl Expression for XPathExpression {
149    async fn evaluate(&self, exchange: &Exchange) -> Result<Value, LanguageError> {
150        let xml = extract_xml(exchange)?;
151        run_query(&self.query, &xml, &self.config)
152    }
153}
154
155#[async_trait]
156impl Predicate for XPathPredicate {
157    async fn matches(&self, exchange: &Exchange) -> Result<bool, LanguageError> {
158        let xml = extract_xml(exchange)?;
159        let result = run_query(&self.query, &xml, &self.config)?;
160        Ok(match &result {
161            JsonValue::Null => false,
162            JsonValue::Bool(b) => *b,
163            JsonValue::Number(n) => n.as_f64().is_some_and(|f| f != 0.0),
164            JsonValue::String(s) => !s.is_empty(),
165            JsonValue::Array(arr) => !arr.is_empty(),
166            _ => true,
167        })
168    }
169}
170
171impl Default for XPathLanguage {
172    fn default() -> Self {
173        Self::new()
174    }
175}
176
177impl XPathLanguage {
178    /// Create an XPathLanguage::new() with the default configuration (1 MiB input limit).
179    pub fn new() -> Self {
180        Self::with_config(XPathConfig::default())
181    }
182
183    /// Create an XPathLanguage::new() with a custom configuration.
184    pub fn with_config(config: XPathConfig) -> Self {
185        Self { config }
186    }
187}
188
189impl Language for XPathLanguage {
190    fn name(&self) -> &'static str {
191        "xpath"
192    }
193
194    fn create_expression(&self, script: &str) -> Result<Box<dyn Expression>, LanguageError> {
195        compile_xpath(script)?;
196        debug!("xpath expression compiled");
197        Ok(Box::new(XPathExpression {
198            query: script.to_string(),
199            config: self.config.clone(),
200        }))
201    }
202
203    fn create_predicate(&self, script: &str) -> Result<Box<dyn Predicate>, LanguageError> {
204        compile_xpath(script)?;
205        debug!("xpath expression compiled");
206        Ok(Box::new(XPathPredicate {
207            query: script.to_string(),
208            config: self.config.clone(),
209        }))
210    }
211}
212
#[cfg(test)]
mod tests {
    use super::*;
    use camel_language_api::Message;

    /// Builds an exchange whose input message carries `body`.
    fn exchange_of(body: Body) -> Exchange {
        Exchange::new(Message::new(body))
    }

    /// Exchange with an XML-typed body.
    fn xml_exchange(xml: &str) -> Exchange {
        exchange_of(Body::Xml(xml.to_owned()))
    }

    /// Exchange with a plain-text body (exercises the coercion path).
    fn text_exchange(text: &str) -> Exchange {
        exchange_of(Body::Text(text.to_owned()))
    }

    /// Exchange built from a default message.
    fn empty_exchange() -> Exchange {
        Exchange::new(Message::default())
    }

    /// Compiles `query` as an expression using the default configuration.
    fn expression(query: &str) -> Box<dyn Expression> {
        XPathLanguage::new().create_expression(query).unwrap()
    }

    /// Compiles `query` as a predicate using the default configuration.
    fn predicate(query: &str) -> Box<dyn Predicate> {
        XPathLanguage::new().create_predicate(query).unwrap()
    }

    /// Shorthand for a JSON string value.
    fn text(value: &str) -> JsonValue {
        JsonValue::String(value.to_owned())
    }

    /// Language limited to 100 input bytes.
    fn tiny_limit_language() -> XPathLanguage {
        XPathLanguage::with_config(XPathConfig {
            max_input_bytes: Some(100),
        })
    }

    /// XML exchange whose body is well over 100 bytes.
    fn oversized_exchange() -> Exchange {
        xml_exchange(&format!("<root>{}</root>", "x".repeat(200)))
    }

    #[tokio::test]
    async fn expression_simple_path() {
        let exchange = xml_exchange("<root><name>books</name></root>");
        let value = expression("/root/name").evaluate(&exchange).await.unwrap();
        assert_eq!(value, text("books"));
    }

    #[tokio::test]
    async fn expression_nested_path() {
        let exchange = xml_exchange("<root><inner><value>42</value></inner></root>");
        let value = expression("/root/inner/value")
            .evaluate(&exchange)
            .await
            .unwrap();
        assert_eq!(value, text("42"));
    }

    #[tokio::test]
    async fn expression_attribute_access() {
        let exchange = xml_exchange("<root><item id=\"123\"/></root>");
        let value = expression("/root/item/@id")
            .evaluate(&exchange)
            .await
            .unwrap();
        assert_eq!(value, text("123"));
    }

    #[tokio::test]
    async fn expression_text_function() {
        let exchange = xml_exchange("<root><name>hello</name></root>");
        let value = expression("/root/name/text()")
            .evaluate(&exchange)
            .await
            .unwrap();
        assert_eq!(value, text("hello"));
    }

    #[tokio::test]
    async fn expression_wildcard() {
        let exchange = xml_exchange("<root><item>a</item><item>b</item></root>");
        let value = expression("/root/item").evaluate(&exchange).await.unwrap();
        assert_eq!(value, JsonValue::Array(vec![text("a"), text("b")]));
    }

    #[tokio::test]
    async fn expression_predicate_position() {
        let exchange = xml_exchange("<root><item>a</item><item>b</item><item>c</item></root>");
        let value = expression("/root/item[2]")
            .evaluate(&exchange)
            .await
            .unwrap();
        assert_eq!(value, text("b"));
    }

    #[tokio::test]
    async fn expression_count_function() {
        let exchange = xml_exchange("<root><item>a</item><item>b</item></root>");
        let value = expression("count(/root/item)")
            .evaluate(&exchange)
            .await
            .unwrap();
        let two = JsonValue::Number(serde_json::Number::from_f64(2.0).unwrap());
        assert_eq!(value, two);
    }

    #[tokio::test]
    async fn expression_text_body_with_valid_xml() {
        let exchange = text_exchange("<root><value>test</value></root>");
        let value = expression("/root/value")
            .evaluate(&exchange)
            .await
            .unwrap();
        assert_eq!(value, text("test"));
    }

    #[tokio::test]
    async fn expression_text_body_with_invalid_xml() {
        let exchange = text_exchange("not xml at all");
        let outcome = expression("/root").evaluate(&exchange).await;
        assert!(outcome.is_err());
    }

    #[tokio::test]
    async fn expression_empty_body_is_error() {
        let exchange = empty_exchange();
        let outcome = expression("/root").evaluate(&exchange).await;
        assert!(outcome.is_err());
    }

    #[tokio::test]
    async fn expression_empty_result_is_null() {
        let exchange = xml_exchange("<root><name>test</name></root>");
        let value = expression("/root/missing")
            .evaluate(&exchange)
            .await
            .unwrap();
        assert_eq!(value, JsonValue::Null);
    }

    #[tokio::test]
    async fn expression_invalid_xpath_syntax() {
        let Err(err) = XPathLanguage::new().create_expression("//[invalid") else {
            panic!("expected ParseError");
        };
        match err {
            LanguageError::ParseError { expr, reason } => {
                assert!(!expr.is_empty());
                assert!(!reason.is_empty());
            }
            other => panic!("expected ParseError, got {other:?}"),
        }
    }

    #[tokio::test]
    async fn predicate_non_empty_nodeset_is_true() {
        let exchange = xml_exchange("<root><item>a</item><item>b</item></root>");
        assert!(predicate("/root/item").matches(&exchange).await.unwrap());
    }

    #[tokio::test]
    async fn predicate_empty_result_is_false() {
        let exchange = xml_exchange("<root><name>test</name></root>");
        assert!(!predicate("/root/missing").matches(&exchange).await.unwrap());
    }

    #[tokio::test]
    async fn predicate_boolean_expression() {
        let exchange = xml_exchange("<root><item>a</item><item>b</item><item>c</item></root>");
        let matched = predicate("count(/root/item) > 2")
            .matches(&exchange)
            .await
            .unwrap();
        assert!(matched);
    }

    #[tokio::test]
    async fn predicate_numeric_comparison_false() {
        let exchange = xml_exchange("<root><item>a</item></root>");
        let matched = predicate("count(/root/item) > 5")
            .matches(&exchange)
            .await
            .unwrap();
        assert!(!matched);
    }

    #[tokio::test]
    async fn expression_rejects_oversized_input() {
        let expr = tiny_limit_language().create_expression("/root").unwrap();
        let exchange = oversized_exchange();
        let outcome = expr.evaluate(&exchange).await;
        assert!(outcome.is_err());
        match outcome.unwrap_err() {
            LanguageError::EvalError(msg) => {
                assert!(msg.contains("input exceeds maximum allowed size"));
            }
            other => panic!("expected EvalError, got {other:?}"),
        }
    }

    #[tokio::test]
    async fn predicate_rejects_oversized_input() {
        let pred = tiny_limit_language().create_predicate("/root").unwrap();
        let exchange = oversized_exchange();
        let outcome = pred.matches(&exchange).await;
        assert!(outcome.is_err());
    }
}