etree/
xpath.rs

/// XPath parser
///
/// Grammar rules:
/// ```text
/// xpath:
///     element
///     separator element
///     xpath separator element
/// separator:
///     //
///     /
/// element:
///     ..
///     .
///     @name
///     name [ conditions_or ]
///     name [ index ]
///     * [ conditions_or ]
///     * [ index ]
///     *
///     name
/// conditions_or:
///     conditions_and or conditions_and
///     conditions_and
/// conditions_and:
///     condition and condition
///     condition
/// condition:
///     name operator string
///     @name operator string
///     text() operator string
///     position() operator decimal
///     name
///     @name
///     @*
///     ( condition )
///     ( conditions_and )
///     ( conditions_or )
/// index:
///     decimal
///     last() - decimal
///     last()
/// operator:
///     >=
///     <=
///     >
///     <
///     !=
///     =
/// ```
51use std::collections::{HashSet, HashMap};
52use nom::{
53    IResult,
54    bytes::complete::{tag, escaped},
55    character::complete::{one_of, none_of, char, anychar, space0, space1, alpha1, alphanumeric1, digit1},
56    branch::alt,
57    sequence::{pair, tuple, delimited},
58    multi::{many0, many0_count},
59    combinator::{recognize, opt, map, value},
60};
61
/// One step of a parsed XPath expression.
// `dead_code` is allowed because the type is only constructed by the parser
// functions of this module, which may themselves be unused in some builds.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct XPathSegment {
    /// Separator that preceded this step: `"/"`, `"//"`, or `""` when the
    /// step was not introduced by a separator.
    pub separator: String,
    /// Node test of the step: an element name, `"*"`, `"."` or `".."`.
    pub node: String,
    /// Predicate attached to the step; [`Predictor::None`] when there is none.
    pub condition: Predictor,
}

/// Predicate of an XPath step, as produced by the parser in this module.
// `dead_code` is allowed for the same reason as on `XPathSegment`.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Predictor {
    /// Both operands must hold (`left and right`).
    And(Box<Predictor>, Box<Predictor>),
    /// At least one operand must hold (`left or right`).
    Or(Box<Predictor>, Box<Predictor>),
    /// A test on a single operand: `(left, operator, right)`.
    ///
    /// `left` is a child name, `@attribute`, `@*`, `text()` or `position()`.
    /// `operator` and `right` are `None` for a bare existence test; otherwise
    /// `operator` is the comparison token (with `=` stored as `==`) and
    /// `right` is the literal exactly as written in the expression.
    Condition(String, Option<String>, Option<String>),
    /// Positional predicate with a plain decimal index, e.g. `[2]`.
    IndexDecimal(String),
    /// Positional predicate relative to a function: `(function, offset)`,
    /// e.g. `("last()", "1")` for `[last() - 1]`. `offset` is empty when no
    /// subtraction was written.
    IndexExpr(String, String),
    /// No predicate.
    None,
}
80
81impl Predictor {
82    #[allow(dead_code)]
83    pub fn collect(&self) -> (Vec<String>, Vec<String>, Vec<String>) {
84        let mut child = HashSet::new();
85        let mut attr = HashSet::new();
86        let mut func = HashSet::new();
87        match self {
88            Predictor::And(ref left, ref right) => {
89                let (c1, a1, f1) = left.collect();
90                child.extend(c1);
91                attr.extend(a1);
92                func.extend(f1);
93                let (c2, a2, f2) = right.collect();
94                child.extend(c2);
95                attr.extend(a2);
96                func.extend(f2);
97            },
98            Predictor::Or(ref left, ref right) => {
99                let (c1, a1, f1) = left.collect();
100                child.extend(c1);
101                attr.extend(a1);
102                func.extend(f1);
103                let (c2, a2, f2) = right.collect();
104                child.extend(c2);
105                attr.extend(a2);
106                func.extend(f2);
107            },
108            Predictor::Condition(ref left, _, _) => {
109                if left.starts_with("@") {
110                    attr.insert(left.get(1..).unwrap().to_string());
111                } else if left.ends_with("()") {
112                    func.insert(left.to_string());
113                } else {
114                    child.insert(left.to_string());
115                }
116            },
117            Predictor::IndexExpr(_, _) => {
118                func.insert("last()".to_string());
119            },
120            _ => {}
121        }
122        let mut child:Vec<_> = child.into_iter().collect();
123        let mut attr:Vec<_> = attr.into_iter().collect();
124        let mut func:Vec<_> = func.into_iter().collect();
125        child.sort();
126        attr.sort();
127        func.sort();
128        (child, attr, func)
129    }
130    #[allow(dead_code)]
131    pub fn expr(&self, info:&HashMap<String, String>) -> String {
132        match self {
133            Predictor::And(ref left, ref right) => {
134                format!("({}) && ({})", left.expr(info), right.expr(info))
135            },
136            Predictor::Or(ref left, ref right) => {
137                format!("({}) || ({})", left.expr(info), right.expr(info))
138            },
139            Predictor::Condition(ref left, ref op, ref right) => {
140                if info.contains_key(left) {
141                    if op.is_none() || right.is_none() {
142                        "true".to_string()
143                    } else {
144                        format!("'{}' {} {}", escape_info(info.get(left).unwrap()).unwrap().1, op.as_ref().unwrap(), right.as_ref().unwrap())
145                    }
146                } else {
147                    "false".to_string()
148                }
149            },
150            Predictor::IndexDecimal(ref left) => {
151                debug_assert!(info.contains_key("position()"));
152                format!("{} == {}", info.get("position()").unwrap(), left)
153            },
154            Predictor::IndexExpr(ref left, ref right) => {
155                debug_assert!(info.contains_key("position()"));
156                debug_assert!(info.contains_key("last()"));
157                if right == "" {
158                    format!("{} == {}", info.get("position()").unwrap(), info.get(left).unwrap())
159                } else {
160                    format!("{} == {} - {}", info.get("position()").unwrap(), info.get(left).unwrap(), right)
161                }
162            },
163            _ => {
164                "true".to_string()
165            }
166        }
167    }
168}
169
170fn escape_info(input:&str) -> IResult<&str, String> {
171    map(
172        many0(alt((
173            value("\\\\".to_string(), char('\\')),
174            value("\\'".to_string(), char('\'')),
175            map(anychar, |c| c.to_string()),
176        ))), |v| v.join("")
177    )(input)
178}
179fn decimal(input:&str) -> IResult<&str, &str> {
180    digit1(input)
181}
182
183fn name(input:&str) -> IResult<&str, &str> {
184    recognize(pair(
185            alt((alpha1, tag("_"), tag(":"))),
186            many0_count(alt((alphanumeric1, tag("_"), tag(":"), tag("-"), tag(".")))),
187    ))(input)
188}
189
190fn separator(input:&str) -> IResult<&str, &str> {
191    alt((
192            tag("//"),
193            tag("/"),
194    ))(input)
195}
196
197fn operator(input:&str) -> IResult<&str, &str> {
198    alt((
199            tag(">="),
200            tag("<="),
201            tag(">"),
202            tag("<"),
203            tag("!="),
204            value("==", tag("=")),
205    ))(input)
206}
207
208fn string(input:&str) -> IResult<&str, &str> {
209    recognize(delimited(
210            tag("'"),
211            many0_count(escaped(none_of("'\\"), '\\', one_of(r#"\'"#))),
212            tag("'"),
213    ))(input)
214}
215
216fn index(input:&str) -> IResult<&str, Predictor> {
217    alt((
218            map(decimal, |t| Predictor::IndexDecimal(t.to_string())),
219            map(tuple((tag("last()"), space0, tag("-"), space0, decimal)), |t| Predictor::IndexExpr(t.0.to_string(), t.4.to_string())),
220            map(tag("last()"), |t:&str| Predictor::IndexExpr(t.to_string(), "".to_string())),
221    ))(input)
222}
223
224fn condition(input:&str) -> IResult<&str, Predictor> {
225    alt((
226            map(tuple((name, space0, operator, space0, string)), |t| Predictor::Condition(t.0.to_string(), Some(t.2.to_string()), Some(t.4.to_string()))),
227            map(tuple((tag("@"), name, space0, operator, space0, string)), |t| Predictor::Condition(format!("@{}", t.1), Some(t.3.to_string()), Some(t.5.to_string()))),
228            map(tuple((tag("text()"), space0, operator, space0, string)), |t| Predictor::Condition(t.0.to_string(), Some(t.2.to_string()), Some(t.4.to_string()))),
229            map(tuple((tag("position()"), space0, operator, space0, decimal)), |t| Predictor::Condition(t.0.to_string(), Some(t.2.to_string()), Some(t.4.to_string()))),
230            map(name, |t| Predictor::Condition(t.to_string(), None, None)),
231            map(pair(tag("@"), name), |t| Predictor::Condition(format!("{}{}", t.0, t.1), None, None)),
232            map(tag("@*"), |t:&str| Predictor::Condition(t.to_string(), None, None)),
233            map(tuple((tag("("), space0, condition, space0, tag(")"))), |t| t.2),
234            map(tuple((tag("("), space0, conditions_and, space0, tag(")"))), |t| t.2),
235            map(tuple((tag("("), space0, conditions_or, space0, tag(")"))), |t| t.2),
236    ))(input)
237}
238
239fn conditions_and(input:&str) -> IResult<&str, Predictor> {
240    alt((
241            map(tuple((condition, space1, tag("and"), space1, condition)), |t| Predictor::And(Box::new(t.0), Box::new(t.4))),
242            condition,
243    ))(input)
244}
245
246fn conditions_or(input:&str) -> IResult<&str, Predictor> {
247    alt((
248            map(tuple((conditions_and, space1, tag("or"), space1, conditions_and)), |t| Predictor::Or(Box::new(t.0), Box::new(t.4))),
249            conditions_and,
250    ))(input)
251}
252
253fn element(input:&str) -> IResult<&str, XPathSegment> {
254    alt((
255            map(tag(".."), |t:&str| XPathSegment {
256                separator: "".to_string(),
257                node: t.to_string(),
258                condition: Predictor::None,
259            }),
260            map(tag("."), |t:&str| XPathSegment {
261                separator: "".to_string(),
262                node: t.to_string(),
263                condition: Predictor::None,
264            }),
265            map(recognize(pair(tag("@"), name)), |t| XPathSegment {
266                separator: "".to_string(),
267                node: "*".to_string(),
268                condition: Predictor::Condition(t.to_string(), None, None),
269            }),
270            map(tuple((name, tag("["), space0, conditions_or, space0, tag("]"))), |t| XPathSegment {
271                separator: "".to_string(),
272                node: t.0.to_string(),
273                condition: t.3,
274            }),
275            map(tuple((name, tag("["), space0, index, space0, tag("]"))), |t| XPathSegment {
276                separator: "".to_string(),
277                node: t.0.to_string(),
278                condition: t.3,
279            }),
280            map(tuple((tag("*["), space0, conditions_or, space0, tag("]"))), |t| XPathSegment {
281                separator: "".to_string(),
282                node: "*".to_string(),
283                condition: t.2,
284            }),
285            map(tuple((tag("*["), space0, index, space0, tag("]"))), |t| XPathSegment {
286                separator: "".to_string(),
287                node: "*".to_string(),
288                condition: t.2,
289            }),
290            map(tag("*"), |t:&str| XPathSegment {
291                separator: "".to_string(),
292                node: t.to_string(),
293                condition: Predictor::None,
294            }),
295            map(name, |t| XPathSegment {
296                separator: "".to_string(),
297                node: t.to_string(),
298                condition: Predictor::None,
299            }),
300    ))(input)
301}
302
303#[allow(dead_code)]
304pub fn xpath(input:&str) -> IResult<&str, Vec<XPathSegment>> {
305    let (remaining, initial) = opt(element)(input)?;
306    let mut segments = Vec::new();
307    if let Some(data) = initial {
308        segments.push(data);
309    }
310    let (remaining, parts) = many0(map(pair(separator, element), |mut t| {
311        t.1.separator = t.0.to_string();
312        t.1
313    }))(remaining)?;
314    segments.extend(parts);
315    Ok((remaining, segments))
316}
317
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a `Predictor::Condition` built from string slices.
    fn cond(left: &str, op: Option<&str>, right: Option<&str>) -> Predictor {
        Predictor::Condition(left.to_string(), op.map(String::from), right.map(String::from))
    }

    /// Shorthand for an `XPathSegment` built from string slices.
    fn seg(separator: &str, node: &str, condition: Predictor) -> XPathSegment {
        XPathSegment {
            separator: separator.to_string(),
            node: node.to_string(),
            condition,
        }
    }

    #[test]
    fn test_decimal() {
        assert_eq!(decimal("1234a"), Ok(("a", "1234")));
    }

    #[test]
    fn test_name() {
        assert_eq!(name("h:a12 u"), Ok((" u", "h:a12")));
        assert_eq!(name("_a12[u"), Ok(("[u", "_a12")));
        assert!(name("1a12 u").is_err());
    }

    #[test]
    fn test_separator() {
        assert_eq!(separator("/a"), Ok(("a", "/")));
        assert_eq!(separator("//a"), Ok(("a", "//")));
        assert_eq!(separator("///a"), Ok(("/a", "//")));
    }

    #[test]
    fn test_operator() {
        assert_eq!(operator(">=a"), Ok(("a", ">=")));
        assert_eq!(operator("<=a"), Ok(("a", "<=")));
        assert_eq!(operator(">a"), Ok(("a", ">")));
        assert_eq!(operator("<a"), Ok(("a", "<")));
        assert_eq!(operator("!=a"), Ok(("a", "!=")));
        // `=` is reported as `==`.
        assert_eq!(operator("=a"), Ok(("a", "==")));
    }

    #[test]
    fn test_string() {
        assert_eq!(string("'ab''"), Ok(("'", "'ab'")));
        assert_eq!(string(r"'ab\'''"), Ok(("'", r"'ab\''")));
    }

    #[test]
    fn test_index() {
        assert_eq!(index("2a"), Ok(("a", Predictor::IndexDecimal("2".to_string()))));
        assert_eq!(
            index("last()a"),
            Ok(("a", Predictor::IndexExpr("last()".to_string(), "".to_string())))
        );
        assert_eq!(
            index("last()- 2a"),
            Ok(("a", Predictor::IndexExpr("last()".to_string(), "2".to_string())))
        );
    }

    #[test]
    fn test_condition() {
        assert_eq!(condition("child_node"), Ok(("", cond("child_node", None, None))));
        assert_eq!(
            condition("child_node= 'aa'"),
            Ok(("", cond("child_node", Some("=="), Some("'aa'"))))
        );
        assert_eq!(condition("@*a"), Ok(("a", cond("@*", None, None))));
        assert_eq!(condition("@attr"), Ok(("", cond("@attr", None, None))));
        assert_eq!(
            condition("@attr  = 'aa'"),
            Ok(("", cond("@attr", Some("=="), Some("'aa'"))))
        );
        assert_eq!(
            condition("text()!= 'aa'"),
            Ok(("", cond("text()", Some("!="), Some("'aa'"))))
        );
        assert_eq!(
            condition("position()>= 7a"),
            Ok(("a", cond("position()", Some(">="), Some("7"))))
        );
        assert_eq!(
            condition("(position()>= 7 )a"),
            Ok(("a", cond("position()", Some(">="), Some("7"))))
        );
    }

    #[test]
    fn test_conditions_or() {
        assert_eq!(
            conditions_or("@attr  = 'aa'"),
            Ok(("", cond("@attr", Some("=="), Some("'aa'"))))
        );
        assert_eq!(
            conditions_or("text()!= 'aa'"),
            Ok(("", cond("text()", Some("!="), Some("'aa'"))))
        );
        assert_eq!(
            conditions_or("child_node and @attr)"),
            Ok((
                ")",
                Predictor::And(
                    Box::new(cond("child_node", None, None)),
                    Box::new(cond("@attr", None, None)),
                )
            ))
        );
        assert_eq!(
            conditions_or("text()='aa' or child_node and @attr)"),
            Ok((
                ")",
                Predictor::Or(
                    Box::new(cond("text()", Some("=="), Some("'aa'"))),
                    Box::new(Predictor::And(
                        Box::new(cond("child_node", None, None)),
                        Box::new(cond("@attr", None, None)),
                    )),
                )
            ))
        );
    }

    #[test]
    fn test_xpath() {
        assert_eq!(
            xpath("@id"),
            Ok(("", vec![seg("", "*", cond("@id", None, None))]))
        );
        assert_eq!(
            xpath("//NODE[@oid and @attrcatref='abc']"),
            Ok((
                "",
                vec![seg(
                    "//",
                    "NODE",
                    Predictor::And(
                        Box::new(cond("@oid", None, None)),
                        Box::new(cond("@attrcatref", Some("=="), Some("'abc'"))),
                    ),
                )]
            ))
        );
        assert_eq!(
            xpath(".//NAME/TUV"),
            Ok((
                "",
                vec![
                    seg("", ".", Predictor::None),
                    seg("//", "NAME", Predictor::None),
                    seg("/", "TUV", Predictor::None),
                ]
            ))
        );
    }

    #[test]
    fn test_predictor_expr() {
        let (remaining, segs) = xpath(".//NAME[text()='aa' and (@id='bb' or @gid)]").unwrap();
        assert_eq!(remaining, "");
        assert_eq!(segs.len(), 2);
        assert_eq!(
            segs[1].condition.collect(),
            (
                Vec::new(),
                vec!["gid".to_string(), "id".to_string()],
                vec!["text()".to_string()],
            )
        );
        // `@gid` is deliberately absent from `info`, so it renders as `false`.
        let mut info = HashMap::new();
        info.insert("text()".to_string(), "aaa".to_string());
        info.insert("@id".to_string(), "123".to_string());
        assert_eq!(
            segs[1].condition.expr(&info),
            "('aaa' == 'aa') && (('123' == 'bb') || (false))"
        );
    }

    #[test]
    fn test_escape_info() {
        assert_eq!(escape_info("ab'c"), Ok(("", "ab\\'c".to_string())));
        assert_eq!(escape_info("ab\\c"), Ok(("", "ab\\\\c".to_string())));
    }
}