1use std::collections::{HashSet, HashMap};
52use nom::{
53 IResult,
54 bytes::complete::{tag, escaped},
55 character::complete::{one_of, none_of, char, anychar, space0, space1, alpha1, alphanumeric1, digit1},
56 branch::alt,
57 sequence::{pair, tuple, delimited},
58 multi::{many0, many0_count},
59 combinator::{recognize, opt, map, value},
60};
61
62#[allow(dead_code)]
63#[derive(Debug, PartialEq)]
64pub struct XPathSegment {
65 pub separator: String,
66 pub node: String,
67 pub condition: Predictor,
68}
69
/// Predicate attached to an XPath step (the part written between `[` and `]`).
#[derive(Debug, PartialEq)]
#[allow(dead_code)]
pub enum Predictor {
    /// Both operands must hold (`left and right`).
    And(Box<Predictor>, Box<Predictor>),
    /// At least one operand must hold (`left or right`).
    Or(Box<Predictor>, Box<Predictor>),
    /// `(subject, operator, value)`.
    ///
    /// The subject is a child element name, an attribute written with its
    /// leading `@` (including `@*`), or a function call such as `text()` or
    /// `position()`. Operator and value are both `None` for a bare existence
    /// test; otherwise the operator is stored as parsed by `operator` (a
    /// single `=` becomes `==`) and the value is the literal text as it
    /// appeared in the input (string literals keep their quotes).
    Condition(String, Option<String>, Option<String>),
    /// A plain numeric index such as `[3]`, kept as its digit string.
    IndexDecimal(String),
    /// An index relative to `last()`: `(function, offset)`, where the offset is
    /// the digit string after `-`, or `""` when the index is just `last()`.
    IndexExpr(String, String),
    /// No predicate.
    None,
}
80
81impl Predictor {
82 #[allow(dead_code)]
83 pub fn collect(&self) -> (Vec<String>, Vec<String>, Vec<String>) {
84 let mut child = HashSet::new();
85 let mut attr = HashSet::new();
86 let mut func = HashSet::new();
87 match self {
88 Predictor::And(ref left, ref right) => {
89 let (c1, a1, f1) = left.collect();
90 child.extend(c1);
91 attr.extend(a1);
92 func.extend(f1);
93 let (c2, a2, f2) = right.collect();
94 child.extend(c2);
95 attr.extend(a2);
96 func.extend(f2);
97 },
98 Predictor::Or(ref left, ref right) => {
99 let (c1, a1, f1) = left.collect();
100 child.extend(c1);
101 attr.extend(a1);
102 func.extend(f1);
103 let (c2, a2, f2) = right.collect();
104 child.extend(c2);
105 attr.extend(a2);
106 func.extend(f2);
107 },
108 Predictor::Condition(ref left, _, _) => {
109 if left.starts_with("@") {
110 attr.insert(left.get(1..).unwrap().to_string());
111 } else if left.ends_with("()") {
112 func.insert(left.to_string());
113 } else {
114 child.insert(left.to_string());
115 }
116 },
117 Predictor::IndexExpr(_, _) => {
118 func.insert("last()".to_string());
119 },
120 _ => {}
121 }
122 let mut child:Vec<_> = child.into_iter().collect();
123 let mut attr:Vec<_> = attr.into_iter().collect();
124 let mut func:Vec<_> = func.into_iter().collect();
125 child.sort();
126 attr.sort();
127 func.sort();
128 (child, attr, func)
129 }
130 #[allow(dead_code)]
131 pub fn expr(&self, info:&HashMap<String, String>) -> String {
132 match self {
133 Predictor::And(ref left, ref right) => {
134 format!("({}) && ({})", left.expr(info), right.expr(info))
135 },
136 Predictor::Or(ref left, ref right) => {
137 format!("({}) || ({})", left.expr(info), right.expr(info))
138 },
139 Predictor::Condition(ref left, ref op, ref right) => {
140 if info.contains_key(left) {
141 if op.is_none() || right.is_none() {
142 "true".to_string()
143 } else {
144 format!("'{}' {} {}", escape_info(info.get(left).unwrap()).unwrap().1, op.as_ref().unwrap(), right.as_ref().unwrap())
145 }
146 } else {
147 "false".to_string()
148 }
149 },
150 Predictor::IndexDecimal(ref left) => {
151 debug_assert!(info.contains_key("position()"));
152 format!("{} == {}", info.get("position()").unwrap(), left)
153 },
154 Predictor::IndexExpr(ref left, ref right) => {
155 debug_assert!(info.contains_key("position()"));
156 debug_assert!(info.contains_key("last()"));
157 if right == "" {
158 format!("{} == {}", info.get("position()").unwrap(), info.get(left).unwrap())
159 } else {
160 format!("{} == {} - {}", info.get("position()").unwrap(), info.get(left).unwrap(), right)
161 }
162 },
163 _ => {
164 "true".to_string()
165 }
166 }
167 }
168}
169
170fn escape_info(input:&str) -> IResult<&str, String> {
171 map(
172 many0(alt((
173 value("\\\\".to_string(), char('\\')),
174 value("\\'".to_string(), char('\'')),
175 map(anychar, |c| c.to_string()),
176 ))), |v| v.join("")
177 )(input)
178}
179fn decimal(input:&str) -> IResult<&str, &str> {
180 digit1(input)
181}
182
183fn name(input:&str) -> IResult<&str, &str> {
184 recognize(pair(
185 alt((alpha1, tag("_"), tag(":"))),
186 many0_count(alt((alphanumeric1, tag("_"), tag(":"), tag("-"), tag(".")))),
187 ))(input)
188}
189
190fn separator(input:&str) -> IResult<&str, &str> {
191 alt((
192 tag("//"),
193 tag("/"),
194 ))(input)
195}
196
197fn operator(input:&str) -> IResult<&str, &str> {
198 alt((
199 tag(">="),
200 tag("<="),
201 tag(">"),
202 tag("<"),
203 tag("!="),
204 value("==", tag("=")),
205 ))(input)
206}
207
208fn string(input:&str) -> IResult<&str, &str> {
209 recognize(delimited(
210 tag("'"),
211 many0_count(escaped(none_of("'\\"), '\\', one_of(r#"\'"#))),
212 tag("'"),
213 ))(input)
214}
215
216fn index(input:&str) -> IResult<&str, Predictor> {
217 alt((
218 map(decimal, |t| Predictor::IndexDecimal(t.to_string())),
219 map(tuple((tag("last()"), space0, tag("-"), space0, decimal)), |t| Predictor::IndexExpr(t.0.to_string(), t.4.to_string())),
220 map(tag("last()"), |t:&str| Predictor::IndexExpr(t.to_string(), "".to_string())),
221 ))(input)
222}
223
224fn condition(input:&str) -> IResult<&str, Predictor> {
225 alt((
226 map(tuple((name, space0, operator, space0, string)), |t| Predictor::Condition(t.0.to_string(), Some(t.2.to_string()), Some(t.4.to_string()))),
227 map(tuple((tag("@"), name, space0, operator, space0, string)), |t| Predictor::Condition(format!("@{}", t.1), Some(t.3.to_string()), Some(t.5.to_string()))),
228 map(tuple((tag("text()"), space0, operator, space0, string)), |t| Predictor::Condition(t.0.to_string(), Some(t.2.to_string()), Some(t.4.to_string()))),
229 map(tuple((tag("position()"), space0, operator, space0, decimal)), |t| Predictor::Condition(t.0.to_string(), Some(t.2.to_string()), Some(t.4.to_string()))),
230 map(name, |t| Predictor::Condition(t.to_string(), None, None)),
231 map(pair(tag("@"), name), |t| Predictor::Condition(format!("{}{}", t.0, t.1), None, None)),
232 map(tag("@*"), |t:&str| Predictor::Condition(t.to_string(), None, None)),
233 map(tuple((tag("("), space0, condition, space0, tag(")"))), |t| t.2),
234 map(tuple((tag("("), space0, conditions_and, space0, tag(")"))), |t| t.2),
235 map(tuple((tag("("), space0, conditions_or, space0, tag(")"))), |t| t.2),
236 ))(input)
237}
238
239fn conditions_and(input:&str) -> IResult<&str, Predictor> {
240 alt((
241 map(tuple((condition, space1, tag("and"), space1, condition)), |t| Predictor::And(Box::new(t.0), Box::new(t.4))),
242 condition,
243 ))(input)
244}
245
246fn conditions_or(input:&str) -> IResult<&str, Predictor> {
247 alt((
248 map(tuple((conditions_and, space1, tag("or"), space1, conditions_and)), |t| Predictor::Or(Box::new(t.0), Box::new(t.4))),
249 conditions_and,
250 ))(input)
251}
252
253fn element(input:&str) -> IResult<&str, XPathSegment> {
254 alt((
255 map(tag(".."), |t:&str| XPathSegment {
256 separator: "".to_string(),
257 node: t.to_string(),
258 condition: Predictor::None,
259 }),
260 map(tag("."), |t:&str| XPathSegment {
261 separator: "".to_string(),
262 node: t.to_string(),
263 condition: Predictor::None,
264 }),
265 map(recognize(pair(tag("@"), name)), |t| XPathSegment {
266 separator: "".to_string(),
267 node: "*".to_string(),
268 condition: Predictor::Condition(t.to_string(), None, None),
269 }),
270 map(tuple((name, tag("["), space0, conditions_or, space0, tag("]"))), |t| XPathSegment {
271 separator: "".to_string(),
272 node: t.0.to_string(),
273 condition: t.3,
274 }),
275 map(tuple((name, tag("["), space0, index, space0, tag("]"))), |t| XPathSegment {
276 separator: "".to_string(),
277 node: t.0.to_string(),
278 condition: t.3,
279 }),
280 map(tuple((tag("*["), space0, conditions_or, space0, tag("]"))), |t| XPathSegment {
281 separator: "".to_string(),
282 node: "*".to_string(),
283 condition: t.2,
284 }),
285 map(tuple((tag("*["), space0, index, space0, tag("]"))), |t| XPathSegment {
286 separator: "".to_string(),
287 node: "*".to_string(),
288 condition: t.2,
289 }),
290 map(tag("*"), |t:&str| XPathSegment {
291 separator: "".to_string(),
292 node: t.to_string(),
293 condition: Predictor::None,
294 }),
295 map(name, |t| XPathSegment {
296 separator: "".to_string(),
297 node: t.to_string(),
298 condition: Predictor::None,
299 }),
300 ))(input)
301}
302
303#[allow(dead_code)]
304pub fn xpath(input:&str) -> IResult<&str, Vec<XPathSegment>> {
305 let (remaining, initial) = opt(element)(input)?;
306 let mut segments = Vec::new();
307 if let Some(data) = initial {
308 segments.push(data);
309 }
310 let (remaining, parts) = many0(map(pair(separator, element), |mut t| {
311 t.1.separator = t.0.to_string();
312 t.1
313 }))(remaining)?;
314 segments.extend(parts);
315 Ok((remaining, segments))
316}
317
#[cfg(test)]
mod tests {
    use super::*;

    /// Existence condition on `subject`.
    fn exists(subject: &str) -> Predictor {
        Predictor::Condition(subject.to_string(), None, None)
    }

    /// Comparison condition `subject op value`.
    fn compare(subject: &str, op: &str, value: &str) -> Predictor {
        Predictor::Condition(
            subject.to_string(),
            Some(op.to_string()),
            Some(value.to_string()),
        )
    }

    fn and(lhs: Predictor, rhs: Predictor) -> Predictor {
        Predictor::And(Box::new(lhs), Box::new(rhs))
    }

    fn or(lhs: Predictor, rhs: Predictor) -> Predictor {
        Predictor::Or(Box::new(lhs), Box::new(rhs))
    }

    /// Expected segment with the given separator, node and predicate.
    fn segment(sep: &str, node: &str, predicate: Predictor) -> XPathSegment {
        XPathSegment {
            separator: sep.to_string(),
            node: node.to_string(),
            condition: predicate,
        }
    }

    #[test]
    fn test_decimal() {
        assert_eq!(decimal("1234a"), Ok(("a", "1234")));
    }

    #[test]
    fn test_name() {
        assert_eq!(name("h:a12 u"), Ok((" u", "h:a12")));
        assert_eq!(name("_a12[u"), Ok(("[u", "_a12")));
        assert!(name("1a12 u").is_err());
    }

    #[test]
    fn test_separator() {
        assert_eq!(separator("/a"), Ok(("a", "/")));
        assert_eq!(separator("//a"), Ok(("a", "//")));
        assert_eq!(separator("///a"), Ok(("/a", "//")));
    }

    #[test]
    fn test_operator() {
        // (input, operator as reported); every input leaves "a" unparsed.
        const CASES: [(&str, &str); 6] = [
            (">=a", ">="),
            ("<=a", "<="),
            (">a", ">"),
            ("<a", "<"),
            ("!=a", "!="),
            ("=a", "=="),
        ];
        for &(input, expected) in CASES.iter() {
            assert_eq!(operator(input), Ok(("a", expected)));
        }
    }

    #[test]
    fn test_string() {
        assert_eq!(string("'ab''"), Ok(("'", "'ab'")));
        assert_eq!(string(r"'ab\'''"), Ok(("'", r"'ab\''")));
    }

    #[test]
    fn test_index() {
        assert_eq!(
            index("2a"),
            Ok(("a", Predictor::IndexDecimal("2".to_string())))
        );
        assert_eq!(
            index("last()a"),
            Ok(("a", Predictor::IndexExpr("last()".to_string(), "".to_string())))
        );
        assert_eq!(
            index("last()- 2a"),
            Ok(("a", Predictor::IndexExpr("last()".to_string(), "2".to_string())))
        );
    }

    #[test]
    fn test_condition() {
        assert_eq!(condition("child_node"), Ok(("", exists("child_node"))));
        assert_eq!(
            condition("child_node= 'aa'"),
            Ok(("", compare("child_node", "==", "'aa'")))
        );
        assert_eq!(condition("@*a"), Ok(("a", exists("@*"))));
        assert_eq!(condition("@attr"), Ok(("", exists("@attr"))));
        assert_eq!(
            condition("@attr = 'aa'"),
            Ok(("", compare("@attr", "==", "'aa'")))
        );
        assert_eq!(
            condition("text()!= 'aa'"),
            Ok(("", compare("text()", "!=", "'aa'")))
        );
        assert_eq!(
            condition("position()>= 7a"),
            Ok(("a", compare("position()", ">=", "7")))
        );
        assert_eq!(
            condition("(position()>= 7 )a"),
            Ok(("a", compare("position()", ">=", "7")))
        );
    }

    #[test]
    fn test_conditions_or() {
        assert_eq!(
            conditions_or("@attr = 'aa'"),
            Ok(("", compare("@attr", "==", "'aa'")))
        );
        assert_eq!(
            conditions_or("text()!= 'aa'"),
            Ok(("", compare("text()", "!=", "'aa'")))
        );
        assert_eq!(
            conditions_or("child_node and @attr)"),
            Ok((")", and(exists("child_node"), exists("@attr"))))
        );
        assert_eq!(
            conditions_or("text()='aa' or child_node and @attr)"),
            Ok((
                ")",
                or(
                    compare("text()", "==", "'aa'"),
                    and(exists("child_node"), exists("@attr")),
                )
            ))
        );
    }

    #[test]
    fn test_xpath() {
        assert_eq!(
            xpath("@id"),
            Ok(("", vec![segment("", "*", exists("@id"))]))
        );
        assert_eq!(
            xpath("//NODE[@oid and @attrcatref='abc']"),
            Ok((
                "",
                vec![segment(
                    "//",
                    "NODE",
                    and(exists("@oid"), compare("@attrcatref", "==", "'abc'")),
                )]
            ))
        );
        assert_eq!(
            xpath(".//NAME/TUV"),
            Ok((
                "",
                vec![
                    segment("", ".", Predictor::None),
                    segment("//", "NAME", Predictor::None),
                    segment("/", "TUV", Predictor::None),
                ]
            ))
        );
    }

    #[test]
    fn test_predictor_expr() {
        let (remaining, segs) = xpath(".//NAME[text()='aa' and (@id='bb' or @gid)]").unwrap();
        assert_eq!(remaining, "");
        assert_eq!(segs.len(), 2);

        let predicate = &segs[1].condition;
        let expected_children: Vec<String> = Vec::new();
        let expected_attrs = vec!["gid".to_string(), "id".to_string()];
        let expected_funcs = vec!["text()".to_string()];
        assert_eq!(
            predicate.collect(),
            (expected_children, expected_attrs, expected_funcs)
        );

        let mut info = HashMap::new();
        info.insert("text()".to_string(), "aaa".to_string());
        info.insert("@id".to_string(), "123".to_string());
        assert_eq!(
            predicate.expr(&info),
            "('aaa' == 'aa') && (('123' == 'bb') || (false))"
        );
    }

    #[test]
    fn test_escape_info() {
        assert_eq!(escape_info("ab'c"), Ok(("", "ab\\'c".to_string())));
        assert_eq!(escape_info("ab\\c"), Ok(("", "ab\\\\c".to_string())));
    }
}