pact_models/
xml_utils.rs

1//! Collection of utilities for working with XML
2
3use std::collections::{BTreeMap, HashMap};
4use std::ops::Index;
5use std::str;
6use anyhow::anyhow;
7use indextree::{Arena, NodeId};
8use itertools::Itertools;
9use kiss_xml::dom::{Element, Node};
10use lazy_static::lazy_static;
11use sxd_document::{Package, parser};
12use tracing::trace;
13
14use crate::path_exp::{DocPath, PathToken};
15
16/// Parses a vector of bytes into a XML document
17pub fn parse_bytes(bytes: &[u8]) -> anyhow::Result<Package> {
18  let string = str::from_utf8(bytes)?;
19  match parser::parse(string) {
20    Ok(doc) => Ok(doc),
21    Err(err) => Err(anyhow!("Failed to parse bytes as XML - {}", err))
22  }
23}
24
25/// Resolve the path expression against the XML, returning a list of pointer values that match.
26pub fn resolve_path(value: &Element, expression: &DocPath) -> Vec<String> {
27  let mut tree = Arena::new();
28  let root = tree.new_node("".into());
29
30  let tokens = expression.tokens();
31  query_graph(tokens.as_slice(), &mut tree, root, value, 0);
32
33  let tokens = expression.tokens().iter()
34    .filter(|t| match t {
35      PathToken::Index(_) => false,
36      _ => true
37    }).collect_vec();
38  let expanded_paths = root.descendants(&tree)
39    .fold(Vec::<String>::new(), |mut acc, node_id| {
40    let node = tree.index(node_id);
41    if !node.get().is_empty() && node.first_child().is_none() {
42      let path: Vec<String> = node_id.ancestors(&tree)
43        .map(|n| format!("{}", tree.index(n).get()))
44        .collect();
45      if path.len() == tokens.len() {
46        acc.push(path.iter().rev().join("/"));
47      }
48    }
49    acc
50  });
51  expanded_paths
52}
53
54fn query_graph(
55  path_iter: &[PathToken],
56  tree: &mut Arena<String>,
57  parent_id: NodeId,
58  element: &Element,
59  index: usize
60) {
61  trace!(?path_iter, %parent_id, index, %element, ">>> query_graph");
62
63  if let Some(token) = path_iter.first() {
64    trace!(?token, "next token");
65    match token {
66      PathToken::Field(name) => {
67        let matches = if element.name() == name.as_str() {
68          trace!(name, %parent_id, "Field name matches element");
69          Some(parent_id.append_value(format!("{}[{}]", name, index), tree))
70        } else {
71          if let Some(ns) = element.namespace() {
72            let name_with_ns = format!("{}:{}", ns, element.name());
73            if name_with_ns == name.as_str() {
74              trace!(name, %parent_id, "Field name matches element including namespace");
75              Some(parent_id.append_value(format!("{}[{}]", name_with_ns, index), tree))
76            } else {
77              None
78            }
79          } else {
80            None
81          }
82        };
83
84        if let Some(node_id) = matches {
85          let remaining_tokens = &path_iter[1..];
86          if !remaining_tokens.is_empty() {
87            query_attributes(remaining_tokens, tree, node_id, element, index);
88            query_text(remaining_tokens, tree, node_id, element, index);
89
90            if let Some(PathToken::Index(_)) = remaining_tokens.first() {
91              query_graph(remaining_tokens, tree, node_id, element, index);
92            } else {
93              let grouped_children = group_children(element);
94              for children in grouped_children.values() {
95                for (index, child) in children.iter().enumerate() {
96                  query_graph(remaining_tokens, tree, node_id, *child, index);
97                }
98              }
99            }
100          }
101        }
102      },
103      PathToken::Index(i) => {
104        if *i == index {
105          trace!(index, i, %parent_id, "Index matches element index");
106          let remaining_tokens = &path_iter[1..];
107          if !remaining_tokens.is_empty() {
108            query_attributes(remaining_tokens, tree, parent_id, element, index);
109            query_text(remaining_tokens, tree, parent_id, element, index);
110
111            let grouped_children = group_children(element);
112            for (_, children) in grouped_children {
113              for (index, child) in children.iter().enumerate() {
114                query_graph(remaining_tokens, tree, parent_id, *child, index);
115              }
116            }
117          }
118        } else {
119          trace!(index, i, %parent_id, "Index does not match element index, removing");
120          parent_id.detach(tree);
121        }
122      }
123      PathToken::Star | PathToken::StarIndex => {
124        trace!(%parent_id, name = element.name(), "* -> Adding current node to parent");
125        let node_id = parent_id.append_value(format!("{}[{}]", element.name(), index), tree);
126
127        let remaining_tokens = &path_iter[1..];
128        if !remaining_tokens.is_empty() {
129          query_attributes(remaining_tokens, tree, node_id, element, index);
130          query_text(remaining_tokens, tree, node_id, element, index);
131
132          let grouped_children = group_children(element);
133          for (_, children) in grouped_children {
134            for (index, child) in children.iter().enumerate() {
135              query_graph(remaining_tokens, tree, node_id, *child, index);
136            }
137          }
138        }
139      },
140      PathToken::Root => {
141        query_graph(&path_iter[1..], tree, parent_id, element, index);
142      }
143    }
144  }
145}
146
147/// Groups all the child element by name
148pub fn group_children(element: &Element) -> BTreeMap<String, Vec<&Element>> {
149  element.child_elements()
150    .fold(BTreeMap::new(), |mut acc, child| {
151      acc.entry(child.name())
152        .and_modify(|entry: &mut Vec<_>| entry.push(child))
153        .or_insert_with(|| vec![child]);
154      acc
155    })
156}
157
158fn query_attributes(
159  path_iter: &[PathToken],
160  tree: &mut Arena<String>,
161  parent_id: NodeId,
162  element: &Element,
163  index: usize
164) {
165  trace!(?path_iter, %parent_id, index, %element, ">>> query_attributes");
166
167  if let Some(token) = path_iter.first() {
168    trace!(?token, "next token");
169    if let PathToken::Field(name) = token {
170      if name.starts_with('@') {
171        let attribute_name = &name[1..];
172        let attributes = resolve_namespaces(element.attributes());
173        if attributes.contains_key(attribute_name) {
174          trace!(name, "Field name matches element attribute");
175          parent_id.append_value(name.clone(), tree);
176        }
177      }
178    }
179  }
180}
181
/// Returns a copy of the attributes in which every attribute with a declared namespace prefix
/// is also available under its expanded name (`<namespace URI>:<local name>`).
///
/// Prefixes are taken from the `xmlns:<prefix>` attributes in the same map. If there are no such
/// declarations, the attributes are returned unchanged.
fn resolve_namespaces(attributes: &HashMap<String, String>) -> HashMap<String, String> {
  // Map of declared prefix -> namespace URI
  let prefixes: HashMap<&str, &str> = attributes.iter()
    .filter_map(|(key, uri)| key.strip_prefix("xmlns:").map(|prefix| (prefix, uri.as_str())))
    .collect();
  if prefixes.is_empty() {
    return attributes.clone();
  }

  let mut resolved = HashMap::new();
  for (key, value) in attributes {
    resolved.insert(key.clone(), value.clone());

    let expanded_key = key.split_once(':')
      .and_then(|(prefix, local_name)| {
        prefixes.get(prefix).map(|uri| format!("{}:{}", uri, local_name))
      });
    if let Some(expanded_key) = expanded_key {
      resolved.insert(expanded_key, value.clone());
    }
  }
  resolved
}
206
207fn query_text(
208  path_iter: &[PathToken],
209  tree: &mut Arena<String>,
210  parent_id: NodeId,
211  element: &Element,
212  index: usize
213) {
214  trace!(?path_iter, %parent_id, index, %element, ">>> query_text");
215
216  if let Some(token) = path_iter.first() {
217    trace!(?token, "next token");
218    if let PathToken::Field(name) = token {
219      let text_nodes = text_nodes(element);
220      if name == "#text" && !text_nodes.is_empty() {
221        trace!(name, "Field name matches element text");
222        parent_id.append_value(name.clone(), tree);
223      }
224    }
225  }
226}
227
228/// Return all the content of the element text nodes
229pub fn text_nodes(element: &Element) -> Vec<String> {
230  element.children()
231    .filter_map(|child| if let Ok(text) = child.as_text() {
232      if text.content.is_empty() {
233        None
234      } else {
235        Some(text.content.clone())
236      }
237    } else {
238      None
239    })
240    .collect_vec()
241}
242
243lazy_static!{
244   static ref PATH_RE: regex::Regex = regex::Regex::new(r#"(\w+)\[(\d+)]"#).unwrap();
245}
246
/// Enum to box the result value from `resolve_matching_node`. A resolved path can end at an
/// element, at the text of an element or at an attribute of an element.
#[derive(Debug, Clone, PartialOrd, PartialEq)]
pub enum XmlResult {
  /// Matched XML element (a clone of the element from the document)
  ElementNode(Element),
  /// Matched XML text (the text content of the element the path pointed at)
  TextNode(String),
  /// Matched attribute, as the attribute name (without the leading `@`) and its value
  Attribute(String, String)
}
257
258/// Returns the matching node from the XML for the given path.
259pub fn resolve_matching_node(element: &Element, path: &str) -> Option<XmlResult> {
260  trace!(path, %element, ">>> resolve_matching_node");
261  let paths = path.split("/")
262    .filter(|s| !s.is_empty())
263    .collect_vec();
264  if let Some(first_part) = paths.first() {
265    if let Some(captures) = PATH_RE.captures(first_part) {
266      let name = &captures[1];
267      let index: usize = (&captures[2]).parse().unwrap_or_default();
268      if index == 0 && name == element.name() {
269        if paths.len() > 1 {
270          match_next(element, &paths[1..])
271        } else {
272          Some(XmlResult::ElementNode(element.clone()))
273        }
274      } else {
275        None
276      }
277    } else {
278      None
279    }
280  } else {
281    None
282  }
283}
284
285fn match_next(element: &Element, paths: &[&str]) -> Option<XmlResult> {
286  trace!(?paths, %element, ">>> match_next");
287  if let Some(first_part) = paths.first() {
288    if first_part.starts_with('@') {
289      element.attributes().get(&first_part[1..])
290        .map(|value| XmlResult::Attribute(first_part[1..].to_string(), value.clone()))
291    } else if *first_part == "#text" {
292      let text = element.text();
293      if text.is_empty() {
294        None
295      } else {
296        Some(XmlResult::TextNode(text))
297      }
298    } else if let Some(captures) = PATH_RE.captures(first_part) {
299      let name = &captures[1];
300      let index: usize = (&captures[2]).parse().unwrap_or_default();
301      let grouped_children = group_children(element);
302      let child = grouped_children.get(name)
303        .map(|values| values.get(index))
304        .flatten()
305        .map(|value| *value);
306      if let Some(child) = child {
307        if paths.len() > 1 {
308          match_next(child, &paths[1..])
309        } else {
310          Some(XmlResult::ElementNode(child.clone()))
311        }
312      } else {
313        None
314      }
315    } else {
316      None
317    }
318  } else {
319    None
320  }
321}
322
#[cfg(test)]
mod tests {
  use expectest::prelude::*;
  use maplit::hashmap;

  use crate::path_exp::DocPath;

  use super::*;

  // Resolves a range of path expressions (fields, indices, wildcards, attributes and text)
  // against a small document without namespaces.
  #[test_log::test]
  fn resolve_path_test() {
    let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
      <config>
        <name>My Settings</name>
        <sound>
          <property name="volume" value="11" />
          <property name="mixer" value="standard" />
        </sound>
      </config>
      "#;
    let dom = kiss_xml::parse_str(xml).unwrap();
    let root = dom.root_element();

    // The root expression on its own matches nothing
    let path = DocPath::root();
    expect!(resolve_path(root, &path).is_empty()).to(be_true());

    // Plain field paths
    let path = DocPath::new_unwrap("$.config");
    expect!(resolve_path(root, &path)).to(be_equal_to(vec!["/config[0]"]));

    let path = DocPath::new_unwrap("$.config.sound");
    expect!(resolve_path(root, &path)).to(be_equal_to(vec!["/config[0]/sound[0]"]));

    // A field without an index matches all the elements with that name
    let path = DocPath::new_unwrap("$.config.sound.property");
    expect!(resolve_path(root, &path)).to(be_equal_to(vec![
      "/config[0]/sound[0]/property[0]",
      "/config[0]/sound[0]/property[1]"
    ]));

    // Explicit indices narrow the match to a single element
    let path = DocPath::new_unwrap("$.config.sound[0].property[0]");
    expect!(resolve_path(root, &path)).to(be_equal_to(vec![
      "/config[0]/sound[0]/property[0]"
    ]));

    // Wildcards match all the child elements
    let path = DocPath::new_unwrap("$.config.*");
    expect!(resolve_path(root, &path)).to(be_equal_to(vec![
      "/config[0]/name[0]",
      "/config[0]/sound[0]"
    ]));

    let path = DocPath::new_unwrap("$.config[*]");
    expect!(resolve_path(root, &path)).to(be_equal_to(vec![
      "/config[0]/name[0]",
      "/config[0]/sound[0]"
    ]));

    // Attributes are selected with an '@' prefix
    let path = DocPath::new_unwrap("$.config.sound.property.@name");
    expect!(resolve_path(root, &path)).to(be_equal_to(vec![
      "/config[0]/sound[0]/property[0]/@name",
      "/config[0]/sound[0]/property[1]/@name"
    ]));

    let path = DocPath::new_unwrap("$.config.sound.property.@other");
    expect!(resolve_path(root, &path).is_empty()).to(be_true());

    let path = DocPath::new_unwrap("$.config.sound.*.@name");
    expect!(resolve_path(root, &path)).to(be_equal_to(vec![
      "/config[0]/sound[0]/property[0]/@name",
      "/config[0]/sound[0]/property[1]/@name"
    ]));

    // Text is selected with '#text', and only matches elements that have text
    let path = DocPath::new_unwrap("$.config.name.#text");
    expect!(resolve_path(root, &path)).to(be_equal_to(vec!["/config[0]/name[0]/#text"]));

    let path = DocPath::new_unwrap("$.config.*.#text");
    expect!(resolve_path(root, &path)).to(be_equal_to(vec!["/config[0]/name[0]/#text"]));

    let path = DocPath::new_unwrap("$.config.sound.property.#text");
    expect!(resolve_path(root, &path).is_empty()).to(be_true());

    // Index followed by an attribute, including an index that is out of range
    let path = DocPath::new_unwrap("$.config.sound.property[1].@name");
    expect!(resolve_path(root, &path)).to(be_equal_to(vec![
      "/config[0]/sound[0]/property[1]/@name"
    ]));

    let path = DocPath::new_unwrap("$.config.sound.property[2].@name");
    expect!(resolve_path(root, &path).is_empty()).to(be_true());
  }

  // Elements and attributes can be matched by local name, by prefixed name or by the name
  // qualified with the namespace URI.
  #[test_log::test]
  fn resolve_path_with_xml_namespaces_test() {
    let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
      <a:alligator xmlns:a="urn:alligators" xmlns:n="urn:names" n:name="Mary">
        <a:favouriteNumbers>
          <favouriteNumber xmlns="urn:favourite:numbers">1</favouriteNumber>
        </a:favouriteNumbers>
      </a:alligator>
      "#;
    let dom = kiss_xml::parse_str(xml).unwrap();
    let root = dom.root_element();

    let path = DocPath::root();
    expect!(resolve_path(root, &path).is_empty()).to(be_true());

    // Local name only
    let path = DocPath::new_unwrap("$.alligator");
    expect!(resolve_path(root, &path)).to(be_equal_to(vec!["/alligator[0]"]));

    // Name qualified with the namespace URI
    let path = DocPath::new_unwrap("$['urn:alligators:alligator']");
    expect!(resolve_path(root, &path)).to(be_equal_to(vec!["/urn:alligators:alligator[0]"]));

    // Attribute by prefixed name and by namespace qualified name
    let path = DocPath::new_unwrap("$['urn:alligators:alligator']['@n:name']");
    expect!(resolve_path(root, &path)).to(be_equal_to(vec!["/urn:alligators:alligator[0]/@n:name"]));

    let path = DocPath::new_unwrap("$['urn:alligators:alligator']['@urn:names:name']");
    expect!(resolve_path(root, &path)).to(be_equal_to(vec!["/urn:alligators:alligator[0]/@urn:names:name"]));
  }

  // Resolves concrete paths (in the form returned by resolve_path) back to the XML nodes.
  #[test_log::test]
  fn resolve_matching_node_test() {
    let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
      <config>
        <name>My Settings</name>
        <sound>
          <property name="volume" value="11" />
          <property name="mixer" value="standard" />
        </sound>
      </config>
      "#;
    let dom = kiss_xml::parse_str(xml).unwrap();
    let root = dom.root_element();

    // The first segment must match the root element with index 0
    expect!(resolve_matching_node(root, "/config[0]")).to(be_some()
      .value(XmlResult::ElementNode(root.clone())));
    expect!(resolve_matching_node(root, "/config[1]")).to(be_none());

    // Child elements
    let sound = root.elements_by_name("sound").next().unwrap().clone();
    expect!(resolve_matching_node(root, "/config[0]/sound[0]")).to(be_some()
      .value(XmlResult::ElementNode(sound.clone())));
    expect!(resolve_matching_node(root, "/config[0]/sound[1]")).to(be_none());

    let properties = sound.elements_by_name("property").cloned().collect_vec();
    expect!(resolve_matching_node(root, "/config[0]/sound[0]/property[0]")).to(be_some()
      .value(XmlResult::ElementNode(properties[0].clone())));
    expect!(resolve_matching_node(root, "/config[0]/sound[0]/property[1]")).to(be_some()
      .value(XmlResult::ElementNode(properties[1].clone())));

    // Attributes
    expect!(resolve_matching_node(root, "/config[0]/sound[0]/property[0]/@name")).to(be_some()
      .value(XmlResult::Attribute("name".to_string(), "volume".to_string())));
    expect!(resolve_matching_node(root, "/config[0]/sound[0]/property[1]/@name")).to(be_some()
      .value(XmlResult::Attribute("name".to_string(), "mixer".to_string())));
    expect!(resolve_matching_node(root, "/config[0]/sound[0]/property[1]/@other")).to(be_none());

    // Text
    expect!(resolve_matching_node(root, "/config[0]/name[0]/#text")).to(be_some()
      .value(XmlResult::TextNode("My Settings".to_string())));
    expect!(resolve_matching_node(root, "/config[0]/sound[0]/property[0]/#text")).to(be_none());
  }

  // Attributes with a declared namespace prefix are duplicated under the namespace qualified name.
  #[test_log::test]
  fn resolve_namespaces_test() {
    expect!(resolve_namespaces(&hashmap!{})).to(be_equal_to(hashmap!{}));

    // No namespace declarations: the attributes are returned unchanged
    let attributes = hashmap!{
      "a".to_string() => "b".to_string(),
      "c".to_string() => "d".to_string()
    };
    expect!(resolve_namespaces(&attributes)).to(be_equal_to(attributes));

    // n:name is also made available as urn:names:name
    let attributes = hashmap!{
      "n:name".to_string() => "Mary".to_string(),
      "xmlns:a".to_string() => "urn:alligators".to_string(),
      "xmlns:n".to_string() => "urn:names".to_string()
    };
    expect!(resolve_namespaces(&attributes)).to(be_equal_to(hashmap!{
      "n:name".to_string() => "Mary".to_string(),
      "urn:names:name".to_string() => "Mary".to_string(),
      "xmlns:a".to_string() => "urn:alligators".to_string(),
      "xmlns:n".to_string() => "urn:names".to_string()
    }));
  }
}