1use std::collections::{BTreeMap, HashMap};
4use std::ops::Index;
5use std::str;
6use anyhow::anyhow;
7use indextree::{Arena, NodeId};
8use itertools::Itertools;
9use kiss_xml::dom::{Element, Node};
10use lazy_static::lazy_static;
11use sxd_document::{Package, parser};
12use tracing::trace;
13
14use crate::path_exp::{DocPath, PathToken};
15
16pub fn parse_bytes(bytes: &[u8]) -> anyhow::Result<Package> {
18 let string = str::from_utf8(bytes)?;
19 match parser::parse(string) {
20 Ok(doc) => Ok(doc),
21 Err(err) => Err(anyhow!("Failed to parse bytes as XML - {}", err))
22 }
23}
24
25pub fn resolve_path(value: &Element, expression: &DocPath) -> Vec<String> {
27 let mut tree = Arena::new();
28 let root = tree.new_node("".into());
29
30 let tokens = expression.tokens();
31 query_graph(tokens.as_slice(), &mut tree, root, value, 0);
32
33 let tokens = expression.tokens().iter()
34 .filter(|t| match t {
35 PathToken::Index(_) => false,
36 _ => true
37 }).collect_vec();
38 let expanded_paths = root.descendants(&tree)
39 .fold(Vec::<String>::new(), |mut acc, node_id| {
40 let node = tree.index(node_id);
41 if !node.get().is_empty() && node.first_child().is_none() {
42 let path: Vec<String> = node_id.ancestors(&tree)
43 .map(|n| format!("{}", tree.index(n).get()))
44 .collect();
45 if path.len() == tokens.len() {
46 acc.push(path.iter().rev().join("/"));
47 }
48 }
49 acc
50 });
51 expanded_paths
52}
53
54fn query_graph(
55 path_iter: &[PathToken],
56 tree: &mut Arena<String>,
57 parent_id: NodeId,
58 element: &Element,
59 index: usize
60) {
61 trace!(?path_iter, %parent_id, index, %element, ">>> query_graph");
62
63 if let Some(token) = path_iter.first() {
64 trace!(?token, "next token");
65 match token {
66 PathToken::Field(name) => {
67 let matches = if element.name() == name.as_str() {
68 trace!(name, %parent_id, "Field name matches element");
69 Some(parent_id.append_value(format!("{}[{}]", name, index), tree))
70 } else {
71 if let Some(ns) = element.namespace() {
72 let name_with_ns = format!("{}:{}", ns, element.name());
73 if name_with_ns == name.as_str() {
74 trace!(name, %parent_id, "Field name matches element including namespace");
75 Some(parent_id.append_value(format!("{}[{}]", name_with_ns, index), tree))
76 } else {
77 None
78 }
79 } else {
80 None
81 }
82 };
83
84 if let Some(node_id) = matches {
85 let remaining_tokens = &path_iter[1..];
86 if !remaining_tokens.is_empty() {
87 query_attributes(remaining_tokens, tree, node_id, element, index);
88 query_text(remaining_tokens, tree, node_id, element, index);
89
90 if let Some(PathToken::Index(_)) = remaining_tokens.first() {
91 query_graph(remaining_tokens, tree, node_id, element, index);
92 } else {
93 let grouped_children = group_children(element);
94 for children in grouped_children.values() {
95 for (index, child) in children.iter().enumerate() {
96 query_graph(remaining_tokens, tree, node_id, *child, index);
97 }
98 }
99 }
100 }
101 }
102 },
103 PathToken::Index(i) => {
104 if *i == index {
105 trace!(index, i, %parent_id, "Index matches element index");
106 let remaining_tokens = &path_iter[1..];
107 if !remaining_tokens.is_empty() {
108 query_attributes(remaining_tokens, tree, parent_id, element, index);
109 query_text(remaining_tokens, tree, parent_id, element, index);
110
111 let grouped_children = group_children(element);
112 for (_, children) in grouped_children {
113 for (index, child) in children.iter().enumerate() {
114 query_graph(remaining_tokens, tree, parent_id, *child, index);
115 }
116 }
117 }
118 } else {
119 trace!(index, i, %parent_id, "Index does not match element index, removing");
120 parent_id.detach(tree);
121 }
122 }
123 PathToken::Star | PathToken::StarIndex => {
124 trace!(%parent_id, name = element.name(), "* -> Adding current node to parent");
125 let node_id = parent_id.append_value(format!("{}[{}]", element.name(), index), tree);
126
127 let remaining_tokens = &path_iter[1..];
128 if !remaining_tokens.is_empty() {
129 query_attributes(remaining_tokens, tree, node_id, element, index);
130 query_text(remaining_tokens, tree, node_id, element, index);
131
132 let grouped_children = group_children(element);
133 for (_, children) in grouped_children {
134 for (index, child) in children.iter().enumerate() {
135 query_graph(remaining_tokens, tree, node_id, *child, index);
136 }
137 }
138 }
139 },
140 PathToken::Root => {
141 query_graph(&path_iter[1..], tree, parent_id, element, index);
142 }
143 }
144 }
145}
146
147pub fn group_children(element: &Element) -> BTreeMap<String, Vec<&Element>> {
149 element.child_elements()
150 .fold(BTreeMap::new(), |mut acc, child| {
151 acc.entry(child.name())
152 .and_modify(|entry: &mut Vec<_>| entry.push(child))
153 .or_insert_with(|| vec![child]);
154 acc
155 })
156}
157
158fn query_attributes(
159 path_iter: &[PathToken],
160 tree: &mut Arena<String>,
161 parent_id: NodeId,
162 element: &Element,
163 index: usize
164) {
165 trace!(?path_iter, %parent_id, index, %element, ">>> query_attributes");
166
167 if let Some(token) = path_iter.first() {
168 trace!(?token, "next token");
169 if let PathToken::Field(name) = token {
170 if name.starts_with('@') {
171 let attribute_name = &name[1..];
172 let attributes = resolve_namespaces(element.attributes());
173 if attributes.contains_key(attribute_name) {
174 trace!(name, "Field name matches element attribute");
175 parent_id.append_value(name.clone(), tree);
176 }
177 }
178 }
179 }
180}
181
/// Returns a copy of `attributes` in which every attribute with a declared
/// namespace prefix is also available under its expanded name.
///
/// Namespace declarations are the `xmlns:<prefix>` attributes found in the same
/// map. For each attribute `<prefix>:<name>` whose prefix is declared, an extra
/// entry `<namespace>:<name>` with the same value is added. All of the original
/// entries (including the declarations) are retained.
fn resolve_namespaces(attributes: &HashMap<String, String>) -> HashMap<String, String> {
    // prefix -> namespace, taken from the xmlns:* declarations
    let mut namespaces: HashMap<&str, &str> = HashMap::new();
    for (key, value) in attributes {
        if let Some(prefix) = key.strip_prefix("xmlns:") {
            namespaces.insert(prefix, value.as_str());
        }
    }

    if namespaces.is_empty() {
        return attributes.clone();
    }

    let mut resolved = HashMap::new();
    for (key, value) in attributes {
        resolved.insert(key.clone(), value.clone());
        if let Some((prefix, attr)) = key.split_once(':') {
            if let Some(namespace) = namespaces.get(prefix) {
                resolved.insert(format!("{}:{}", namespace, attr), value.clone());
            }
        }
    }
    resolved
}
206
207fn query_text(
208 path_iter: &[PathToken],
209 tree: &mut Arena<String>,
210 parent_id: NodeId,
211 element: &Element,
212 index: usize
213) {
214 trace!(?path_iter, %parent_id, index, %element, ">>> query_text");
215
216 if let Some(token) = path_iter.first() {
217 trace!(?token, "next token");
218 if let PathToken::Field(name) = token {
219 let text_nodes = text_nodes(element);
220 if name == "#text" && !text_nodes.is_empty() {
221 trace!(name, "Field name matches element text");
222 parent_id.append_value(name.clone(), tree);
223 }
224 }
225 }
226}
227
228pub fn text_nodes(element: &Element) -> Vec<String> {
230 element.children()
231 .filter_map(|child| if let Ok(text) = child.as_text() {
232 if text.content.is_empty() {
233 None
234 } else {
235 Some(text.content.clone())
236 }
237 } else {
238 None
239 })
240 .collect_vec()
241}
242
243lazy_static!{
244 static ref PATH_RE: regex::Regex = regex::Regex::new(r#"(\w+)\[(\d+)]"#).unwrap();
245}
246
247#[derive(Debug, Clone, PartialOrd, PartialEq)]
249pub enum XmlResult {
250 ElementNode(Element),
252 TextNode(String),
254 Attribute(String, String)
256}
257
258pub fn resolve_matching_node(element: &Element, path: &str) -> Option<XmlResult> {
260 trace!(path, %element, ">>> resolve_matching_node");
261 let paths = path.split("/")
262 .filter(|s| !s.is_empty())
263 .collect_vec();
264 if let Some(first_part) = paths.first() {
265 if let Some(captures) = PATH_RE.captures(first_part) {
266 let name = &captures[1];
267 let index: usize = (&captures[2]).parse().unwrap_or_default();
268 if index == 0 && name == element.name() {
269 if paths.len() > 1 {
270 match_next(element, &paths[1..])
271 } else {
272 Some(XmlResult::ElementNode(element.clone()))
273 }
274 } else {
275 None
276 }
277 } else {
278 None
279 }
280 } else {
281 None
282 }
283}
284
285fn match_next(element: &Element, paths: &[&str]) -> Option<XmlResult> {
286 trace!(?paths, %element, ">>> match_next");
287 if let Some(first_part) = paths.first() {
288 if first_part.starts_with('@') {
289 element.attributes().get(&first_part[1..])
290 .map(|value| XmlResult::Attribute(first_part[1..].to_string(), value.clone()))
291 } else if *first_part == "#text" {
292 let text = element.text();
293 if text.is_empty() {
294 None
295 } else {
296 Some(XmlResult::TextNode(text))
297 }
298 } else if let Some(captures) = PATH_RE.captures(first_part) {
299 let name = &captures[1];
300 let index: usize = (&captures[2]).parse().unwrap_or_default();
301 let grouped_children = group_children(element);
302 let child = grouped_children.get(name)
303 .map(|values| values.get(index))
304 .flatten()
305 .map(|value| *value);
306 if let Some(child) = child {
307 if paths.len() > 1 {
308 match_next(child, &paths[1..])
309 } else {
310 Some(XmlResult::ElementNode(child.clone()))
311 }
312 } else {
313 None
314 }
315 } else {
316 None
317 }
318 } else {
319 None
320 }
321}
322
#[cfg(test)]
mod tests {
    use expectest::prelude::*;
    use maplit::hashmap;

    use crate::path_exp::DocPath;

    use super::*;

    /// Document used by the tests that do not involve XML namespaces.
    const CONFIG_XML: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
      <config>
        <name>My Settings</name>
        <sound>
          <property name="volume" value="11" />
          <property name="mixer" value="standard" />
        </sound>
      </config>
      "#;

    #[test_log::test]
    fn resolve_path_test() {
        let dom = kiss_xml::parse_str(CONFIG_XML).unwrap();
        let root = dom.root_element();
        let resolve = |expression: &'static str| {
            resolve_path(root, &DocPath::new_unwrap(expression))
        };

        // The root expression on its own does not select anything
        expect!(resolve_path(root, &DocPath::root()).is_empty()).to(be_true());

        // Elements selected by name
        expect!(resolve("$.config")).to(be_equal_to(vec!["/config[0]"]));
        expect!(resolve("$.config.sound")).to(be_equal_to(vec!["/config[0]/sound[0]"]));
        expect!(resolve("$.config.sound.property")).to(be_equal_to(vec![
            "/config[0]/sound[0]/property[0]",
            "/config[0]/sound[0]/property[1]"
        ]));

        // Elements selected by name and index
        expect!(resolve("$.config.sound[0].property[0]")).to(be_equal_to(vec![
            "/config[0]/sound[0]/property[0]"
        ]));

        // Wildcards
        expect!(resolve("$.config.*")).to(be_equal_to(vec![
            "/config[0]/name[0]",
            "/config[0]/sound[0]"
        ]));
        expect!(resolve("$.config[*]")).to(be_equal_to(vec![
            "/config[0]/name[0]",
            "/config[0]/sound[0]"
        ]));

        // Attributes
        expect!(resolve("$.config.sound.property.@name")).to(be_equal_to(vec![
            "/config[0]/sound[0]/property[0]/@name",
            "/config[0]/sound[0]/property[1]/@name"
        ]));
        expect!(resolve("$.config.sound.property.@other").is_empty()).to(be_true());
        expect!(resolve("$.config.sound.*.@name")).to(be_equal_to(vec![
            "/config[0]/sound[0]/property[0]/@name",
            "/config[0]/sound[0]/property[1]/@name"
        ]));

        // Text nodes
        expect!(resolve("$.config.name.#text")).to(be_equal_to(vec!["/config[0]/name[0]/#text"]));
        expect!(resolve("$.config.*.#text")).to(be_equal_to(vec!["/config[0]/name[0]/#text"]));
        expect!(resolve("$.config.sound.property.#text").is_empty()).to(be_true());

        // Attributes of an indexed element
        expect!(resolve("$.config.sound.property[1].@name")).to(be_equal_to(vec![
            "/config[0]/sound[0]/property[1]/@name"
        ]));
        expect!(resolve("$.config.sound.property[2].@name").is_empty()).to(be_true());
    }

    #[test_log::test]
    fn resolve_path_with_xml_namespaces_test() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
          <a:alligator xmlns:a="urn:alligators" xmlns:n="urn:names" n:name="Mary">
            <a:favouriteNumbers>
              <favouriteNumber xmlns="urn:favourite:numbers">1</favouriteNumber>
            </a:favouriteNumbers>
          </a:alligator>
          "#;
        let dom = kiss_xml::parse_str(xml).unwrap();
        let root = dom.root_element();
        let resolve = |expression: &'static str| {
            resolve_path(root, &DocPath::new_unwrap(expression))
        };

        expect!(resolve_path(root, &DocPath::root()).is_empty()).to(be_true());

        // Element by local name, then by namespace-qualified name
        expect!(resolve("$.alligator")).to(be_equal_to(vec!["/alligator[0]"]));
        expect!(resolve("$['urn:alligators:alligator']"))
            .to(be_equal_to(vec!["/urn:alligators:alligator[0]"]));

        // Attribute by prefixed name, then by namespace-qualified name
        expect!(resolve("$['urn:alligators:alligator']['@n:name']"))
            .to(be_equal_to(vec!["/urn:alligators:alligator[0]/@n:name"]));
        expect!(resolve("$['urn:alligators:alligator']['@urn:names:name']"))
            .to(be_equal_to(vec!["/urn:alligators:alligator[0]/@urn:names:name"]));
    }

    #[test_log::test]
    fn resolve_matching_node_test() {
        let dom = kiss_xml::parse_str(CONFIG_XML).unwrap();
        let root = dom.root_element();
        let find = |path: &str| resolve_matching_node(root, path);

        // Root element
        expect!(find("/config[0]")).to(be_some()
            .value(XmlResult::ElementNode(root.clone())));
        expect!(find("/config[1]")).to(be_none());

        // Child element
        let sound = root.elements_by_name("sound").next().unwrap().clone();
        expect!(find("/config[0]/sound[0]")).to(be_some()
            .value(XmlResult::ElementNode(sound.clone())));
        expect!(find("/config[0]/sound[1]")).to(be_none());

        // Repeated child elements
        let properties = sound.elements_by_name("property").cloned().collect_vec();
        expect!(find("/config[0]/sound[0]/property[0]")).to(be_some()
            .value(XmlResult::ElementNode(properties[0].clone())));
        expect!(find("/config[0]/sound[0]/property[1]")).to(be_some()
            .value(XmlResult::ElementNode(properties[1].clone())));

        // Attributes
        expect!(find("/config[0]/sound[0]/property[0]/@name")).to(be_some()
            .value(XmlResult::Attribute("name".to_string(), "volume".to_string())));
        expect!(find("/config[0]/sound[0]/property[1]/@name")).to(be_some()
            .value(XmlResult::Attribute("name".to_string(), "mixer".to_string())));
        expect!(find("/config[0]/sound[0]/property[1]/@other")).to(be_none());

        // Text
        expect!(find("/config[0]/name[0]/#text")).to(be_some()
            .value(XmlResult::TextNode("My Settings".to_string())));
        expect!(find("/config[0]/sound[0]/property[0]/#text")).to(be_none());
    }

    #[test_log::test]
    fn resolve_namespaces_test() {
        // Nothing in, nothing out
        expect!(resolve_namespaces(&hashmap!{})).to(be_equal_to(hashmap!{}));

        // Without namespace declarations the attributes are returned unchanged
        let attributes = hashmap!{
            "a".to_string() => "b".to_string(),
            "c".to_string() => "d".to_string()
        };
        expect!(resolve_namespaces(&attributes)).to(be_equal_to(attributes));

        // A prefixed attribute is also made available under its namespace
        let attributes = hashmap!{
            "n:name".to_string() => "Mary".to_string(),
            "xmlns:a".to_string() => "urn:alligators".to_string(),
            "xmlns:n".to_string() => "urn:names".to_string()
        };
        expect!(resolve_namespaces(&attributes)).to(be_equal_to(hashmap!{
            "n:name".to_string() => "Mary".to_string(),
            "urn:names:name".to_string() => "Mary".to_string(),
            "xmlns:a".to_string() => "urn:alligators".to_string(),
            "xmlns:n".to_string() => "urn:names".to_string()
        }));
    }
}