biodivine_xml_doc/element.rs
1use crate::document::{Document, Node};
2use crate::error::{Error, Result};
3use std::collections::{HashMap, HashSet};
4
/// Payload of a single element, pushed into the document's store when the element is created.
#[derive(Debug)]
pub(crate) struct ElementData {
    /// Tag name, including an optional namespace prefix (`prefix:name`).
    full_name: String,
    /// Attributes keyed by their full name: `q:attr="val"` is stored as `{"q:attr": "val"}`.
    attributes: HashMap<String, String>,
    /// Namespaces newly declared in this element's attributes, keyed by prefix.
    namespace_decls: HashMap<String, String>,
    /// Parent element; `None` for a freshly created (unattached) element.
    parent: Option<Element>,
    /// Child nodes of this element.
    children: Vec<Node>,
}
13
/// An easy way to build a new element
/// by chaining methods to add properties.
///
/// Call [`Element::build()`] to start building.
/// To finish building, either call `.finish(doc)` or `.push_to(doc, parent)`,
/// both of which return the created [`Element`].
///
/// # Examples
///
/// ```
/// use biodivine_xml_doc::{Document, Element, Node};
///
/// let mut doc = Document::new();
///
/// let root = Element::build("root")
///     .attribute("id", "main")
///     .attribute("class", "main")
///     .finish(&mut doc);
/// doc.push_root_node(root.as_node()).unwrap();
///
/// let name = Element::build("name")
///     .text_content("No Name")
///     .push_to(&mut doc, root);
///
/// /* Equivalent xml:
///   <root id="main" class="main">
///     <name>No Name</name>
///   </root>
/// */
/// ```
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ElementBuilder {
    /// Tag name, including an optional namespace prefix.
    full_name: String,
    /// Attributes collected so far, keyed by full attribute name.
    attributes: HashMap<String, String>,
    /// Namespace declarations collected so far, keyed by prefix.
    namespace_decls: HashMap<String, String>,
    /// Optional text that becomes the single text child of the built element.
    text_content: Option<String>,
}
52
53impl ElementBuilder {
54 fn new(full_name: String) -> ElementBuilder {
55 ElementBuilder {
56 full_name,
57 attributes: HashMap::new(),
58 namespace_decls: HashMap::new(),
59 text_content: None,
60 }
61 }
62
63 /// Removes previous prefix if it exists, and attach new prefix.
64 pub fn prefix(mut self, prefix: &str) -> Self {
65 let (_, name) = Element::separate_prefix_name(&self.full_name);
66 if prefix.is_empty() {
67 self.full_name = name.to_string();
68 } else {
69 self.full_name = format!("{}:{}", prefix, name);
70 }
71 self
72 }
73
74 pub fn attribute<S, T>(mut self, name: S, value: T) -> Self
75 where
76 S: Into<String>,
77 T: Into<String>,
78 {
79 self.attributes.insert(name.into(), value.into());
80 self
81 }
82
83 pub fn namespace_decl<S, T>(mut self, prefix: S, namespace: T) -> Self
84 where
85 S: Into<String>,
86 T: Into<String>,
87 {
88 self.namespace_decls.insert(prefix.into(), namespace.into());
89 self
90 }
91
92 pub fn text_content<S: Into<String>>(mut self, text: S) -> Self {
93 self.text_content = Some(text.into());
94 self
95 }
96
97 pub fn finish(self, doc: &mut Document) -> Element {
98 let elem = Element::with_data(doc, self.full_name, self.attributes, self.namespace_decls);
99 if let Some(text) = self.text_content {
100 elem.push_child(doc, Node::Text(text)).unwrap();
101 }
102 elem
103 }
104
105 /// Push this element to the parent's children.
106 pub fn push_to(self, doc: &mut Document, parent: Element) -> Element {
107 let elem = self.finish(doc);
108 elem.push_to(doc, parent).unwrap();
109 elem
110 }
111}
112
/// Represents an XML element. It acts as a pointer to the actual element data stored in
/// a [`Document`].
///
/// This struct only contains a `usize` id and implements `Copy`,
/// so you do not need to bother with holding a reference to it.
///
/// Because the actual data of the element is stored in [`Document`],
/// most methods take `&Document` or `&mut Document` as their first argument.
///
/// Note that an element may only interact with elements of the same document,
/// but the crate doesn't know which document an element is from.
/// Trying to push an element from a different Document may result in unexpected errors.
///
/// # Examples
///
/// Find children nodes with attribute
/// ```
/// use biodivine_xml_doc::{Document, Element};
///
/// let doc = Document::parse_str(r#"<?xml version="1.0"?>
/// <data>
///   <item class="value">a</item>
///   <item class="value">b</item>
///   <item></item>
/// </data>
/// "#).unwrap();
///
/// let data = doc.root_element().unwrap();
/// let value_items: Vec<Element> = data.children(&doc)
///     .iter()
///     .filter_map(|node| node.as_element())
///     .filter(|elem| elem.attribute(&doc, "class") == Some("value"))
///     .collect();
/// ```
///
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Element {
    /// Identifier used to look up this element's data in the document store.
    id: usize,
}
151
152impl Element {
153 /// Create a new empty element with `full_name`.
154 ///
155 /// If full_name contains `:`,
156 /// everything before that will be interpreted as a namespace prefix.
157 pub fn new<S: Into<String>>(doc: &mut Document, full_name: S) -> Self {
158 Self::with_data(doc, full_name.into(), HashMap::new(), HashMap::new())
159 }
160
161 /// Chain methods to build an element easily.
162 /// The chain can be finished with `.finish()` or `.push_to(parent)`.
163 ///
164 /// # Example
165 /// ```
166 /// use biodivine_xml_doc::{Document, Element, Node};
167 ///
168 /// let mut doc = Document::new();
169 ///
170 /// let elem = Element::build("root")
171 /// .attribute("id", "main")
172 /// .attribute("class", "main")
173 /// .finish(&mut doc);
174 ///
175 /// doc.push_root_node(elem.as_node()).unwrap();
176 /// ```
177 pub fn build<S: Into<String>>(name: S) -> ElementBuilder {
178 ElementBuilder::new(name.into())
179 }
180
181 pub(crate) fn with_data(
182 doc: &mut Document,
183 full_name: String,
184 attributes: HashMap<String, String>,
185 namespace_decls: HashMap<String, String>,
186 ) -> Element {
187 let elem = Element { id: doc.counter };
188 let elem_data = ElementData {
189 full_name,
190 attributes,
191 namespace_decls,
192 parent: None,
193 children: vec![],
194 };
195 doc.store.push(elem_data);
196 doc.counter += 1;
197 elem
198 }
199
200 /// Create a container Element
201 pub(crate) fn container() -> (Element, ElementData) {
202 let elem_data = ElementData {
203 full_name: String::new(),
204 attributes: HashMap::new(),
205 namespace_decls: HashMap::new(),
206 parent: None,
207 children: Vec::new(),
208 };
209 let elem = Element { id: 0 };
210 (elem, elem_data)
211 }
212
213 /// Returns `true` if element is a container.
214 ///
215 /// See [`Document::container()`] for more information on 'container'.
216 pub fn is_container(&self) -> bool {
217 self.id == 0
218 }
219
220 /// Equivalent to `Node::Element(self)`
221 pub fn as_node(&self) -> Node {
222 Node::Element(*self)
223 }
224
225 /// Seperate full_name by `:`, returning (prefix, name).
226 ///
227 /// The first str is `""` if `full_name` has no prefix.
228 pub fn separate_prefix_name(full_name: &str) -> (&str, &str) {
229 match full_name.split_once(':') {
230 Some((prefix, name)) => (prefix, name),
231 None => ("", full_name),
232 }
233 }
234}
235
236/// Below are methods that take `&Document` as its first argument.
237impl Element {
238 fn data<'a>(&self, doc: &'a Document) -> &'a ElementData {
239 doc.store.get(self.id).unwrap()
240 }
241
242 fn mut_data<'a>(&self, doc: &'a mut Document) -> &'a mut ElementData {
243 doc.store.get_mut(self.id).unwrap()
244 }
245
246 /// Returns true if this element is the root node of document.
247 ///
248 /// Note that this crate allows Document to have multiple elements, even though it's not valid xml.
249 pub fn is_root(&self, doc: &Document) -> bool {
250 self.parent(doc).map_or(false, |p| p.is_container())
251 }
252
253 /// Returns the "top" parent of this element. If the element is attached, the "top" parent
254 /// is the document root. Otherwise, the "top" parent is the root of the detached sub-tree.
255 pub fn top_parent(&self, doc: &Document) -> Element {
256 let mut e = *self;
257 while let Some(parent) = e.parent(doc) {
258 if parent.is_container() {
259 return e;
260 }
261 e = parent;
262 }
263 e
264 }
265
266 /// Get full name of element, including its namespace prefix.
267 /// Use [`Element::name()`] to get its name without the prefix.
268 pub fn full_name<'a>(&self, doc: &'a Document) -> &'a str {
269 &self.data(doc).full_name
270 }
271
272 pub fn set_full_name<S: Into<String>>(&self, doc: &mut Document, name: S) {
273 self.mut_data(doc).full_name = name.into();
274 }
275
276 /// Get prefix and name of element. If it doesn't have prefix, will return an empty string.
277 ///
278 /// `<prefix: name` -> `("prefix", "name")`
279 pub fn prefix_name<'a>(&self, doc: &'a Document) -> (&'a str, &'a str) {
280 Self::separate_prefix_name(self.full_name(doc))
281 }
282
283 /// Get namespace prefix of element, without name.
284 ///
285 /// `<prefix:name>` -> `"prefix"`
286 pub fn prefix<'a>(&self, doc: &'a Document) -> &'a str {
287 self.prefix_name(doc).0
288 }
289
290 /// Set prefix of element, preserving its name.
291 ///
292 /// `prefix` should not have a `:`,
293 /// or everything after `:` will be interpreted as part of element name.
294 ///
295 /// If prefix is an empty string, removes prefix.
296 pub fn set_prefix<S: Into<String>>(&self, doc: &mut Document, prefix: S) {
297 let data = self.mut_data(doc);
298 let (_, name) = Self::separate_prefix_name(&data.full_name);
299 let prefix: String = prefix.into();
300 if prefix.is_empty() {
301 data.full_name = name.to_string();
302 } else {
303 data.full_name = format!("{}:{}", prefix, name);
304 }
305 }
306
307 /// Get name of element, without its namespace prefix.
308 /// Use `Element::full_name()` to get its full name with prefix.
309 ///
310 /// `<prefix:name>` -> `"name"`
311 pub fn name<'a>(&self, doc: &'a Document) -> &'a str {
312 self.prefix_name(doc).1
313 }
314
315 /// Set name of element, preserving its prefix.
316 ///
317 /// `name` should not have a `:`,
318 /// or everything before `:` may be interpreted as namespace prefix.
319 pub fn set_name<S: Into<String>>(&self, doc: &mut Document, name: S) {
320 let data = self.mut_data(doc);
321 let (prefix, _) = Self::separate_prefix_name(&data.full_name);
322 if prefix.is_empty() {
323 data.full_name = name.into();
324 } else {
325 data.full_name = format!("{}:{}", prefix, name.into());
326 }
327 }
328
329 /// Get attributes of element.
330 ///
331 /// The attribute names may have namespace prefix. To strip the prefix and only its name, call [`Element::separate_prefix_name`].
332 /// ```
333 /// use biodivine_xml_doc::{Document, Element};
334 ///
335 /// let mut doc = Document::new();
336 /// let element = Element::build("name")
337 /// .attribute("id", "name")
338 /// .attribute("pre:name", "value")
339 /// .finish(&mut doc);
340 ///
341 /// let attrs = element.attributes(&doc);
342 /// for (full_name, value) in attrs {
343 /// let (prefix, name) = Element::separate_prefix_name(full_name);
344 /// // ("", "id"), ("pre", "name")
345 /// }
346 /// ```
347 pub fn attributes<'a>(&self, doc: &'a Document) -> &'a HashMap<String, String> {
348 &self.data(doc).attributes
349 }
350
351 /// Get attribute value of an element by its full name. (Namespace prefix isn't stripped)
352 pub fn attribute<'a>(&self, doc: &'a Document, name: &str) -> Option<&'a str> {
353 self.attributes(doc).get(name).map(|v| v.as_str())
354 }
355
356 /// Add or set attribute.
357 ///
358 /// If `name` contains a `:`,
359 /// everything before `:` will be interpreted as namespace prefix.
360 pub fn set_attribute<S, T>(&self, doc: &mut Document, name: S, value: T)
361 where
362 S: Into<String>,
363 T: Into<String>,
364 {
365 self.mut_attributes(doc).insert(name.into(), value.into());
366 }
367
368 pub fn mut_attributes<'a>(&self, doc: &'a mut Document) -> &'a mut HashMap<String, String> {
369 &mut self.mut_data(doc).attributes
370 }
371
372 /// Gets the namespace of this element.
373 ///
374 /// Shorthand for `self.namespace_for_prefix(doc, self.prefix(doc))`.
375 pub fn namespace<'a>(&self, doc: &'a Document) -> Option<&'a str> {
376 self.namespace_for_prefix(doc, self.prefix(doc))
377 }
378
379 /// Gets HashMap of `xmlns:prefix=namespace` declared in this element's attributes.
380 ///
381 /// Default namespace has empty string as key.
382 pub fn namespace_decls<'a>(&self, doc: &'a Document) -> &'a HashMap<String, String> {
383 &self.data(doc).namespace_decls
384 }
385
386 pub fn mut_namespace_decls<'a>(
387 &self,
388 doc: &'a mut Document,
389 ) -> &'a mut HashMap<String, String> {
390 &mut self.mut_data(doc).namespace_decls
391 }
392
393 pub fn set_namespace_decl<S, T>(&self, doc: &mut Document, prefix: S, namespace: T)
394 where
395 S: Into<String>,
396 T: Into<String>,
397 {
398 self.mut_namespace_decls(doc)
399 .insert(prefix.into(), namespace.into());
400 }
401
402 /// Get namespace value given prefix, for this element.
403 /// "xml" and "xmlns" returns its default namespace.
404 ///
405 /// This method can return an empty namespace, but only for an empty prefix assuming
406 /// there is no default namespace declared.
407 pub fn namespace_for_prefix<'a>(&self, doc: &'a Document, prefix: &str) -> Option<&'a str> {
408 match prefix {
409 "xml" => return Some("http://www.w3.org/XML/1998/namespace"),
410 "xmlns" => return Some("http://www.w3.org/2000/xmlns/"),
411 _ => (),
412 };
413 let mut elem = *self;
414 loop {
415 let data = elem.data(doc);
416 if let Some(value) = data.namespace_decls.get(prefix) {
417 return Some(value);
418 }
419 if let Some(parent) = elem.parent(doc) {
420 elem = parent;
421 } else if prefix.is_empty() {
422 return Some("");
423 } else {
424 return None;
425 }
426 }
427 }
428
429 /// Returns `true` if this element is quantified by the given `namespace_url`. That is,
430 /// either its prefix resolves to this namespace, or this is the default
431 /// namespace in this context.
432 ///
433 /// See also the usage example in [Self::quantify_with_closest].
434 pub fn is_quantified(&self, doc: &Document, namespace_url: &str) -> bool {
435 self.namespace(doc) == Some(namespace_url)
436 }
437
438 /// Ensure that this element belongs to the specified namespace using the *closest* prefix
439 /// which corresponds to the given `namespace_url`.
440 ///
441 /// If the namespace is not declared for this element, returns `None`, otherwise returns
442 /// the new prefix. As such, `None` actually represents an error and must be consumed.
443 ///
444 /// See [Self::closest_prefix] for the definitions of which prefix will be used.
445 ///
446 /// ```rust
447 /// use biodivine_xml_doc::Document;
448 ///
449 /// let mut doc = Document::parse_str(r#"<?xml version="1.0" encoding="UTF-8"?>
450 /// <parent xmlns="http://ns1" xmlns:ns1="http://ns1" xmlns:ns2="http://ns2">
451 /// <child xmlns:ns="http://ns2" />
452 /// </parent>
453 /// "#).unwrap();
454 ///
455 /// let root = doc.root_element().unwrap();
456 /// let child = root.child_elements(&doc)[0];
457 ///
458 /// // Everybody is already quantified with ns1, since it is the default namespace.
459 ///
460 /// assert!(child.is_quantified(&doc, "http://ns1"));
461 /// assert!(!root.is_quantified(&doc, "http://ns2"));
462 ///
463 /// assert_eq!(child.quantify_with_closest(&mut doc, "http://ns1"), Some("".to_string()));
464 /// assert_eq!(root.quantify_with_closest(&mut doc, "http://ns2"), Some("ns2".to_string()));
465 ///
466 /// assert!(child.is_quantified(&doc, "http://ns1"));
467 /// assert!(root.is_quantified(&doc, "http://ns2"));
468 /// ```
469 #[must_use]
470 pub fn quantify_with_closest(&self, doc: &mut Document, namespace_url: &str) -> Option<String> {
471 let prefix = self.closest_prefix(doc, namespace_url);
472 if let Some(prefix) = prefix {
473 let prefix = prefix.to_string();
474 self.set_prefix(doc, prefix.as_str());
475 Some(prefix)
476 } else {
477 None
478 }
479 }
480
481 pub(crate) fn build_text_content<'a>(&self, doc: &'a Document, buf: &'a mut String) {
482 for child in self.children(doc) {
483 child.build_text_content(doc, buf);
484 }
485 }
486
487 /// Concatenate all text content of this element, including its child elements `text_content()`.
488 ///
489 /// Implementation of [Node.textContent](https://developer.mozilla.org/en-US/docs/Web/API/Node/textContent)
490 pub fn text_content(&self, doc: &Document) -> String {
491 let mut buf = String::new();
492 self.build_text_content(doc, &mut buf);
493 buf
494 }
495
496 /// Clears all its children and inserts a [`Node::Text`] with given text.
497 pub fn set_text_content<S: Into<String>>(&self, doc: &mut Document, text: S) {
498 self.clear_children(doc);
499 let node = Node::Text(text.into());
500 self.push_child(doc, node).unwrap();
501 }
502}
503
504/// Below are methods related to finding nodes in tree.
505impl Element {
506 pub fn parent(&self, doc: &Document) -> Option<Element> {
507 self.data(doc).parent
508 }
509
510 /// `self.parent(doc).is_some()`
511 pub fn has_parent(&self, doc: &Document) -> bool {
512 self.parent(doc).is_some()
513 }
514
515 /// Get child [`Node`]s of this element.
516 pub fn children<'a>(&self, doc: &'a Document) -> &'a Vec<Node> {
517 &self.data(doc).children
518 }
519
520 fn _children_recursive<'a>(&self, doc: &'a Document, nodes: &mut Vec<&'a Node>) {
521 for node in self.children(doc) {
522 nodes.push(node);
523 if let Node::Element(elem) = &node {
524 elem._children_recursive(doc, nodes);
525 }
526 }
527 }
528
529 /// Get all child nodes recursively. (i.e. includes its children's children.)
530 pub fn children_recursive<'a>(&self, doc: &'a Document) -> Vec<&'a Node> {
531 let mut nodes = Vec::new();
532 self._children_recursive(doc, &mut nodes);
533 nodes
534 }
535
536 /// `!self.children(doc).is_empty()`
537 pub fn has_children(&self, doc: &Document) -> bool {
538 !self.children(doc).is_empty()
539 }
540
541 /// Get only child [`Element`]s of this element.
542 ///
543 /// This calls `.children().iter().filter_map().collect()`.
544 /// Use [`Element::children()`] if performance is important.
545 pub fn child_elements(&self, doc: &Document) -> Vec<Element> {
546 self.children(doc)
547 .iter()
548 .filter_map(|node| {
549 if let Node::Element(elemid) = node {
550 Some(*elemid)
551 } else {
552 None
553 }
554 })
555 .collect()
556 }
557
558 /// Get child [`Element`]s recursively. (i.e. includes its child element's child elements)
559 pub fn child_elements_recursive(&self, doc: &Document) -> Vec<Element> {
560 self.children_recursive(doc)
561 .iter()
562 .filter_map(|node| {
563 if let Node::Element(elemid) = node {
564 Some(*elemid)
565 } else {
566 None
567 }
568 })
569 .collect()
570 }
571
572 /// Find first direct child element with name `name`.
573 pub fn find(&self, doc: &Document, name: &str) -> Option<Element> {
574 self.children(doc)
575 .iter()
576 .filter_map(|n| n.as_element())
577 .find(|e| e.name(doc) == name)
578 }
579
580 /// Find all direct child elements with name `name`.
581 pub fn find_all(&self, doc: &Document, name: &str) -> Vec<Element> {
582 self.children(doc)
583 .iter()
584 .filter_map(|n| n.as_element())
585 .filter(|e| e.name(doc) == name)
586 .collect()
587 }
588
589 /// A helper method that identifies child based on namespace if the namespace is
590 /// declared directly on this child.
591 fn has_self_declared_namespace(
592 &self,
593 doc: &Document,
594 prefix: &str,
595 namespace_url: &str,
596 ) -> bool {
597 let self_namespaces = self.namespace_decls(doc);
598 if let Some(namespace) = self_namespaces.get(prefix) {
599 namespace_url == namespace.as_str()
600 } else {
601 false
602 }
603 }
604
605 /// Find the first direct child element with the given tag `name` belonging to the
606 /// specified namespace (identified by a `namespace_url`).
607 ///
608 /// ```rust
609 /// use biodivine_xml_doc::Document;
610 ///
611 /// let mut doc = Document::parse_str(r#"<?xml version="1.0" encoding="UTF-8"?>
612 /// <parent xmlns:ns1="http://ns1" xmlns:ns2="http://ns2">
613 /// <ns2:child id="1"/>
614 /// <ns1:child id="2"/>
615 /// </parent>
616 /// "#).unwrap();
617 ///
618 /// let root = doc.root_element().unwrap();
619 /// let child = root.find_quantified(&doc, "child", "http://ns1").unwrap();
620 /// assert_eq!(child.attribute(&doc, "id"), Some("2"));
621 /// ```
622 pub fn find_quantified(
623 &self,
624 doc: &Document,
625 name: &str,
626 namespace_url: &str,
627 ) -> Option<Element> {
628 let admissible_prefix = self.collect_namespace_prefixes(doc, namespace_url);
629 for child in self.child_elements(doc) {
630 let (child_prefix, child_name) = child.prefix_name(doc);
631 if name != child_name {
632 continue;
633 }
634 if admissible_prefix.contains(child_prefix) {
635 return Some(child);
636 }
637 if child.has_self_declared_namespace(doc, child_prefix, namespace_url) {
638 return Some(child);
639 }
640 }
641 None
642 }
643
644 /// Find *all* the direct child elements with the given tag `name` belonging to the
645 /// specified namespace (identified by a `namespace_url`).
646 ///
647 /// ```rust
648 /// use biodivine_xml_doc::Document;
649 ///
650 /// let mut doc = Document::parse_str(r#"<?xml version="1.0" encoding="UTF-8"?>
651 /// <parent xmlns="http://ns1" xmlns:ns1="http://ns1" xmlns:ns2="http://ns2">
652 /// <ns2:child id="1" />
653 /// <child id="2" />
654 /// <ns1:child id="3" />
655 /// </parent>
656 /// "#).unwrap();
657 ///
658 /// let root = doc.root_element().unwrap();
659 /// let children = root.find_all_quantified(&doc, "child", "http://ns1");
660 /// assert_eq!(children.len(), 2);
661 /// assert_eq!(children[0].attribute(&doc, "id"), Some("2"));
662 /// assert_eq!(children[1].attribute(&doc, "id"), Some("3"));
663 /// ```
664 pub fn find_all_quantified(
665 &self,
666 doc: &Document,
667 name: &str,
668 namespace_url: &str,
669 ) -> Vec<Element> {
670 let mut result = Vec::new();
671 let admissible_prefix = self.collect_namespace_prefixes(doc, namespace_url);
672 for child in self.child_elements(doc) {
673 let (child_prefix, child_name) = child.prefix_name(doc);
674 if name != child_name {
675 continue;
676 }
677 if admissible_prefix.contains(child_prefix) {
678 result.push(child);
679 }
680 if child.has_self_declared_namespace(doc, child_prefix, namespace_url) {
681 result.push(child);
682 }
683 }
684 result
685 }
686
687 /// Compute all namespace prefixes that are valid for the given `namespace_url` in the context
688 /// of *this* XML element.
689 ///
690 /// The default prefix is represented as an empty string slice.
691 ///
692 /// ```rust
693 /// use biodivine_xml_doc::Document;
694 ///
695 /// let mut doc = Document::parse_str(r#"<?xml version="1.0" encoding="UTF-8"?>
696 /// <parent xmlns="http://ns1" xmlns:ns1="http://ns1" xmlns:ns2="http://ns1">
697 /// <child xmlns:ns2="http://ns2" />
698 /// </parent>
699 /// "#).unwrap();
700 ///
701 /// let root = doc.root_element().unwrap();
702 /// let child = root.child_elements(&doc)[0];
703 /// // Three prefixes: `default`, `ns1`, and `ns2`
704 /// assert_eq!(root.collect_namespace_prefixes(&doc, "http://ns1").len(), 3);
705 /// // Only two prefixes. `ns2` is overridden.
706 /// assert_eq!(child.collect_namespace_prefixes(&doc, "http://ns1").len(), 2);
707 /// ```
708 pub fn collect_namespace_prefixes<'a>(
709 &self,
710 doc: &'a Document,
711 namespace_url: &str,
712 ) -> HashSet<&'a str> {
713 /// The idea is that we first go all the way to the root element,
714 /// and then as we are returning from the recursion, we are adding prefix "candidates".
715 /// However, at the same time, we are removing candidates which are overwritten
716 /// by another prefix lower on the path.
717 fn recursion<'a>(
718 document: &'a Document,
719 valid_prefixes: &mut HashSet<&'a str>,
720 element: &Element,
721 namespace_url: &str,
722 ) {
723 if let Some(parent) = element.parent(document) {
724 recursion(document, valid_prefixes, &parent, namespace_url);
725 }
726 // At this point, `valid_prefixes` contains all prefixes that are declared in
727 // some of our parents for the requested URL. As such, we can go through the
728 // declarations in this tag and add new prefix if it is valid, or remove prefix
729 // if it is overwritten by a different url.
730 for (prefix, namespace) in element.namespace_decls(document) {
731 if namespace.as_str() == namespace_url {
732 valid_prefixes.insert(prefix);
733 } else if valid_prefixes.contains(prefix.as_str()) {
734 valid_prefixes.remove(prefix.as_str());
735 }
736 }
737 }
738
739 let mut result = HashSet::new();
740 if namespace_url.is_empty() {
741 // "no namespace" has by default an empty prefix, but this can be removed
742 // if a different namespace is found along the way.
743 result.insert("");
744 }
745 recursion(doc, &mut result, self, namespace_url);
746 result
747 }
748
749 /// Collect namespace declarations which apply to this XML `Element`.
750 ///
751 /// The result contains the empty prefix only if it is declared with a non-empty namespace url.
752 ///
753 /// ```rust
754 /// use std::collections::HashMap;
755 /// use biodivine_xml_doc::Document;
756 ///
757 /// let mut doc = Document::parse_str(r#"<?xml version="1.0" encoding="UTF-8"?>
758 /// <parent xmlns="http://ns1" xmlns:ns1="http://ns1" xmlns:ns2="http://ns1">
759 /// <child xmlns:ns2="http://ns2">
760 /// <ns1:child/>
761 /// <ns2:child/>
762 /// </child>
763 /// </parent>
764 /// "#).unwrap();
765 ///
766 /// let root = doc.root_element().unwrap();
767 /// let child = root.child_elements(&doc)[0];
768 /// let declarations = child.collect_applicable_namespace_decls(&doc);
769 /// // The result should contain "" and "ns1". "ns2" is-redeclared on child, so is not needed.
770 /// let expected = HashMap::from([
771 /// ("ns2".to_string(), "http://ns2".to_string()),
772 /// ("ns1".to_string(), "http://ns1".to_string()),
773 /// ("".to_string(), "http://ns1".to_string())
774 /// ]);
775 /// assert_eq!(declarations.len(), 3);
776 /// assert_eq!(declarations, expected);
777 /// ```
778 pub fn collect_applicable_namespace_decls(&self, doc: &Document) -> HashMap<String, String> {
779 let mut e = *self;
780 let mut result = e.namespace_decls(doc).clone();
781 while let Some(parent) = e.parent(doc) {
782 e = parent;
783 for (prefix, url) in e.namespace_decls(doc) {
784 if !result.contains_key(prefix) {
785 result.insert(prefix.clone(), url.clone());
786 }
787 }
788 }
789 result
790 }
791
792 /// Collect "parent" namespace declarations which apply to the XML sub-tree of this `Element`.
793 ///
794 /// "Parent" declarations are those which appear on one of the parent tags of `Element`,
795 /// not in the `Element` sub-tree. Each namespace prefix resolves to a specific URL based
796 /// on standard XML namespace shadowing rules.
797 ///
798 /// Note that the method can return a combination of an empty prefix and an empty url
799 /// when the sub-tree contains elements with no prefix and there is no default namespace url
800 /// declared by the parents.
801 ///
802 /// ```rust
803 /// use std::collections::HashMap;
804 /// use biodivine_xml_doc::Document;
805 ///
806 /// let mut doc = Document::parse_str(r#"<?xml version="1.0" encoding="UTF-8"?>
807 /// <parent xmlns="http://ns1" xmlns:ns1="http://ns1" xmlns:ns2="http://ns1">
808 /// <child xmlns:ns2="http://ns2">
809 /// <ns1:child/>
810 /// <ns2:child/>
811 /// </child>
812 /// </parent>
813 /// "#).unwrap();
814 ///
815 /// let root = doc.root_element().unwrap();
816 /// let child = root.child_elements(&doc)[0];
817 /// let declarations = child.collect_external_namespace_decls(&doc);
818 /// // The result should contain "" and "ns1". "ns2" is-redeclared on child, so is not needed.
819 /// let expected = HashMap::from([
820 /// ("".to_string(), "http://ns1".to_string()),
821 /// ("ns1".to_string(), "http://ns1".to_string())
822 /// ]);
823 /// assert_eq!(declarations.len(), 2);
824 /// assert_eq!(declarations, expected);
825 /// ```
826 pub fn collect_external_namespace_decls(&self, doc: &Document) -> HashMap<String, String> {
827 /// Collect all prefixes within the element subtree that are not declared
828 /// within the sub-tree itself.
829 fn collect_prefixes<'a>(
830 e: &Element,
831 doc: &'a Document,
832 known_prefixes: &HashSet<&'a str>,
833 unknown_prefixes: &mut HashSet<&'a str>,
834 ) {
835 let my_declarations = e.namespace_decls(doc);
836 if my_declarations.is_empty() {
837 // This element has no namespace declarations, hence we just check it and continue
838 // recursively to the child elements.
839 let my_prefix = e.prefix(doc);
840 if !known_prefixes.contains(my_prefix) {
841 unknown_prefixes.insert(my_prefix);
842 }
843 for child in e.child_elements(doc) {
844 collect_prefixes(&child, doc, known_prefixes, unknown_prefixes);
845 }
846 } else {
847 // This element actually has declarations, so we need to copy the existing prefix
848 // map and update it with new values.
849 let mut my_known_prefixes = known_prefixes.clone();
850 for prefix in my_declarations.keys() {
851 my_known_prefixes.insert(prefix.as_str());
852 }
853 let my_prefix = e.prefix(doc);
854 if !known_prefixes.contains(my_prefix) {
855 unknown_prefixes.insert(my_prefix);
856 }
857 for child in e.child_elements(doc) {
858 collect_prefixes(&child, doc, &my_known_prefixes, unknown_prefixes);
859 }
860 }
861 }
862
863 let known = HashSet::new();
864 let mut unknown = HashSet::new();
865 collect_prefixes(self, doc, &known, &mut unknown);
866
867 unknown
868 .into_iter()
869 .map(|prefix| {
870 let Some(namespace) = self.namespace_for_prefix(doc, prefix) else {
871 panic!("Invalid XML document. Prefix `{}` not declared.", prefix);
872 };
873 (prefix.to_string(), namespace.to_string())
874 })
875 .collect::<HashMap<_, _>>()
876 }
877
878 /// Find the "closest" namespace prefix which is associated with the given `namespace_url`.
879 ///
880 /// If the namespace is declared on the element itself, then its prefix is returned.
881 /// Otherwise, the closest parent with the declared namespace is found and this prefix
882 /// is returned. If the namespace is not declared for this element, `None` is returned.
883 ///
884 /// If the "closest" element has multiple declarations of the namespace in question,
885 /// the lexicographically first prefix is return (i.e. compared through standard
886 /// string ordering).
887 ///
888 /// You can use empty namespace url to signify "no namespace", in which case the method
889 /// can only return an empty prefix, but it can also return `None` if there is a default
890 /// namespace which prevents you from having "no namespace" on this element.
891 ///
892 /// ```rust
893 /// use biodivine_xml_doc::Document;
894 ///
895 /// let mut doc = Document::parse_str(r#"<?xml version="1.0" encoding="UTF-8"?>
896 /// <parent xmlns="http://ns1" xmlns:ns1="http://ns1" xmlns:ns2="http://ns2">
897 /// <child xmlns:ns="http://ns2" />
898 /// </parent>
899 /// "#).unwrap();
900 ///
901 /// let root = doc.root_element().unwrap();
902 /// let child = root.child_elements(&doc)[0];
903 /// assert_eq!(root.closest_prefix(&doc, "http://ns1"), Some(""));
904 /// assert_eq!(root.closest_prefix(&doc, "http://ns2"), Some("ns2"));
905 /// assert_eq!(child.closest_prefix(&doc, "http://ns1"), Some(""));
906 /// assert_eq!(child.closest_prefix(&doc, "http://ns2"), Some("ns"));
907 /// ```
908 ///
909 pub fn closest_prefix<'a>(&self, doc: &'a Document, namespace_url: &str) -> Option<&'a str> {
910 let mut search = *self;
911 loop {
912 let mut candidate: Option<&str> = None;
913 for (prefix, url) in search.namespace_decls(doc) {
914 if url == namespace_url {
915 if let Some(current) = candidate {
916 if prefix.as_str() < current {
917 candidate = Some(prefix);
918 }
919 } else {
920 candidate = Some(prefix);
921 }
922 }
923 }
924 if candidate.is_some() {
925 return candidate;
926 }
927 if let Some(parent) = search.parent(doc) {
928 search = parent;
929 } else if namespace_url.is_empty() {
930 return Some("");
931 } else {
932 return None;
933 }
934 }
935 }
936}
937
938/// Below are functions that modify its tree-structure.
939///
/// Because an element has a reference to both its parent and its children,
/// an element's parent and children are not directly exposed for modification.
/// But in return, it is not possible for a document to be in an inconsistent state,
/// where an element's parent doesn't have the element as its child.
944impl Element {
945 /// Equivalent to `vec.push()`.
946 /// # Errors
947 /// - [`Error::HasAParent`]: When you want to replace an element's parent with another,
948 /// call `element.detatch()` to make it parentless first.
949 /// This is to make it explicit that you are changing an element's parent, not adding another.
950 /// - [`Error::ContainerCannotMove`]: The container element's parent must always be None.
951 pub fn push_child(&self, doc: &mut Document, node: Node) -> Result<()> {
952 if let Node::Element(elem) = node {
953 if elem.is_container() {
954 return Err(Error::ContainerCannotMove);
955 }
956 let data = elem.mut_data(doc);
957 if data.parent.is_some() {
958 return Err(Error::HasAParent);
959 }
960 data.parent = Some(*self);
961 }
962 self.mut_data(doc).children.push(node);
963 Ok(())
964 }
965
966 /// Equivalent to `parent.push_child()`.
967 ///
968 /// # Errors
969 /// - [`Error::HasAParent`]: When you want to replace an element's parent with another,
970 /// call `element.detatch()` to make it parentless first.
971 /// This is to make it explicit that you are changing an element's parent, not adding another.
972 /// - [`Error::ContainerCannotMove`]: The container element's parent must always be None.
973 pub fn push_to(&self, doc: &mut Document, parent: Element) -> Result<()> {
974 parent.push_child(doc, self.as_node())
975 }
976
977 /// Equivalent to `vec.insert()`.
978 ///
979 /// # Panics
980 ///
981 /// Panics if `index > self.children().len()`
982 ///
983 /// # Errors
984 /// - [`Error::HasAParent`]: When you want to replace an element's parent with another,
985 /// call `element.detatch()` to make it parentless first.
986 /// This is to make it explicit that you are changing an element's parent, not adding another.
987 /// - [`Error::ContainerCannotMove`]: The container element's parent must always be None.
988 pub fn insert_child(&self, doc: &mut Document, index: usize, node: Node) -> Result<()> {
989 if let Node::Element(elem) = node {
990 if elem.is_container() {
991 return Err(Error::ContainerCannotMove);
992 }
993 let data = elem.mut_data(doc);
994 if data.parent.is_some() {
995 return Err(Error::HasAParent);
996 }
997 data.parent = Some(*self);
998 }
999 self.mut_data(doc).children.insert(index, node);
1000 Ok(())
1001 }
1002
1003 /// Equivalent to `vec.remove()`.
1004 ///
1005 /// # Panics
1006 ///
1007 /// Panics if `index >= self.children().len()`.
1008 pub fn remove_child(&self, doc: &mut Document, index: usize) -> Node {
1009 let node = self.mut_data(doc).children.remove(index);
1010 if let Node::Element(elem) = node {
1011 elem.mut_data(doc).parent = None;
1012 }
1013 node
1014 }
1015
1016 /// Equivalent to `vec.pop()`.
1017 pub fn pop_child(&self, doc: &mut Document) -> Option<Node> {
1018 let child = self.mut_data(doc).children.pop();
1019 if let Some(Node::Element(elem)) = &child {
1020 elem.mut_data(doc).parent = None;
1021 }
1022 child
1023 }
1024
1025 /// Remove all children and return them.
1026 pub fn clear_children(&self, doc: &mut Document) -> Vec<Node> {
1027 let count = self.children(doc).len();
1028 let mut removed = Vec::with_capacity(count);
1029 for _ in 0..count {
1030 let child = self.remove_child(doc, 0);
1031 removed.push(child);
1032 }
1033 removed
1034 }
1035
1036 /// Removes itself from its parent. Note that you can't attach this element to other documents.
1037 ///
1038 /// # Errors
1039 ///
1040 /// - [`Error::ContainerCannotMove`]: You can't detatch container element
1041 pub fn detatch(&self, doc: &mut Document) -> Result<()> {
1042 if self.is_container() {
1043 return Err(Error::ContainerCannotMove);
1044 }
1045 let data = self.mut_data(doc);
1046 if let Some(parent) = data.parent {
1047 let pos = parent
1048 .children(doc)
1049 .iter()
1050 .position(|n| n.as_element() == Some(*self))
1051 .unwrap();
1052 parent.remove_child(doc, pos);
1053 }
1054 Ok(())
1055 }
1056}
1057
#[cfg(test)]
mod tests {
    use super::{Document, Element, Node};

    /// Direct and recursive child accessors agree with the parsed structure.
    #[test]
    fn test_children() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
        <outer>
            inside outer
            <middle>
                <inner>
                    inside
                </inner>
                after inside
            </middle>
            <after>
                inside after
            </after>
        </outer>
        "#;
        let doc = Document::parse_str(xml).unwrap();
        let container = doc.container();

        let top_level = container.child_elements(&doc);
        let outer = top_level[0];
        let outer_elements = outer.child_elements(&doc);
        let (middle, after) = (outer_elements[0], outer_elements[1]);
        let inner = middle.child_elements(&doc)[0];

        assert_eq!(top_level.len(), 1);
        let expected_names = [
            (outer, "outer"),
            (middle, "middle"),
            (inner, "inner"),
            (after, "after"),
        ];
        for (element, expected) in expected_names.iter() {
            assert_eq!(element.name(&doc), *expected);
        }

        assert_eq!(outer.children(&doc).len(), 3);
        assert_eq!(outer_elements.len(), 2);
        assert_eq!(container.children_recursive(&doc).len(), 8);
        assert_eq!(
            container.child_elements_recursive(&doc),
            vec![outer, middle, inner, after]
        );
    }

    /// Namespace lookups honour declarations on the element and on its ancestors.
    #[test]
    fn test_namespace() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
        <root xmlns="ns" xmlns:p="pns">
            <p:foo xmlns="inner">
                Hello
            </p:foo>
            <p:bar xmlns:p="in2">
                <c />
                World!
            </p:bar>
        </root>"#;
        let doc = Document::parse_str(xml).unwrap();
        let root = doc.container().children(&doc)[0].as_element().unwrap();
        let root_elements = root.child_elements(&doc);
        let foo = root_elements[0];
        let bar = root_elements[1];
        let c = bar.child_elements(&doc)[0];

        // Name splitting.
        assert_eq!(c.prefix_name(&doc), ("", "c"));
        assert_eq!(bar.full_name(&doc), "p:bar");
        assert_eq!(bar.prefix(&doc), "p");
        assert_eq!(bar.name(&doc), "bar");

        // Namespaces seen from `<c />`.
        assert_eq!(c.namespace(&doc).unwrap(), "ns");
        assert_eq!(c.namespace_for_prefix(&doc, "p").unwrap(), "in2");
        assert!(c.namespace_for_prefix(&doc, "random").is_none());

        // Namespaces seen from `<p:bar>`.
        assert_eq!(bar.namespace(&doc).unwrap(), "in2");
        assert_eq!(bar.namespace_for_prefix(&doc, "").unwrap(), "ns");

        // Namespaces seen from `<p:foo>`.
        assert_eq!(foo.namespace(&doc).unwrap(), "pns");
        assert_eq!(foo.namespace_for_prefix(&doc, "").unwrap(), "inner");
        assert_eq!(foo.namespace_for_prefix(&doc, "p").unwrap(), "pns");

        // Namespace of the root element itself.
        assert_eq!(root.namespace(&doc).unwrap(), "ns");
    }

    /// `find` locates children by name and `text_content` concatenates nested text.
    #[test]
    fn test_find_text_content() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
        <core>
            <p>Text</p>
            <b>Text2</b>
        </core>
        "#;
        let doc = Document::parse_str(xml).unwrap();

        let paragraph = doc.root_element().unwrap().find(&doc, "p").unwrap();
        assert_eq!(paragraph.text_content(&doc), "Text");

        let bold = doc.root_element().unwrap().find(&doc, "b").unwrap();
        assert_eq!(bold.text_content(&doc), "Text2");

        let core = doc.root_element().unwrap();
        assert_eq!(core.text_content(&doc), "TextText2");
    }

    /// Parent links and child lists stay consistent through every mutating method.
    #[test]
    fn test_mutate_tree() {
        let mut doc = Document::new();
        let container = doc.container();
        assert_eq!(container.parent(&doc), None);
        assert_eq!(container.children(&doc).len(), 0);

        // Element::build.push_to
        let root = Element::build("root").push_to(&mut doc, container);
        assert_eq!(root.parent(&doc).unwrap(), container);
        assert_eq!(doc.root_element().unwrap(), root);

        // Element::new
        let first = Element::new(&mut doc, "a");
        assert_eq!(first.parent(&doc), None);

        // Element.push_child
        root.push_child(&mut doc, Node::Element(first)).unwrap();
        assert_eq!(root.children(&doc)[0].as_element().unwrap(), first);
        assert_eq!(first.parent(&doc).unwrap(), root);

        // Element.pop
        let popped_node = root.pop_child(&mut doc).unwrap();
        let popped = popped_node.as_element().unwrap();
        assert_eq!(popped, first);
        assert_eq!(root.children(&doc).len(), 0);
        assert_eq!(first.parent(&doc), None);

        // Element.push_to
        let second = Element::new(&mut doc, "a");
        second.push_to(&mut doc, root).unwrap();
        assert_eq!(root.children(&doc)[0].as_element().unwrap(), second);
        assert_eq!(second.parent(&doc).unwrap(), root);

        // Element.remove_child
        root.remove_child(&mut doc, 0);
        assert_eq!(root.children(&doc).len(), 0);
        assert_eq!(second.parent(&doc), None);

        // Element.insert_child
        let third = Element::new(&mut doc, "a");
        root.insert_child(&mut doc, 0, Node::Element(third)).unwrap();
        assert_eq!(root.children(&doc)[0].as_element().unwrap(), third);
        assert_eq!(third.parent(&doc).unwrap(), root);

        // Element.detatch
        third.detatch(&mut doc).unwrap();
        assert_eq!(root.children(&doc).len(), 0);
        assert_eq!(third.parent(&doc), None);
    }
}