biodivine_xml_doc/
element.rs

1use crate::document::{Document, Node};
2use crate::error::{Error, Result};
3use std::collections::{HashMap, HashSet};
4
/// Backing record of a single element.
///
/// [`Element`] values are only ids; the record itself is kept in the `Document` store
/// (see `Element::with_data`, which pushes a new record for every created element).
#[derive(Debug)]
pub(crate) struct ElementData {
    /// Tag name including the optional namespace prefix (`prefix:name`).
    full_name: String,
    /// Attributes keyed by their full (prefixed) name.
    attributes: HashMap<String, String>, // q:attr="val" => {"q:attr": "val"}
    /// Namespace declarations made on this element, keyed by prefix.
    namespace_decls: HashMap<String, String>, // local namespace newly defined in attributes
    /// Parent element; `None` for a freshly created element.
    parent: Option<Element>,
    /// Child nodes in document order.
    children: Vec<Node>,
}
13
/// An easy way to build a new element
/// by chaining methods to add properties.
///
/// Call [`Element::build()`] to start building.
/// To finish building, either call `.finish(doc)` or `.push_to(doc, parent)`,
/// both of which return the new [`Element`].
///
/// # Examples
///
/// ```
/// use biodivine_xml_doc::{Document, Element, Node};
///
/// let mut doc = Document::new();
///
/// let root = Element::build("root")
///     .attribute("id", "main")
///     .attribute("class", "main")
///     .finish(&mut doc);
/// doc.push_root_node(root.as_node()).unwrap();
///
/// let name = Element::build("name")
///     .text_content("No Name")
///     .push_to(&mut doc, root);
///
/// /* Equivalent xml:
///   <root id="main" class="main">
///     <name>No Name</name>
///   </root>
/// */
/// ```
///
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ElementBuilder {
    /// Tag name of the element being built, including the optional `prefix:`.
    full_name: String,
    /// Attributes collected so far, keyed by their full name.
    attributes: HashMap<String, String>,
    /// Namespace declarations collected so far, keyed by prefix.
    namespace_decls: HashMap<String, String>,
    /// Text that `finish` pushes as a single text child, if set.
    text_content: Option<String>,
}
52
53impl ElementBuilder {
54    fn new(full_name: String) -> ElementBuilder {
55        ElementBuilder {
56            full_name,
57            attributes: HashMap::new(),
58            namespace_decls: HashMap::new(),
59            text_content: None,
60        }
61    }
62
63    /// Removes previous prefix if it exists, and attach new prefix.
64    pub fn prefix(mut self, prefix: &str) -> Self {
65        let (_, name) = Element::separate_prefix_name(&self.full_name);
66        if prefix.is_empty() {
67            self.full_name = name.to_string();
68        } else {
69            self.full_name = format!("{}:{}", prefix, name);
70        }
71        self
72    }
73
74    pub fn attribute<S, T>(mut self, name: S, value: T) -> Self
75    where
76        S: Into<String>,
77        T: Into<String>,
78    {
79        self.attributes.insert(name.into(), value.into());
80        self
81    }
82
83    pub fn namespace_decl<S, T>(mut self, prefix: S, namespace: T) -> Self
84    where
85        S: Into<String>,
86        T: Into<String>,
87    {
88        self.namespace_decls.insert(prefix.into(), namespace.into());
89        self
90    }
91
92    pub fn text_content<S: Into<String>>(mut self, text: S) -> Self {
93        self.text_content = Some(text.into());
94        self
95    }
96
97    pub fn finish(self, doc: &mut Document) -> Element {
98        let elem = Element::with_data(doc, self.full_name, self.attributes, self.namespace_decls);
99        if let Some(text) = self.text_content {
100            elem.push_child(doc, Node::Text(text)).unwrap();
101        }
102        elem
103    }
104
105    /// Push this element to the parent's children.
106    pub fn push_to(self, doc: &mut Document, parent: Element) -> Element {
107        let elem = self.finish(doc);
108        elem.push_to(doc, parent).unwrap();
109        elem
110    }
111}
112
/// Represents an XML element. It acts as a pointer to actual element data stored in Document.
///
/// This struct only contains a unique `usize` id and implements trait `Copy`,
/// so you do not need to bother with having a reference.
///
/// Because the actual data of the element is stored in [`Document`],
/// most methods take `&Document` or `&mut Document` as their first argument.
///
/// Note that an element may only interact with elements of the same document,
/// but the crate doesn't know which document an element is from.
/// Trying to push an element from a different Document may result in unexpected errors.
///
/// # Examples
///
/// Find children nodes with attribute
/// ```
/// use biodivine_xml_doc::{Document, Element};
///
/// let doc = Document::parse_str(r#"<?xml version="1.0"?>
/// <data>
///   <item class="value">a</item>
///   <item class="value">b</item>
///   <item></item>
/// </data>
/// "#).unwrap();
///
/// let data = doc.root_element().unwrap();
/// let value_items: Vec<Element> = data.children(&doc)
///     .iter()
///     .filter_map(|node| node.as_element())
///     .filter(|elem| elem.attribute(&doc, "class") == Some("value"))
///     .collect();
/// ```
///
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Element {
    /// Index of this element's record in the document store; `0` is the container.
    id: usize,
}
151
152impl Element {
153    /// Create a new empty element with `full_name`.
154    ///
155    /// If full_name contains `:`,
156    /// everything before that will be interpreted as a namespace prefix.
157    pub fn new<S: Into<String>>(doc: &mut Document, full_name: S) -> Self {
158        Self::with_data(doc, full_name.into(), HashMap::new(), HashMap::new())
159    }
160
161    /// Chain methods to build an element easily.
162    /// The chain can be finished with `.finish()` or `.push_to(parent)`.
163    ///
164    /// # Example
165    /// ```
166    /// use biodivine_xml_doc::{Document, Element, Node};
167    ///
168    /// let mut doc = Document::new();
169    ///
170    /// let elem = Element::build("root")
171    ///     .attribute("id", "main")
172    ///     .attribute("class", "main")
173    ///     .finish(&mut doc);
174    ///
175    /// doc.push_root_node(elem.as_node()).unwrap();
176    /// ```
177    pub fn build<S: Into<String>>(name: S) -> ElementBuilder {
178        ElementBuilder::new(name.into())
179    }
180
181    pub(crate) fn with_data(
182        doc: &mut Document,
183        full_name: String,
184        attributes: HashMap<String, String>,
185        namespace_decls: HashMap<String, String>,
186    ) -> Element {
187        let elem = Element { id: doc.counter };
188        let elem_data = ElementData {
189            full_name,
190            attributes,
191            namespace_decls,
192            parent: None,
193            children: vec![],
194        };
195        doc.store.push(elem_data);
196        doc.counter += 1;
197        elem
198    }
199
200    /// Create a container Element
201    pub(crate) fn container() -> (Element, ElementData) {
202        let elem_data = ElementData {
203            full_name: String::new(),
204            attributes: HashMap::new(),
205            namespace_decls: HashMap::new(),
206            parent: None,
207            children: Vec::new(),
208        };
209        let elem = Element { id: 0 };
210        (elem, elem_data)
211    }
212
213    /// Returns `true` if element is a container.
214    ///
215    /// See [`Document::container()`] for more information on 'container'.
216    pub fn is_container(&self) -> bool {
217        self.id == 0
218    }
219
220    /// Equivalent to `Node::Element(self)`
221    pub fn as_node(&self) -> Node {
222        Node::Element(*self)
223    }
224
225    /// Seperate full_name by `:`, returning (prefix, name).
226    ///
227    /// The first str is `""` if `full_name` has no prefix.
228    pub fn separate_prefix_name(full_name: &str) -> (&str, &str) {
229        match full_name.split_once(':') {
230            Some((prefix, name)) => (prefix, name),
231            None => ("", full_name),
232        }
233    }
234}
235
236/// Below are methods that take `&Document` as its first argument.
237impl Element {
238    fn data<'a>(&self, doc: &'a Document) -> &'a ElementData {
239        doc.store.get(self.id).unwrap()
240    }
241
242    fn mut_data<'a>(&self, doc: &'a mut Document) -> &'a mut ElementData {
243        doc.store.get_mut(self.id).unwrap()
244    }
245
246    /// Returns true if this element is the root node of document.
247    ///
248    /// Note that this crate allows Document to have multiple elements, even though it's not valid xml.
249    pub fn is_root(&self, doc: &Document) -> bool {
250        self.parent(doc).map_or(false, |p| p.is_container())
251    }
252
253    /// Returns the "top" parent of this element. If the element is attached, the "top" parent
254    /// is the document root. Otherwise, the "top" parent is the root of the detached sub-tree.
255    pub fn top_parent(&self, doc: &Document) -> Element {
256        let mut e = *self;
257        while let Some(parent) = e.parent(doc) {
258            if parent.is_container() {
259                return e;
260            }
261            e = parent;
262        }
263        e
264    }
265
266    /// Get full name of element, including its namespace prefix.
267    /// Use [`Element::name()`] to get its name without the prefix.
268    pub fn full_name<'a>(&self, doc: &'a Document) -> &'a str {
269        &self.data(doc).full_name
270    }
271
272    pub fn set_full_name<S: Into<String>>(&self, doc: &mut Document, name: S) {
273        self.mut_data(doc).full_name = name.into();
274    }
275
276    /// Get prefix and name of element. If it doesn't have prefix, will return an empty string.
277    ///
278    /// `<prefix: name` -> `("prefix", "name")`
279    pub fn prefix_name<'a>(&self, doc: &'a Document) -> (&'a str, &'a str) {
280        Self::separate_prefix_name(self.full_name(doc))
281    }
282
283    /// Get namespace prefix of element, without name.
284    ///
285    /// `<prefix:name>` -> `"prefix"`
286    pub fn prefix<'a>(&self, doc: &'a Document) -> &'a str {
287        self.prefix_name(doc).0
288    }
289
290    /// Set prefix of element, preserving its name.
291    ///
292    /// `prefix` should not have a `:`,
293    /// or everything after `:` will be interpreted as part of element name.    
294    ///
295    /// If prefix is an empty string, removes prefix.
296    pub fn set_prefix<S: Into<String>>(&self, doc: &mut Document, prefix: S) {
297        let data = self.mut_data(doc);
298        let (_, name) = Self::separate_prefix_name(&data.full_name);
299        let prefix: String = prefix.into();
300        if prefix.is_empty() {
301            data.full_name = name.to_string();
302        } else {
303            data.full_name = format!("{}:{}", prefix, name);
304        }
305    }
306
307    /// Get name of element, without its namespace prefix.
308    /// Use `Element::full_name()` to get its full name with prefix.
309    ///
310    /// `<prefix:name>` -> `"name"`
311    pub fn name<'a>(&self, doc: &'a Document) -> &'a str {
312        self.prefix_name(doc).1
313    }
314
315    /// Set name of element, preserving its prefix.
316    ///
317    /// `name` should not have a `:`,
318    /// or everything before `:` may be interpreted as namespace prefix.
319    pub fn set_name<S: Into<String>>(&self, doc: &mut Document, name: S) {
320        let data = self.mut_data(doc);
321        let (prefix, _) = Self::separate_prefix_name(&data.full_name);
322        if prefix.is_empty() {
323            data.full_name = name.into();
324        } else {
325            data.full_name = format!("{}:{}", prefix, name.into());
326        }
327    }
328
329    /// Get attributes of element.
330    ///
331    /// The attribute names may have namespace prefix. To strip the prefix and only its name, call [`Element::separate_prefix_name`].
332    /// ```
333    /// use biodivine_xml_doc::{Document, Element};
334    ///
335    /// let mut doc = Document::new();
336    /// let element = Element::build("name")
337    ///     .attribute("id", "name")
338    ///     .attribute("pre:name", "value")
339    ///     .finish(&mut doc);
340    ///
341    /// let attrs = element.attributes(&doc);
342    /// for (full_name, value) in attrs {
343    ///     let (prefix, name) = Element::separate_prefix_name(full_name);
344    ///     // ("", "id"), ("pre", "name")
345    /// }
346    /// ```
347    pub fn attributes<'a>(&self, doc: &'a Document) -> &'a HashMap<String, String> {
348        &self.data(doc).attributes
349    }
350
351    /// Get attribute value of an element by its full name. (Namespace prefix isn't stripped)
352    pub fn attribute<'a>(&self, doc: &'a Document, name: &str) -> Option<&'a str> {
353        self.attributes(doc).get(name).map(|v| v.as_str())
354    }
355
356    /// Add or set attribute.
357    ///
358    /// If `name` contains a `:`,
359    /// everything before `:` will be interpreted as namespace prefix.
360    pub fn set_attribute<S, T>(&self, doc: &mut Document, name: S, value: T)
361    where
362        S: Into<String>,
363        T: Into<String>,
364    {
365        self.mut_attributes(doc).insert(name.into(), value.into());
366    }
367
368    pub fn mut_attributes<'a>(&self, doc: &'a mut Document) -> &'a mut HashMap<String, String> {
369        &mut self.mut_data(doc).attributes
370    }
371
372    /// Gets the namespace of this element.
373    ///
374    /// Shorthand for `self.namespace_for_prefix(doc, self.prefix(doc))`.
375    pub fn namespace<'a>(&self, doc: &'a Document) -> Option<&'a str> {
376        self.namespace_for_prefix(doc, self.prefix(doc))
377    }
378
379    /// Gets HashMap of `xmlns:prefix=namespace` declared in this element's attributes.
380    ///
381    /// Default namespace has empty string as key.
382    pub fn namespace_decls<'a>(&self, doc: &'a Document) -> &'a HashMap<String, String> {
383        &self.data(doc).namespace_decls
384    }
385
386    pub fn mut_namespace_decls<'a>(
387        &self,
388        doc: &'a mut Document,
389    ) -> &'a mut HashMap<String, String> {
390        &mut self.mut_data(doc).namespace_decls
391    }
392
393    pub fn set_namespace_decl<S, T>(&self, doc: &mut Document, prefix: S, namespace: T)
394    where
395        S: Into<String>,
396        T: Into<String>,
397    {
398        self.mut_namespace_decls(doc)
399            .insert(prefix.into(), namespace.into());
400    }
401
402    /// Get namespace value given prefix, for this element.
403    /// "xml" and "xmlns" returns its default namespace.
404    ///
405    /// This method can return an empty namespace, but only for an empty prefix assuming
406    /// there is no default namespace declared.
407    pub fn namespace_for_prefix<'a>(&self, doc: &'a Document, prefix: &str) -> Option<&'a str> {
408        match prefix {
409            "xml" => return Some("http://www.w3.org/XML/1998/namespace"),
410            "xmlns" => return Some("http://www.w3.org/2000/xmlns/"),
411            _ => (),
412        };
413        let mut elem = *self;
414        loop {
415            let data = elem.data(doc);
416            if let Some(value) = data.namespace_decls.get(prefix) {
417                return Some(value);
418            }
419            if let Some(parent) = elem.parent(doc) {
420                elem = parent;
421            } else if prefix.is_empty() {
422                return Some("");
423            } else {
424                return None;
425            }
426        }
427    }
428
429    /// Returns `true` if this element is quantified by the given `namespace_url`. That is,
430    /// either its prefix resolves to this namespace, or this is the default
431    /// namespace in this context.
432    ///
433    /// See also the usage example in [Self::quantify_with_closest].
434    pub fn is_quantified(&self, doc: &Document, namespace_url: &str) -> bool {
435        self.namespace(doc) == Some(namespace_url)
436    }
437
438    /// Ensure that this element belongs to the specified namespace using the *closest* prefix
439    /// which corresponds to the given `namespace_url`.
440    ///
441    /// If the namespace is not declared for this element, returns `None`, otherwise returns
442    /// the new prefix. As such, `None` actually represents an error and must be consumed.
443    ///
444    /// See [Self::closest_prefix] for the definitions of which prefix will be used.
445    ///
446    /// ```rust
447    /// use biodivine_xml_doc::Document;
448    ///
449    /// let mut doc = Document::parse_str(r#"<?xml version="1.0" encoding="UTF-8"?>
450    /// <parent xmlns="http://ns1" xmlns:ns1="http://ns1" xmlns:ns2="http://ns2">
451    ///     <child xmlns:ns="http://ns2" />
452    /// </parent>
453    /// "#).unwrap();
454    ///
455    /// let root = doc.root_element().unwrap();
456    /// let child = root.child_elements(&doc)[0];
457    ///
458    /// // Everybody is already quantified with ns1, since it is the default namespace.
459    ///
460    /// assert!(child.is_quantified(&doc, "http://ns1"));
461    /// assert!(!root.is_quantified(&doc, "http://ns2"));
462    ///
463    /// assert_eq!(child.quantify_with_closest(&mut doc, "http://ns1"), Some("".to_string()));
464    /// assert_eq!(root.quantify_with_closest(&mut doc, "http://ns2"), Some("ns2".to_string()));
465    ///
466    /// assert!(child.is_quantified(&doc, "http://ns1"));
467    /// assert!(root.is_quantified(&doc, "http://ns2"));
468    /// ```
469    #[must_use]
470    pub fn quantify_with_closest(&self, doc: &mut Document, namespace_url: &str) -> Option<String> {
471        let prefix = self.closest_prefix(doc, namespace_url);
472        if let Some(prefix) = prefix {
473            let prefix = prefix.to_string();
474            self.set_prefix(doc, prefix.as_str());
475            Some(prefix)
476        } else {
477            None
478        }
479    }
480
481    pub(crate) fn build_text_content<'a>(&self, doc: &'a Document, buf: &'a mut String) {
482        for child in self.children(doc) {
483            child.build_text_content(doc, buf);
484        }
485    }
486
487    /// Concatenate all text content of this element, including its child elements `text_content()`.
488    ///
489    /// Implementation of [Node.textContent](https://developer.mozilla.org/en-US/docs/Web/API/Node/textContent)
490    pub fn text_content(&self, doc: &Document) -> String {
491        let mut buf = String::new();
492        self.build_text_content(doc, &mut buf);
493        buf
494    }
495
496    /// Clears all its children and inserts a [`Node::Text`] with given text.
497    pub fn set_text_content<S: Into<String>>(&self, doc: &mut Document, text: S) {
498        self.clear_children(doc);
499        let node = Node::Text(text.into());
500        self.push_child(doc, node).unwrap();
501    }
502}
503
504/// Below are methods related to finding nodes in tree.
505impl Element {
506    pub fn parent(&self, doc: &Document) -> Option<Element> {
507        self.data(doc).parent
508    }
509
510    /// `self.parent(doc).is_some()`
511    pub fn has_parent(&self, doc: &Document) -> bool {
512        self.parent(doc).is_some()
513    }
514
515    /// Get child [`Node`]s of this element.
516    pub fn children<'a>(&self, doc: &'a Document) -> &'a Vec<Node> {
517        &self.data(doc).children
518    }
519
520    fn _children_recursive<'a>(&self, doc: &'a Document, nodes: &mut Vec<&'a Node>) {
521        for node in self.children(doc) {
522            nodes.push(node);
523            if let Node::Element(elem) = &node {
524                elem._children_recursive(doc, nodes);
525            }
526        }
527    }
528
529    /// Get all child nodes recursively. (i.e. includes its children's children.)
530    pub fn children_recursive<'a>(&self, doc: &'a Document) -> Vec<&'a Node> {
531        let mut nodes = Vec::new();
532        self._children_recursive(doc, &mut nodes);
533        nodes
534    }
535
536    /// `!self.children(doc).is_empty()`
537    pub fn has_children(&self, doc: &Document) -> bool {
538        !self.children(doc).is_empty()
539    }
540
541    /// Get only child [`Element`]s of this element.
542    ///
543    /// This calls `.children().iter().filter_map().collect()`.
544    /// Use [`Element::children()`] if performance is important.
545    pub fn child_elements(&self, doc: &Document) -> Vec<Element> {
546        self.children(doc)
547            .iter()
548            .filter_map(|node| {
549                if let Node::Element(elemid) = node {
550                    Some(*elemid)
551                } else {
552                    None
553                }
554            })
555            .collect()
556    }
557
558    /// Get child [`Element`]s recursively. (i.e. includes its child element's child elements)
559    pub fn child_elements_recursive(&self, doc: &Document) -> Vec<Element> {
560        self.children_recursive(doc)
561            .iter()
562            .filter_map(|node| {
563                if let Node::Element(elemid) = node {
564                    Some(*elemid)
565                } else {
566                    None
567                }
568            })
569            .collect()
570    }
571
572    /// Find first direct child element with name `name`.
573    pub fn find(&self, doc: &Document, name: &str) -> Option<Element> {
574        self.children(doc)
575            .iter()
576            .filter_map(|n| n.as_element())
577            .find(|e| e.name(doc) == name)
578    }
579
580    /// Find all direct child elements with name `name`.
581    pub fn find_all(&self, doc: &Document, name: &str) -> Vec<Element> {
582        self.children(doc)
583            .iter()
584            .filter_map(|n| n.as_element())
585            .filter(|e| e.name(doc) == name)
586            .collect()
587    }
588
589    /// A helper method that identifies child based on namespace if the namespace is
590    /// declared directly on this child.
591    fn has_self_declared_namespace(
592        &self,
593        doc: &Document,
594        prefix: &str,
595        namespace_url: &str,
596    ) -> bool {
597        let self_namespaces = self.namespace_decls(doc);
598        if let Some(namespace) = self_namespaces.get(prefix) {
599            namespace_url == namespace.as_str()
600        } else {
601            false
602        }
603    }
604
605    /// Find the first direct child element with the given tag `name` belonging to the
606    /// specified namespace (identified by a `namespace_url`).
607    ///
608    /// ```rust
609    /// use biodivine_xml_doc::Document;
610    ///
611    /// let mut doc = Document::parse_str(r#"<?xml version="1.0" encoding="UTF-8"?>
612    /// <parent xmlns:ns1="http://ns1" xmlns:ns2="http://ns2">
613    ///     <ns2:child id="1"/>
614    ///     <ns1:child id="2"/>
615    /// </parent>
616    /// "#).unwrap();
617    ///
618    /// let root = doc.root_element().unwrap();
619    /// let child = root.find_quantified(&doc, "child", "http://ns1").unwrap();
620    /// assert_eq!(child.attribute(&doc, "id"), Some("2"));
621    /// ```
622    pub fn find_quantified(
623        &self,
624        doc: &Document,
625        name: &str,
626        namespace_url: &str,
627    ) -> Option<Element> {
628        let admissible_prefix = self.collect_namespace_prefixes(doc, namespace_url);
629        for child in self.child_elements(doc) {
630            let (child_prefix, child_name) = child.prefix_name(doc);
631            if name != child_name {
632                continue;
633            }
634            if admissible_prefix.contains(child_prefix) {
635                return Some(child);
636            }
637            if child.has_self_declared_namespace(doc, child_prefix, namespace_url) {
638                return Some(child);
639            }
640        }
641        None
642    }
643
644    /// Find *all* the direct child elements with the given tag `name` belonging to the
645    /// specified namespace (identified by a `namespace_url`).
646    ///
647    /// ```rust
648    /// use biodivine_xml_doc::Document;
649    ///
650    /// let mut doc = Document::parse_str(r#"<?xml version="1.0" encoding="UTF-8"?>
651    /// <parent xmlns="http://ns1" xmlns:ns1="http://ns1" xmlns:ns2="http://ns2">
652    ///     <ns2:child id="1" />
653    ///     <child id="2" />
654    ///     <ns1:child id="3" />
655    /// </parent>
656    /// "#).unwrap();
657    ///
658    /// let root = doc.root_element().unwrap();
659    /// let children = root.find_all_quantified(&doc, "child", "http://ns1");
660    /// assert_eq!(children.len(), 2);
661    /// assert_eq!(children[0].attribute(&doc, "id"), Some("2"));
662    /// assert_eq!(children[1].attribute(&doc, "id"), Some("3"));
663    /// ```
664    pub fn find_all_quantified(
665        &self,
666        doc: &Document,
667        name: &str,
668        namespace_url: &str,
669    ) -> Vec<Element> {
670        let mut result = Vec::new();
671        let admissible_prefix = self.collect_namespace_prefixes(doc, namespace_url);
672        for child in self.child_elements(doc) {
673            let (child_prefix, child_name) = child.prefix_name(doc);
674            if name != child_name {
675                continue;
676            }
677            if admissible_prefix.contains(child_prefix) {
678                result.push(child);
679            }
680            if child.has_self_declared_namespace(doc, child_prefix, namespace_url) {
681                result.push(child);
682            }
683        }
684        result
685    }
686
687    /// Compute all namespace prefixes that are valid for the given `namespace_url` in the context
688    /// of *this* XML element.
689    ///
690    /// The default prefix is represented as an empty string slice.
691    ///
692    /// ```rust
693    /// use biodivine_xml_doc::Document;
694    ///
695    /// let mut doc = Document::parse_str(r#"<?xml version="1.0" encoding="UTF-8"?>
696    /// <parent xmlns="http://ns1" xmlns:ns1="http://ns1" xmlns:ns2="http://ns1">
697    ///     <child xmlns:ns2="http://ns2" />
698    /// </parent>
699    /// "#).unwrap();
700    ///
701    /// let root = doc.root_element().unwrap();
702    /// let child = root.child_elements(&doc)[0];
703    /// // Three prefixes: `default`, `ns1`, and `ns2`
704    /// assert_eq!(root.collect_namespace_prefixes(&doc, "http://ns1").len(), 3);
705    /// // Only two prefixes. `ns2` is overridden.
706    /// assert_eq!(child.collect_namespace_prefixes(&doc, "http://ns1").len(), 2);
707    /// ```
708    pub fn collect_namespace_prefixes<'a>(
709        &self,
710        doc: &'a Document,
711        namespace_url: &str,
712    ) -> HashSet<&'a str> {
713        /// The idea is that we first go all the way to the root element,
714        /// and then as we are returning from the recursion, we are adding prefix "candidates".
715        /// However, at the same time, we are removing candidates which are overwritten
716        /// by another prefix lower on the path.
717        fn recursion<'a>(
718            document: &'a Document,
719            valid_prefixes: &mut HashSet<&'a str>,
720            element: &Element,
721            namespace_url: &str,
722        ) {
723            if let Some(parent) = element.parent(document) {
724                recursion(document, valid_prefixes, &parent, namespace_url);
725            }
726            // At this point, `valid_prefixes` contains all prefixes that are declared in
727            // some of our parents for the requested URL. As such, we can go through the
728            // declarations in this tag and add new prefix if it is valid, or remove prefix
729            // if it is overwritten by a different url.
730            for (prefix, namespace) in element.namespace_decls(document) {
731                if namespace.as_str() == namespace_url {
732                    valid_prefixes.insert(prefix);
733                } else if valid_prefixes.contains(prefix.as_str()) {
734                    valid_prefixes.remove(prefix.as_str());
735                }
736            }
737        }
738
739        let mut result = HashSet::new();
740        if namespace_url.is_empty() {
741            // "no namespace" has by default an empty prefix, but this can be removed
742            // if a different namespace is found along the way.
743            result.insert("");
744        }
745        recursion(doc, &mut result, self, namespace_url);
746        result
747    }
748
749    /// Collect namespace declarations which apply to this XML `Element`.
750    ///
751    /// The result contains the empty prefix only if it is declared with a non-empty namespace url.
752    ///
753    /// ```rust
754    /// use std::collections::HashMap;
755    /// use biodivine_xml_doc::Document;
756    ///
757    /// let mut doc = Document::parse_str(r#"<?xml version="1.0" encoding="UTF-8"?>
758    /// <parent xmlns="http://ns1" xmlns:ns1="http://ns1" xmlns:ns2="http://ns1">
759    ///     <child xmlns:ns2="http://ns2">
760    ///         <ns1:child/>
761    ///         <ns2:child/>
762    ///     </child>
763    /// </parent>
764    /// "#).unwrap();
765    ///
766    /// let root = doc.root_element().unwrap();
767    /// let child = root.child_elements(&doc)[0];
768    /// let declarations = child.collect_applicable_namespace_decls(&doc);
769    /// // The result should contain "" and "ns1". "ns2" is-redeclared on child, so is not needed.
770    /// let expected = HashMap::from([
771    ///     ("ns2".to_string(), "http://ns2".to_string()),
772    ///     ("ns1".to_string(), "http://ns1".to_string()),
773    ///     ("".to_string(), "http://ns1".to_string())
774    /// ]);
775    /// assert_eq!(declarations.len(), 3);
776    /// assert_eq!(declarations, expected);
777    /// ```
778    pub fn collect_applicable_namespace_decls(&self, doc: &Document) -> HashMap<String, String> {
779        let mut e = *self;
780        let mut result = e.namespace_decls(doc).clone();
781        while let Some(parent) = e.parent(doc) {
782            e = parent;
783            for (prefix, url) in e.namespace_decls(doc) {
784                if !result.contains_key(prefix) {
785                    result.insert(prefix.clone(), url.clone());
786                }
787            }
788        }
789        result
790    }
791
792    /// Collect "parent" namespace declarations which apply to the XML sub-tree of this `Element`.
793    ///
794    /// "Parent" declarations are those which appear on one of the parent tags of `Element`,
795    /// not in the `Element` sub-tree. Each namespace prefix resolves to a specific URL based
796    /// on standard XML namespace shadowing rules.
797    ///
798    /// Note that the method can return a combination of an empty prefix and an empty url
799    /// when the sub-tree contains elements with no prefix and there is no default namespace url
800    /// declared by the parents.
801    ///
802    /// ```rust
803    /// use std::collections::HashMap;
804    /// use biodivine_xml_doc::Document;
805    ///
806    /// let mut doc = Document::parse_str(r#"<?xml version="1.0" encoding="UTF-8"?>
807    /// <parent xmlns="http://ns1" xmlns:ns1="http://ns1" xmlns:ns2="http://ns1">
808    ///     <child xmlns:ns2="http://ns2">
809    ///         <ns1:child/>
810    ///         <ns2:child/>
811    ///     </child>
812    /// </parent>
813    /// "#).unwrap();
814    ///
815    /// let root = doc.root_element().unwrap();
816    /// let child = root.child_elements(&doc)[0];
817    /// let declarations = child.collect_external_namespace_decls(&doc);
818    /// // The result should contain "" and "ns1". "ns2" is-redeclared on child, so is not needed.
819    /// let expected = HashMap::from([
820    ///     ("".to_string(), "http://ns1".to_string()),
821    ///     ("ns1".to_string(), "http://ns1".to_string())
822    /// ]);
823    /// assert_eq!(declarations.len(), 2);
824    /// assert_eq!(declarations, expected);
825    /// ```
826    pub fn collect_external_namespace_decls(&self, doc: &Document) -> HashMap<String, String> {
827        /// Collect all prefixes within the element subtree that are not declared
828        /// within the sub-tree itself.
829        fn collect_prefixes<'a>(
830            e: &Element,
831            doc: &'a Document,
832            known_prefixes: &HashSet<&'a str>,
833            unknown_prefixes: &mut HashSet<&'a str>,
834        ) {
835            let my_declarations = e.namespace_decls(doc);
836            if my_declarations.is_empty() {
837                // This element has no namespace declarations, hence we just check it and continue
838                // recursively to the child elements.
839                let my_prefix = e.prefix(doc);
840                if !known_prefixes.contains(my_prefix) {
841                    unknown_prefixes.insert(my_prefix);
842                }
843                for child in e.child_elements(doc) {
844                    collect_prefixes(&child, doc, known_prefixes, unknown_prefixes);
845                }
846            } else {
847                // This element actually has declarations, so we need to copy the existing prefix
848                // map and update it with new values.
849                let mut my_known_prefixes = known_prefixes.clone();
850                for prefix in my_declarations.keys() {
851                    my_known_prefixes.insert(prefix.as_str());
852                }
853                let my_prefix = e.prefix(doc);
854                if !known_prefixes.contains(my_prefix) {
855                    unknown_prefixes.insert(my_prefix);
856                }
857                for child in e.child_elements(doc) {
858                    collect_prefixes(&child, doc, &my_known_prefixes, unknown_prefixes);
859                }
860            }
861        }
862
863        let known = HashSet::new();
864        let mut unknown = HashSet::new();
865        collect_prefixes(self, doc, &known, &mut unknown);
866
867        unknown
868            .into_iter()
869            .map(|prefix| {
870                let Some(namespace) = self.namespace_for_prefix(doc, prefix) else {
871                    panic!("Invalid XML document. Prefix `{}` not declared.", prefix);
872                };
873                (prefix.to_string(), namespace.to_string())
874            })
875            .collect::<HashMap<_, _>>()
876    }
877
878    /// Find the "closest" namespace prefix which is associated with the given `namespace_url`.
879    ///
880    /// If the namespace is declared on the element itself, then its prefix is returned.
881    /// Otherwise, the closest parent with the declared namespace is found and this prefix
882    /// is returned. If the namespace is not declared for this element, `None` is returned.
883    ///
884    /// If the "closest" element has multiple declarations of the namespace in question,
885    /// the lexicographically first prefix is return (i.e. compared through standard
886    /// string ordering).
887    ///
888    /// You can use empty namespace url to signify "no namespace", in which case the method
889    /// can only return an empty prefix, but it can also return `None` if there is a default
890    /// namespace which prevents you from having "no namespace" on this element.
891    ///
892    /// ```rust
893    /// use biodivine_xml_doc::Document;
894    ///
895    /// let mut doc = Document::parse_str(r#"<?xml version="1.0" encoding="UTF-8"?>
896    /// <parent xmlns="http://ns1" xmlns:ns1="http://ns1" xmlns:ns2="http://ns2">
897    ///     <child xmlns:ns="http://ns2" />
898    /// </parent>
899    /// "#).unwrap();
900    ///
901    /// let root = doc.root_element().unwrap();
902    /// let child = root.child_elements(&doc)[0];
903    /// assert_eq!(root.closest_prefix(&doc, "http://ns1"), Some(""));
904    /// assert_eq!(root.closest_prefix(&doc, "http://ns2"), Some("ns2"));
905    /// assert_eq!(child.closest_prefix(&doc, "http://ns1"), Some(""));
906    /// assert_eq!(child.closest_prefix(&doc, "http://ns2"), Some("ns"));
907    /// ```
908    ///
909    pub fn closest_prefix<'a>(&self, doc: &'a Document, namespace_url: &str) -> Option<&'a str> {
910        let mut search = *self;
911        loop {
912            let mut candidate: Option<&str> = None;
913            for (prefix, url) in search.namespace_decls(doc) {
914                if url == namespace_url {
915                    if let Some(current) = candidate {
916                        if prefix.as_str() < current {
917                            candidate = Some(prefix);
918                        }
919                    } else {
920                        candidate = Some(prefix);
921                    }
922                }
923            }
924            if candidate.is_some() {
925                return candidate;
926            }
927            if let Some(parent) = search.parent(doc) {
928                search = parent;
929            } else if namespace_url.is_empty() {
930                return Some("");
931            } else {
932                return None;
933            }
934        }
935    }
936}
937
938/// Below are functions that modify its tree-structure.
939///
940/// Because an element has reference to both its parent and its children,
941/// an element's parent and children is not directly exposed for modification.
942/// But in return, it is not possible for a document to be in an inconsistant state,
943/// where an element's parent doesn't have the element as its children.
944impl Element {
945    /// Equivalent to `vec.push()`.
946    /// # Errors
947    /// - [`Error::HasAParent`]: When you want to replace an element's parent with another,
948    /// call `element.detatch()` to make it parentless first.
949    /// This is to make it explicit that you are changing an element's parent, not adding another.
950    /// - [`Error::ContainerCannotMove`]: The container element's parent must always be None.
951    pub fn push_child(&self, doc: &mut Document, node: Node) -> Result<()> {
952        if let Node::Element(elem) = node {
953            if elem.is_container() {
954                return Err(Error::ContainerCannotMove);
955            }
956            let data = elem.mut_data(doc);
957            if data.parent.is_some() {
958                return Err(Error::HasAParent);
959            }
960            data.parent = Some(*self);
961        }
962        self.mut_data(doc).children.push(node);
963        Ok(())
964    }
965
966    /// Equivalent to `parent.push_child()`.
967    ///
968    /// # Errors
969    /// - [`Error::HasAParent`]: When you want to replace an element's parent with another,
970    /// call `element.detatch()` to make it parentless first.
971    /// This is to make it explicit that you are changing an element's parent, not adding another.
972    /// - [`Error::ContainerCannotMove`]: The container element's parent must always be None.
973    pub fn push_to(&self, doc: &mut Document, parent: Element) -> Result<()> {
974        parent.push_child(doc, self.as_node())
975    }
976
977    /// Equivalent to `vec.insert()`.
978    ///
979    /// # Panics
980    ///
981    /// Panics if `index > self.children().len()`
982    ///
983    /// # Errors
984    /// - [`Error::HasAParent`]: When you want to replace an element's parent with another,
985    /// call `element.detatch()` to make it parentless first.
986    /// This is to make it explicit that you are changing an element's parent, not adding another.
987    /// - [`Error::ContainerCannotMove`]: The container element's parent must always be None.
988    pub fn insert_child(&self, doc: &mut Document, index: usize, node: Node) -> Result<()> {
989        if let Node::Element(elem) = node {
990            if elem.is_container() {
991                return Err(Error::ContainerCannotMove);
992            }
993            let data = elem.mut_data(doc);
994            if data.parent.is_some() {
995                return Err(Error::HasAParent);
996            }
997            data.parent = Some(*self);
998        }
999        self.mut_data(doc).children.insert(index, node);
1000        Ok(())
1001    }
1002
1003    /// Equivalent to `vec.remove()`.
1004    ///
1005    /// # Panics
1006    ///
1007    /// Panics if `index >= self.children().len()`.
1008    pub fn remove_child(&self, doc: &mut Document, index: usize) -> Node {
1009        let node = self.mut_data(doc).children.remove(index);
1010        if let Node::Element(elem) = node {
1011            elem.mut_data(doc).parent = None;
1012        }
1013        node
1014    }
1015
1016    /// Equivalent to `vec.pop()`.
1017    pub fn pop_child(&self, doc: &mut Document) -> Option<Node> {
1018        let child = self.mut_data(doc).children.pop();
1019        if let Some(Node::Element(elem)) = &child {
1020            elem.mut_data(doc).parent = None;
1021        }
1022        child
1023    }
1024
1025    /// Remove all children and return them.
1026    pub fn clear_children(&self, doc: &mut Document) -> Vec<Node> {
1027        let count = self.children(doc).len();
1028        let mut removed = Vec::with_capacity(count);
1029        for _ in 0..count {
1030            let child = self.remove_child(doc, 0);
1031            removed.push(child);
1032        }
1033        removed
1034    }
1035
1036    /// Removes itself from its parent. Note that you can't attach this element to other documents.
1037    ///
1038    /// # Errors
1039    ///
1040    /// - [`Error::ContainerCannotMove`]: You can't detatch container element
1041    pub fn detatch(&self, doc: &mut Document) -> Result<()> {
1042        if self.is_container() {
1043            return Err(Error::ContainerCannotMove);
1044        }
1045        let data = self.mut_data(doc);
1046        if let Some(parent) = data.parent {
1047            let pos = parent
1048                .children(doc)
1049                .iter()
1050                .position(|n| n.as_element() == Some(*self))
1051                .unwrap();
1052            parent.remove_child(doc, pos);
1053        }
1054        Ok(())
1055    }
1056}
1057
#[cfg(test)]
mod tests {
    use super::{Document, Element, Node};

    /// Parsed documents expose direct and recursive children in document order.
    #[test]
    fn test_children() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
        <outer>
            inside outer
            <middle>
                <inner>
                    inside
                </inner>
                after inside
            </middle>
            <after>
                inside after
            </after>
        </outer>
        "#;
        let doc = Document::parse_str(xml).unwrap();
        let container = doc.container();

        let top_level = container.child_elements(&doc);
        let outer = top_level[0];
        let outer_elements = outer.child_elements(&doc);
        let middle = outer_elements[0];
        let inner = middle.child_elements(&doc)[0];
        let after = outer_elements[1];

        assert_eq!(container.child_elements(&doc).len(), 1);
        assert_eq!(outer.name(&doc), "outer");
        assert_eq!(middle.name(&doc), "middle");
        assert_eq!(inner.name(&doc), "inner");
        assert_eq!(after.name(&doc), "after");
        assert_eq!(outer.children(&doc).len(), 3);
        assert_eq!(outer.child_elements(&doc).len(), 2);
        assert_eq!(container.children_recursive(&doc).len(), 8);
        assert_eq!(
            container.child_elements_recursive(&doc),
            vec![outer, middle, inner, after]
        );
    }

    /// Prefixes and namespaces resolve according to the closest declaration.
    #[test]
    fn test_namespace() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
        <root xmlns="ns" xmlns:p="pns">
            <p:foo xmlns="inner">
                Hello
            </p:foo>
            <p:bar xmlns:p="in2">
                <c />
                World!
            </p:bar>
        </root>"#;
        let doc = Document::parse_str(xml).unwrap();
        let root = doc.container().children(&doc)[0].as_element().unwrap();
        let root_elements = root.child_elements(&doc);
        let foo = *root_elements.first().unwrap();
        let bar = *root_elements.get(1).unwrap();
        let c = bar.child_elements(&doc)[0];

        // Name handling.
        assert_eq!(c.prefix_name(&doc), ("", "c"));
        assert_eq!(bar.full_name(&doc), "p:bar");
        assert_eq!(bar.prefix(&doc), "p");
        assert_eq!(bar.name(&doc), "bar");

        // Namespace resolution from the innermost element outwards.
        assert_eq!(c.namespace(&doc).unwrap(), "ns");
        assert_eq!(c.namespace_for_prefix(&doc, "p").unwrap(), "in2");
        assert!(c.namespace_for_prefix(&doc, "random").is_none());
        assert_eq!(bar.namespace(&doc).unwrap(), "in2");
        assert_eq!(bar.namespace_for_prefix(&doc, "").unwrap(), "ns");
        assert_eq!(foo.namespace(&doc).unwrap(), "pns");
        assert_eq!(foo.namespace_for_prefix(&doc, "").unwrap(), "inner");
        assert_eq!(foo.namespace_for_prefix(&doc, "p").unwrap(), "pns");
        assert_eq!(root.namespace(&doc).unwrap(), "ns");
    }

    /// `find` locates a child by name and `text_content` concatenates nested text.
    #[test]
    fn test_find_text_content() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
        <core>
            <p>Text</p>
            <b>Text2</b>
        </core>
        "#;
        let doc = Document::parse_str(xml).unwrap();

        for (tag, expected) in [("p", "Text"), ("b", "Text2")] {
            let root = doc.root_element().unwrap();
            let found = root.find(&doc, tag).unwrap();
            assert_eq!(found.text_content(&doc), expected);
        }
        assert_eq!(doc.root_element().unwrap().text_content(&doc), "TextText2");
    }

    /// Parent/child links stay consistent after every tree mutation.
    #[test]
    fn test_mutate_tree() {
        let mut doc = Document::new();
        let container = doc.container();
        assert_eq!(container.parent(&doc), None);
        assert_eq!(container.children(&doc).len(), 0);

        // Element::build.push_to
        let root = Element::build("root").push_to(&mut doc, container);
        assert_eq!(root.parent(&doc).unwrap(), container);
        assert_eq!(doc.root_element().unwrap(), root);

        // Element::new
        let pushed = Element::new(&mut doc, "a");
        assert_eq!(pushed.parent(&doc), None);

        // Element.push_child
        root.push_child(&mut doc, Node::Element(pushed)).unwrap();
        assert_eq!(root.children(&doc)[0].as_element().unwrap(), pushed);
        assert_eq!(pushed.parent(&doc).unwrap(), root);

        // Element.pop
        let popped = root.pop_child(&mut doc).unwrap().as_element().unwrap();
        assert_eq!(popped, pushed);
        assert_eq!(root.children(&doc).len(), 0);
        assert_eq!(pushed.parent(&doc), None);

        // Element.push_to
        let attached = Element::new(&mut doc, "a");
        attached.push_to(&mut doc, root).unwrap();
        assert_eq!(root.children(&doc)[0].as_element().unwrap(), attached);
        assert_eq!(attached.parent(&doc).unwrap(), root);

        // Element.remove_child
        root.remove_child(&mut doc, 0);
        assert_eq!(root.children(&doc).len(), 0);
        assert_eq!(attached.parent(&doc), None);

        // Element.insert_child
        let inserted = Element::new(&mut doc, "a");
        root.insert_child(&mut doc, 0, Node::Element(inserted))
            .unwrap();
        assert_eq!(root.children(&doc)[0].as_element().unwrap(), inserted);
        assert_eq!(inserted.parent(&doc).unwrap(), root);

        // Element.detatch
        inserted.detatch(&mut doc).unwrap();
        assert_eq!(root.children(&doc).len(), 0);
        assert_eq!(inserted.parent(&doc), None);
    }
}