xot/
nameaccess.rs

1use ahash::{HashMap, HashSet};
2use genawaiter::rc::gen;
3use genawaiter::yield_;
4
5use crate::access::NodeEdge;
6use crate::error::Error;
7use crate::id::{Name, NameId, NamespaceId, PrefixId};
8use crate::output::FullnameSerializer;
9use crate::xmlvalue::Prefixes;
10use crate::xotdata::{Node, Xot};
11use crate::{xmlname, Value};
12
13/// ## Names, namespaces and prefixes.
14///
15/// Xot does not let you use names, prefixes and URIs directly. Instead you use
16/// the types [`NameId`], [`NamespaceId`] and [`PrefixId`] to refer to these.
17///
18/// This has some advantages:
19///
20/// * It's faster to compare and hash names, namespaces and prefixes.
21///
22/// * It takes less memory to store a tree.
23///
24/// * You get type-checks and can't mix up names, namespaces and prefixes.
25///
26/// Names, namespaces and prefixes are shared in a single Xot, so are the same
27/// in multiple trees. This makes it safe to copy and move nodes between trees.
28/// If you care about the readability of the serialized XML you do need to
29/// ensure that each tree uses `xmlns` attributes to declare the namespaces it
30/// uses; otherwise prefixes are generated during serialization.
31///
32/// The minor drawback is that you need to use multiple steps to create a name,
33/// prefix or namespace for use, or to access the string value of a name,
/// prefix or namespace. This drawback may be an advantage at times, as typical
35/// code needs to use a single name, namespace or prefix multiple times, so
36/// assigning to a variable is more convenient than repeating strings.
37///
38/// There are also APIs that help with namespace access and manipulation in
39/// other sections: [`Xot::namespaces`], [`Xot::namespaces_mut`], and
40/// [`Xot::get_namespace`], [`Xot::set_namespace`] and
41/// [`Xot::remove_namespace`], and [`Xot::append_namespace_node`].
42impl Xot {
43    /// Look up name without a namespace.
44    ///
45    /// This is the immutable version of [`Xot::add_name`]; it returns
46    /// `None` if the name doesn't exist.
47    ///
48    /// ```rust
49    /// use xot::Xot;
50    ///
51    /// let mut xot = Xot::new();
52    /// assert!(xot.name("z").is_none());
53    ///
54    /// let name = xot.add_name("z");
55    /// assert_eq!(xot.name("z"), Some(name));
56    /// ```
57    pub fn name(&self, name: &str) -> Option<NameId> {
58        self.name_ns(name, self.no_namespace_id)
59    }
60
61    /// Add name without a namespace.
62    ///
63    /// If the name already exists, return its id, otherwise creates it.
64    ///
65    /// ```rust
66    /// use xot::Xot;
67    ///
68    /// let mut xot = Xot::new();
69    ///
70    /// let name = xot.add_name("a");
71    /// // the namespace is "" for no namespace
72    /// assert_eq!(xot.name_ns_str(name), ("a", ""));
73    ///
74    /// let root = xot.parse(r#"<doc/>"#)?;
75    /// let doc_el = xot.document_element(root).unwrap();
76    /// // add an element, using the name
77    /// let node = xot.append_element(doc_el, name)?;
78    ///
79    /// assert_eq!(xot.to_string(root)?, "<doc><a/></doc>");
80    ///
81    /// # Ok::<(), xot::Error>(())
82    /// ```
83    pub fn add_name(&mut self, name: &str) -> NameId {
84        self.add_name_ns(name, self.no_namespace_id)
85    }
86
87    /// Look up name with a namespace.
88    ///
89    /// ```rust
90    /// use xot::Xot;
91    ///
92    /// let mut xot = Xot::new();
93    ///
94    /// let ns = xot.add_namespace("http://example.com");
95    /// let name = xot.add_name_ns("a", ns);
96    /// assert_eq!(xot.name_ns_str(name), ("a", "http://example.com"));
97    ///
98    /// # Ok::<(), xot::Error>(())
99    /// ```
100    ///
101    /// Look up name of an element:
102    ///
103    /// ```rust
104    /// use xot::Xot;
105    ///
106    /// let mut xot = Xot::new();
107    /// let root = xot.parse(r#"<doc xmlns="http://example.com"><a/></doc>"#)?;
108    /// let doc_el = xot.document_element(root).unwrap();
109    ///
110    /// let doc_value = xot.element(doc_el).unwrap();
111    ///
112    /// // get the name of the element
113    /// let name = xot.name_ns_str(doc_value.name());
114    ///
115    /// # Ok::<(), xot::Error>(())
116    /// ```
117    pub fn name_ns(&self, name: &str, namespace_id: NamespaceId) -> Option<NameId> {
118        self.name_lookup.get_id(&Name::new(name, namespace_id))
119    }
120
121    /// Add name with a namespace.
122    ///
123    /// If the name already exists, return its id.
124    ///
125    /// ```rust
126    /// use xot::Xot;
127    ///
128    /// let mut xot = Xot::new();
129    ///
130    /// let ns = xot.add_namespace("http://example.com");
131    /// let name_a = xot.add_name_ns("a", ns);
132    ///
133    /// let root = xot.parse(r#"<doc xmlns="http://example.com"><a/></doc>"#)?;
134    /// let doc_el = xot.document_element(root).unwrap();
135    /// let a_el = xot.first_child(doc_el).unwrap();
136    ///
137    /// let doc_value = xot.element(doc_el).unwrap();
138    /// let a_value = xot.element(a_el).unwrap();
139    ///
140    /// // we know a is the right name, but doc is not
141    /// assert_eq!(a_value.name(), name_a);
142    /// assert_ne!(doc_value.name(), name_a);
143    ///
144    /// # Ok::<(), xot::Error>(())
145    /// ```
146    pub fn add_name_ns(&mut self, name: &str, namespace_id: NamespaceId) -> NameId {
147        self.name_lookup.get_id_mut(&Name::new(name, namespace_id))
148    }
149
150    /// Look up namespace.
151    ///
152    /// This is the immutable version of [`Xot::add_namespace`]; it returns
153    /// `None` if the namespace doesn't exist.
154    pub fn namespace(&self, namespace: &str) -> Option<NamespaceId> {
155        self.namespace_lookup.get_id(namespace)
156    }
157
158    /// Add namespace.
159    ///
160    /// If the namespace already exists, return its id.
161    pub fn add_namespace(&mut self, namespace: &str) -> NamespaceId {
162        self.namespace_lookup.get_id_mut(namespace)
163    }
164
165    /// Look up prefix.
166    ///
167    /// This is the immutable version of [`Xot::add_prefix`]; it returns
168    /// `None` if the prefix doesn't exist.
169    pub fn prefix(&self, prefix: &str) -> Option<PrefixId> {
170        self.prefix_lookup.get_id(prefix)
171    }
172
173    /// Add prefix.
174    ///
175    /// If the prefix already exists, return its id.
176    pub fn add_prefix(&mut self, prefix: &str) -> PrefixId {
177        self.prefix_lookup.get_id_mut(prefix)
178    }
179
180    /// No namespace
181    ///
182    /// Returns the namespace id used when an element or attribute
183    /// isn't in any namespace.
184    #[inline]
185    pub fn no_namespace(&self) -> NamespaceId {
186        self.no_namespace_id
187    }
188
189    /// Empty prefix
190    ///
191    /// Returns the prefix id used when an element or attribute
192    /// doesn't have a prefix.
193    #[inline]
194    pub fn empty_prefix(&self) -> PrefixId {
195        self.empty_prefix_id
196    }
197
198    /// XML prefix
199    ///
200    /// The prefix `xml` used for the XML namespace.
201    #[inline]
202    pub fn xml_prefix(&self) -> PrefixId {
203        self.xml_prefix_id
204    }
205
206    /// XML namespace
207    ///
208    /// Returns the namespace id used for the XML namespace.
209    ///
    /// Also known as `http://www.w3.org/XML/1998/namespace`
211    #[inline]
212    pub fn xml_namespace(&self) -> NamespaceId {
213        self.xml_namespace_id
214    }
215
216    /// xml:space
217    ///
218    /// Returns the name id used for the `xml:space` attribute.
219    #[inline]
220    pub fn xml_space_name(&self) -> NameId {
221        self.xml_space_id
222    }
223
224    /// xml:id
    ///
226    /// Returns the name id used for the `xml:id` attribute.
227    #[inline]
228    pub fn xml_id_name(&self) -> NameId {
229        self.xml_id_id
230    }
231
232    /// Given a name id, and a context node (to provide namespace prefix
233    /// lookup), return a [`xmlname::RefName`]. If you import the trait
234    /// [`xmlname::NameStrInfo`] you can look up more information about the
235    /// name.
236    ///
237    /// ```rust
238    /// use xot::Xot;
239    /// use xot::xmlname::NameStrInfo;
240    ///
241    /// let mut xot = Xot::new();
242    /// let root = xot.parse(r#"<ex:doc xmlns:ex="http://example.com"><a/></ex:doc>"#)?;
243    /// let doc_el = xot.document_element(root).unwrap();
244    /// let a_el = xot.first_child(doc_el).unwrap();
245    ///
246    /// let doc_name = xot.name_ref(xot.node_name(doc_el).unwrap(), a_el)?;
247    ///
248    /// assert_eq!(doc_name.local_name(), "doc");
249    /// assert_eq!(doc_name.namespace(), "http://example.com");
250    /// assert_eq!(doc_name.prefix(), "ex");
251    /// assert_eq!(doc_name.full_name(), "ex:doc");
252    ///
253    /// let a_name = xot.name_ref(xot.node_name(a_el).unwrap(), a_el)?;
254    /// assert_eq!(a_name.local_name(), "a");
255    /// assert_eq!(a_name.namespace(), "");
256    /// assert_eq!(a_name.prefix(), "");
257    /// assert_eq!(a_name.full_name(), "a");
258    ///
259    /// # Ok::<(), xot::Error>(())
260    /// ```
261    pub fn name_ref(&self, name_id: NameId, context: Node) -> Result<xmlname::RefName, Error> {
262        xmlname::RefName::from_node(self, context, name_id)
263    }
264
    /// Look up localname, namespace uri for name id
267    ///
268    /// If this name id is not in a namespace, the namespace uri is the empty
269    /// string.
270    ///
271    /// No namespace:
272    ///
273    /// ```rust
274    /// use xot::Xot;
275    ///
276    /// let mut xot = Xot::new();
277    /// let root = xot.parse(r#"<doc><a/></doc>"#)?;
278    /// let doc_el = xot.document_element(root).unwrap();
279    /// let a_el = xot.first_child(doc_el).unwrap();
280    ///
281    /// let a_value = xot.element(a_el).unwrap();
282    ///
283    /// let (localname, namespace) = xot.name_ns_str(a_value.name());
284    /// assert_eq!(localname, "a");
285    /// assert_eq!(namespace, "");
286    /// # Ok::<(), xot::Error>(())
287    /// ```
288    ///
289    /// With namespace:
290    /// ```rust
291    /// use xot::Xot;
292    ///
293    /// let mut xot = Xot::new();
294    /// let root = xot.parse(r#"<doc xmlns="http://example.com"><a/></doc>"#)?;
295    /// let doc_el = xot.document_element(root).unwrap();
296    /// let a_el = xot.first_child(doc_el).unwrap();
297    ///
298    /// let a_value = xot.element(a_el).unwrap();
299    ///
300    /// let (localname, namespace) = xot.name_ns_str(a_value.name());
301    /// assert_eq!(localname, "a");
302    /// assert_eq!(namespace, "http://example.com");
303    /// # Ok::<(), xot::Error>(())
304    /// ```
305    #[inline]
306    pub fn name_ns_str(&self, name: NameId) -> (&str, &str) {
307        let name = self.name_lookup.get_value(name);
308        let namespace = self.namespace_lookup.get_value(name.namespace_id);
309        (name.name.as_ref(), namespace)
310    }
311
312    /// Get the localname of a name.
313    #[inline]
314    pub fn local_name_str(&self, name: NameId) -> &str {
315        let name = self.name_lookup.get_value(name);
316        name.name.as_ref()
317    }
318
319    /// Get the namespace URI of a name
320    #[inline]
321    pub fn uri_str(&self, name: NameId) -> &str {
322        let name = self.name_lookup.get_value(name);
323        self.namespace_str(name.namespace_id)
324    }
325
326    /// Look up namespace uri for namespace id
327    ///
328    /// An empty string slice indicates the no namespace.
329    #[inline]
330    pub fn namespace_str(&self, namespace: NamespaceId) -> &str {
331        let namespace = self.namespace_lookup.get_value(namespace);
332        namespace
333    }
334
335    /// Look up string slice for prefix id
336    ///
337    /// If the prefix id is the empty prefix, the string slice is the empty string.
338    #[inline]
339    pub fn prefix_str(&self, prefix: PrefixId) -> &str {
340        let prefix = self.prefix_lookup.get_value(prefix);
341        prefix
342    }
343
344    /// Get the Namespace for a Name
345    ///
346    /// ```rust
347    /// use xot::Xot;
348    ///
349    /// let mut xot = Xot::new();
350    /// let ns = xot.add_namespace("http://example.com");
351    /// let name = xot.add_name_ns("a", ns);
352    ///
353    /// assert_eq!(xot.namespace_for_name(name), ns);
354    /// # Ok::<(), xot::Error>(())
355    /// ```
356    #[inline]
357    pub fn namespace_for_name(&self, name: NameId) -> NamespaceId {
358        self.name_lookup.get_value(name).namespace_id
359    }
360
361    /// Full name.
362    ///
363    /// Given a context node, determine the full name string of the given name.
364    ///
365    /// If the name doesn't have a namespace, that's identical to the localname.
366    /// If the name is in a namespace, a prefix is looked up. If no prefix
367    /// exists, that's an error.
368    ///
369    /// ```rust
370    /// use xot::Xot;
371    ///
372    /// // prefixed
373    /// let mut xot = Xot::new();
374    /// let doc = xot.parse(r#"<foo:doc xmlns:foo="http://example.com"/>"#)?;
375    /// let doc_el = xot.document_element(doc).unwrap();
376    /// let name = xot.node_name(doc_el).unwrap();
377    ///
    /// let full_name = xot.full_name(doc_el, name)?;
380    /// assert_eq!(full_name, "foo:doc");
381    ///
382    /// // default namespace
383    /// let doc = xot.parse(r#"<doc xmlns="http://example.com"/>"#)?;
384    /// let doc_el = xot.document_element(doc).unwrap();
385    /// let name = xot.node_name(doc_el).unwrap();
386    /// let full_name = xot.full_name(doc_el, name)?;
387    /// assert_eq!(full_name, "doc");
388    ///
389    /// // no namespace
390    /// let doc = xot.parse(r#"<doc/>"#)?;
391    /// let doc_el = xot.document_element(doc).unwrap();
392    /// let name = xot.node_name(doc_el).unwrap();
393    /// let full_name = xot.full_name(doc_el, name)?;
394    /// assert_eq!(full_name, "doc");
395    ///
396    /// # Ok::<(), xot::Error>(())
397    /// ```
398    pub fn full_name(&self, node: Node, name: NameId) -> Result<String, Error> {
399        let namespace = self.namespace_for_name(name);
400        let local_name = self.local_name_str(name);
401        if namespace == self.no_namespace() {
402            return Ok(local_name.to_string());
403        }
404        // look up the prefix for the namespace
405        if let Some(prefix) = self.prefix_for_namespace(node, namespace) {
406            let prefix = self.prefix_str(prefix);
407            if !prefix.is_empty() {
408                Ok(format!("{}:{}", prefix, local_name))
409            } else {
410                Ok(local_name.to_string())
411            }
412        } else {
413            Err(Error::MissingPrefix(
414                self.namespace_str(namespace).to_string(),
415            ))
416        }
417    }
418
419    /// Given a node, give back the name id of this node.
420    ///
    /// For elements and attributes this is their name; for processing
    /// instructions it is a name based on the target.
423    ///
424    /// For anything else, it's `None`.
425    pub fn node_name(&self, node: Node) -> Option<NameId> {
426        match self.value(node) {
427            Value::Element(element) => Some(element.name()),
428            Value::Text(..) => None,
429            Value::ProcessingInstruction(pi) => Some(pi.target()),
430            Value::Comment(..) => None,
431            Value::Document => None,
432            Value::Attribute(attribute) => Some(attribute.name()),
433            Value::Namespace(_) => None,
434        }
435    }
436
437    /// Given a node, give back the [`xmlname::RefName`] of this node.
438    ///
    /// For elements and attributes this is their name; for processing
    /// instructions it is a name based on the target.
441    ///
442    /// For anything else, it's `None`.
443    ///
444    /// ```rust
445    /// use xot::Xot;
446    /// use xot::xmlname::NameStrInfo;
447    ///
448    /// let mut xot = Xot::new();
449    /// let root = xot.parse(r#"<ex:doc xmlns:ex="http://example.com" ex:b="B"><a/></ex:doc>"#)?;
450    /// let doc_el = xot.document_element(root).unwrap();
451    /// let a_el = xot.first_child(doc_el).unwrap();
452    ///
453    /// let doc_name = xot.node_name_ref(doc_el)?.unwrap();
454    /// assert_eq!(doc_name.local_name(), "doc");
455    /// assert_eq!(doc_name.namespace(), "http://example.com");
456    /// assert_eq!(doc_name.prefix(), "ex");
457    /// assert_eq!(doc_name.full_name(), "ex:doc");
458    ///
459    /// let a_name = xot.node_name_ref(a_el)?.unwrap();
460    /// assert_eq!(a_name.local_name(), "a");
461    /// assert_eq!(a_name.namespace(), "");
462    /// assert_eq!(a_name.prefix(), "");
463    /// assert_eq!(a_name.full_name(), "a");
464    ///
465    /// // it also works on attribute nodes
466    /// let b_attribute = xot.attributes(doc_el).nodes().next().unwrap();
467    /// let b_name = xot.node_name_ref(b_attribute)?.unwrap();
468    /// assert_eq!(b_name.local_name(), "b");
469    /// assert_eq!(b_name.namespace(), "http://example.com");
470    /// assert_eq!(b_name.prefix(), "ex");
471    /// assert_eq!(b_name.full_name(), "ex:b");
472    ///
473    /// # Ok::<(), xot::Error>(())
474    /// ```
475    pub fn node_name_ref(&self, node: Node) -> Result<Option<xmlname::RefName>, Error> {
476        if let Some(name) = self.node_name(node) {
477            Ok(Some(self.name_ref(name, node)?))
478        } else {
479            Ok(None)
480        }
481    }
482
483    /// Check whether a prefix is defined in node or its ancestors.
484    pub fn is_prefix_defined(&self, node: Node, prefix: PrefixId) -> bool {
485        for ancestor in self.ancestors(node) {
486            if self.namespaces(ancestor).contains_key(prefix) {
487                return true;
488            }
489        }
490        if self.base_prefixes().contains_key(&prefix) {
491            return true;
492        }
493        false
494    }
495
496    /// Find prefixes we inherit from ancestors and aren't defined locally
497    pub fn inherited_prefixes(&self, node: Node) -> Prefixes {
498        let prefixes = if let Some(node) = self.parent(node) {
499            self.prefixes_in_scope(node)
500        } else {
501            Prefixes::new()
502        };
503        // now filter these by namespaces actually required
504        let unresolved_namespaces = HashSet::from_iter(self.unresolved_namespaces(node));
505        prefixes
506            .into_iter()
507            .filter(|(_, ns)| unresolved_namespaces.contains(ns))
508            .collect::<Prefixes>()
509    }
510
511    /// Find prefix for a namespace in node or ancestors.
512    ///
513    /// Returns `None` if no prefix is defined for the namespace.
514    pub fn prefix_for_namespace(&self, node: Node, namespace: NamespaceId) -> Option<PrefixId> {
515        let mut seen = HashSet::default();
516
517        for ancestor in self.ancestors(node) {
518            for (key, value) in self.namespaces(ancestor).iter() {
519                if seen.contains(&key) {
520                    return None;
521                }
522                seen.insert(key);
523                if *value == namespace {
524                    return Some(key);
525                }
526            }
527        }
528        for (key, value) in self.base_prefixes() {
529            if seen.contains(&key) {
530                return None;
531            }
532            seen.insert(key);
533            if value == namespace {
534                return Some(key);
535            }
536        }
537        None
538    }
539
540    /// Find namespace for prefix in node or ancestors.
541    ///
542    /// Return `None` if no namespace is defined for the prefix.
543    pub fn namespace_for_prefix(&self, node: Node, prefix: PrefixId) -> Option<NamespaceId> {
544        for ancestor in self.ancestors(node) {
545            if let Some(namespace) = self.namespaces(ancestor).get(prefix) {
546                if *namespace == self.no_namespace() {
547                    return None;
548                }
549                return Some(*namespace);
550            }
551        }
552        for (key, value) in self.base_prefixes() {
553            if key == prefix {
554                return Some(value);
555            }
556        }
557        None
558    }
559
560    /// Creating missing prefixes.
561    ///
562    /// Due to creation or moving subtrees you can end up with XML elements or
563    /// attributes that have names in a namespace without a prefix to define
564    /// the namespace in its ancestors.
565    ///
566    /// This function creates the missing prefixes on the given node. The
567    /// prefixes are named "n0", "n1", "n2", etc.
568    ///
569    /// You can use this function just before serializing the tree to XML
570    /// using [`Xot::write`] or [`Xot::to_string`].
571    pub fn create_missing_prefixes(&mut self, node: Node) -> Result<(), Error> {
572        let node = if self.is_document(node) {
573            self.document_element(node).unwrap()
574        } else {
575            node
576        };
577        if !self.is_element(node) {
578            return Err(Error::NotElement(node));
579        };
580        let mut fullname_serializer = FullnameSerializer::new(self, vec![]);
581        let mut missing_namespace_ids = HashSet::default();
582        for edge in self.traverse(node) {
583            match edge {
584                NodeEdge::Start(node) => {
585                    let element = self.element(node);
586                    if let Some(element) = element {
587                        fullname_serializer.push(self.namespace_declarations(node));
588                        let element_fullname =
589                            fullname_serializer.element_fullname(element.name_id);
590                        if element_fullname.is_err() {
591                            let namespace_id = self.namespace_for_name(element.name_id);
592                            missing_namespace_ids.insert(namespace_id);
593                        }
594                        for name_id in self.attributes(node).keys() {
595                            let attribute_fullname =
596                                fullname_serializer.attribute_fullname(name_id);
597                            if attribute_fullname.is_err() {
598                                let namespace_id = self.namespace_for_name(name_id);
599                                missing_namespace_ids.insert(namespace_id);
600                            }
601                        }
602                    }
603                }
604                NodeEdge::End(node) => {
605                    if self.is_element(node) {
606                        fullname_serializer.pop(self.has_namespace_declarations(node));
607                    }
608                }
609            }
610        }
611        let mut prefixes_to_add = HashMap::default();
612        for (i, namespace_id) in missing_namespace_ids.iter().enumerate() {
613            let prefix = format!("n{}", i);
614            let prefix_id = self.add_prefix(&prefix);
615            prefixes_to_add.insert(prefix_id, namespace_id);
616        }
617        let mut namespaces = self.namespaces_mut(node);
618
619        for (prefix_id, namespace_id) in prefixes_to_add {
620            namespaces.insert(prefix_id, *namespace_id);
621        }
622        Ok(())
623    }
624
625    /// Deduplicate namespaces.
626    ///
627    /// Any namespace definition lower down that defines a prefix for a
628    /// namespace that is already known in an ancestor is removed.
629    ///
630    /// There is a special rule for attributes, as they can only be in a
631    /// namespace if they have an explicit prefix; the prefix is not removed if
632    /// it overlaps with a default namespace.
633    ///
634    /// With default namespaces:
635    ///
636    /// ```rust
637    /// use xot::Xot;
638    ///
639    /// let mut xot = Xot::new();
640    /// let root = xot.parse(r#"<doc xmlns="http://example.com"><a xmlns="http://example.com"/></doc>"#)?;
641    /// xot.deduplicate_namespaces(root);
642    ///
643    /// assert_eq!(xot.to_string(root)?, r#"<doc xmlns="http://example.com"><a/></doc>"#);
644    /// # Ok::<(), xot::Error>(())
645    /// ```
646    ///
647    /// With explicit prefixes:
648    ///
649    /// ```rust
650    /// use xot::Xot;
651    ///
652    /// let mut xot = Xot::new();
653    /// let root = xot.parse(r#"<ns:doc xmlns:ns="http://example.com"><ns:a xmlns:ns="http://example.com"/></ns:doc>"#)?;
654    ///
655    /// xot.deduplicate_namespaces(root);
656    ///
657    /// assert_eq!(xot.to_string(root)?, r#"<ns:doc xmlns:ns="http://example.com"><ns:a/></ns:doc>"#);
658    /// # Ok::<(), xot::Error>(())
659    /// ```
660    ///
661    /// This also works if you use different prefixes for the same namespace
662    /// URI:
663    ///
664    /// ```rust
665    /// use xot::Xot;
666    ///
667    /// let mut xot = Xot::new();
668    /// let root = xot.parse(r#"<ns:doc xmlns:ns="http://example.com"><other:a xmlns:other="http://example.com"/></ns:doc>"#)?;
669    ///
670    /// xot.deduplicate_namespaces(root);
671    ///
672    /// assert_eq!(xot.to_string(root)?, r#"<ns:doc xmlns:ns="http://example.com"><ns:a/></ns:doc>"#);
673    /// # Ok::<(), xot::Error>(())
674    /// ```
675    pub fn deduplicate_namespaces(&mut self, node: Node) {
676        let mut fullname_serializer = FullnameSerializer::new(self, vec![]);
677        let mut fixup_nodes = Vec::new();
678        let mut deduplicate_tracker = DeduplicateTracker::new();
679        // determine nodes we need to fix up
680        for edge in self.traverse(node) {
681            match edge {
682                NodeEdge::Start(node) => {
683                    if self.is_element(node) {
684                        // an attribute in a namespace *has* to have a non-empty
685                        // prefix. This means we cannot remove a prefix if that
686                        // prefix overlaps with a previously defined default
687                        // namespace: that's fine for elements which fall
688                        // in the default namespace, but not for attributes.
689                        // The tracker keeps track of all this.
690                        deduplicate_tracker.push(self, node);
691                        // we don't need to remove the fixed up prefixes because
692                        // as duplicates they will definitely exist.
693                        // In fact if we remove them first the push will fail to create
694                        // a new entry in the namespace stack, as prefixes can become empty
695                        fullname_serializer.push(self.namespace_declarations(node));
696                    }
697                }
698                NodeEdge::End(node) => {
699                    if self.is_element(node) {
700                        // to_prefix is only used to determine whether to pop
701                        // so should be okay to send here
702                        fullname_serializer.pop(self.has_namespace_declarations(node));
703                        deduplicate_tracker.pop();
704                        // if we already know a namespace, remove it
705                        // we do this at the end so the deduplicate tracker
706                        // has had a change to do its work for sub-elements
707                        let namespaces = self.namespaces(node);
708                        let to_remove = namespaces
709                            .iter()
710                            .filter_map(|(_, namespace_id)| {
711                                if fullname_serializer.is_namespace_known(*namespace_id)
712                                    && deduplicate_tracker.is_safe_to_remove(*namespace_id)
713                                {
714                                    Some(*namespace_id)
715                                } else {
716                                    None
717                                }
718                            })
719                            .collect::<Vec<_>>();
720                        if !to_remove.is_empty() {
721                            fixup_nodes.push((node, to_remove.clone()));
722                        }
723                    }
724                }
725            }
726        }
727        // now actually fix up the nodes, removing superfluous namespaces
728        // TODO: this whole thing is a bit a multi-step mess. Perhaps
729        // direct namespace node access would help.
730        let mut fixup_prefixes = Vec::new();
731        for (node, to_remove) in fixup_nodes {
732            let namespaces = self.namespaces(node);
733            for namespace_id in to_remove {
734                let prefixes_to_remove = namespaces
735                    .iter()
736                    .filter(|(_, ns)| **ns == namespace_id)
737                    .map(|(prefix, _)| prefix);
738                fixup_prefixes.push((node, prefixes_to_remove.collect::<Vec<_>>()));
739            }
740        }
741        for (node, prefix) in fixup_prefixes {
742            let mut namespaces = self.namespaces_mut(node);
743            for prefix in prefix {
744                namespaces.remove(prefix);
745            }
746        }
747    }
748
749    pub(crate) fn prefixes_in_scope(&self, node: Node) -> Prefixes {
750        self.namespaces_in_scope(node).collect()
751    }
752
753    /// Get namespaces without prefix within node or its descendants.
754    ///
755    /// Any elements or attribute with namespaces that don't have a prefix
756    /// defined for them in the context of the node are reported.
757    pub fn unresolved_namespaces(&self, node: Node) -> Vec<NamespaceId> {
758        let mut namespaces = Vec::new();
759        let mut fullname_serializer = FullnameSerializer::new(self, vec![]);
760        for edge in self.traverse(node) {
761            match edge {
762                NodeEdge::Start(node) => {
763                    let element = self.element(node);
764                    if let Some(element) = element {
765                        fullname_serializer.push(self.namespace_declarations(node));
766                        let namespace_id = self.namespace_for_name(element.name());
767                        if !fullname_serializer.is_namespace_known(namespace_id) {
768                            namespaces.push(namespace_id);
769                        }
770                        for name in self.attributes(node).keys() {
771                            let namespace_id = self.namespace_for_name(name);
772                            if !fullname_serializer.is_namespace_known(namespace_id) {
773                                namespaces.push(namespace_id);
774                            }
775                        }
776                    }
777                }
778                NodeEdge::End(node) => {
779                    if self.is_element(node) {
780                        fullname_serializer.pop(self.has_namespace_declarations(node));
781                    }
782                }
783            }
784        }
785        namespaces
786    }
787
788    /// Returns an iterator that yields all the prefix/namespace combinations.
789    ///
790    /// Once a prefix has been yielded, it's not yielded again, as the
791    /// overriding prefix has already been yielded.
792    pub fn namespaces_in_scope(
793        &self,
794        node: Node,
795    ) -> impl Iterator<Item = (PrefixId, NamespaceId)> + '_ {
796        namespace_traverse(self, node)
797    }
798
799    pub(crate) fn base_prefixes(&self) -> Prefixes {
800        let mut prefixes = Prefixes::new();
801        prefixes.insert(self.xml_prefix_id, self.xml_namespace_id);
802        prefixes
803    }
804}
805
/// Stack of per-node records used to answer, through `is_safe_to_remove`,
/// whether a namespace is bound as a default namespace that some attribute
/// name also lives in.
///
/// `push` adds one entry for a node and `pop` drops the most recent one, so
/// callers are expected to pair the two while walking a tree.
struct DeduplicateTracker {
    /// Entries for the nodes pushed so far and not yet popped; the most
    /// recently pushed node is last.
    stack: Vec<DeduplicateTrackerEntry>,
}
809
/// What `DeduplicateTracker` remembers about a single pushed node.
struct DeduplicateTrackerEntry {
    /// The namespace that `xot.namespaces(node)` binds to the empty prefix
    /// for the pushed node, or `None` when it has no such binding.
    default_namespace: Option<NamespaceId>,
    /// Starts out `false`; set to `true` when an attribute name is seen whose
    /// namespace equals `default_namespace` while this is the innermost
    /// entry on the stack with that default namespace.
    in_use_by_attribute: bool,
}
814
815impl DeduplicateTracker {
816    fn new() -> Self {
817        Self { stack: Vec::new() }
818    }
819
820    fn push(&mut self, xot: &Xot, node: Node) {
821        let namespaces = xot.namespaces(node);
822        let default_namespace = namespaces.get(xot.empty_prefix());
823        self.stack.push(DeduplicateTrackerEntry {
824            default_namespace: default_namespace.copied(),
825            in_use_by_attribute: false,
826        });
827        for attribute_name in xot.attributes(node).keys() {
828            self.attribute_name(xot, attribute_name);
829        }
830    }
831
832    fn pop(&mut self) {
833        self.stack.pop();
834    }
835
836    fn attribute_name(&mut self, xot: &Xot, name: NameId) {
837        let namespace = xot.namespace_for_name(name);
838        for entry in self.stack.iter_mut().rev() {
839            if entry.default_namespace == Some(namespace) {
840                entry.in_use_by_attribute = true;
841                return;
842            }
843        }
844    }
845
846    fn is_safe_to_remove(&self, namespace: NamespaceId) -> bool {
847        for entry in self.stack.iter().rev() {
848            if entry.default_namespace == Some(namespace) {
849                return !entry.in_use_by_attribute;
850            }
851        }
852        true
853    }
854}
855
856pub(crate) fn namespace_traverse(
857    xot: &Xot,
858    node: Node,
859) -> impl Iterator<Item = (PrefixId, NamespaceId)> + '_ {
860    gen!({
861        let mut seen: Vec<PrefixId> = Vec::new();
862
863        for ancestor in xot.ancestors(node) {
864            let namespaces = xot.namespaces(ancestor);
865            for (prefix_id, namespace_id) in namespaces.iter() {
866                if seen.contains(&prefix_id) {
867                    continue;
868                }
869                let undeclaration =
870                    xot.empty_prefix() == prefix_id && *namespace_id == xot.no_namespace();
871                seen.push(prefix_id);
872                if !undeclaration {
873                    yield_!((prefix_id, *namespace_id));
874                }
875            }
876        }
877        for (prefix_id, namespace_id) in xot.base_prefixes() {
878            if seen.contains(&prefix_id) {
879                continue;
880            }
881            seen.push(prefix_id);
882            yield_!((prefix_id, namespace_id));
883        }
884    })
885    .into_iter()
886}
887
#[cfg(test)]
mod tests {
    use super::*;

    /// `prefixes_in_scope` reports inherited declarations, lets an inner
    /// declaration override an outer one for the same prefix, and always
    /// includes the `xml` prefix.
    #[test]
    fn test_prefixes_in_scope() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<doc xmlns:foo="http://example.com"><a><b xmlns:foo="http://example.com/foo" xmlns:bar="http://example.com/bar" /></a></doc>"#)
            .unwrap();

        // Walk down to the three nested elements: doc > a > b.
        let doc_el = xot.document_element(root).unwrap();
        let a = xot.first_child(doc_el).unwrap();
        let b = xot.first_child(a).unwrap();

        // Prefixes declared in the document.
        let foo = xot.prefix("foo").unwrap();
        let bar = xot.prefix("bar").unwrap();

        // Namespaces declared in the document.
        let ns = xot.namespace("http://example.com").unwrap();
        let ns_foo = xot.namespace("http://example.com/foo").unwrap();
        let ns_bar = xot.namespace("http://example.com/bar").unwrap();

        // The binding that is in scope everywhere.
        let xml_binding = (xot.xml_prefix(), xot.xml_namespace());

        // `doc` declares `foo`, and `a` inherits it unchanged.
        let outer = Prefixes::from_iter(vec![(foo, ns), xml_binding]);
        assert_eq!(xot.prefixes_in_scope(doc_el), outer);
        assert_eq!(xot.prefixes_in_scope(a), outer);

        // `b` rebinds `foo` and introduces `bar`.
        let inner = Prefixes::from_iter(vec![(foo, ns_foo), (bar, ns_bar), xml_binding]);
        assert_eq!(xot.prefixes_in_scope(b), inner);
    }
}