xot/nameaccess.rs
1use ahash::{HashMap, HashSet};
2use genawaiter::rc::gen;
3use genawaiter::yield_;
4
5use crate::access::NodeEdge;
6use crate::error::Error;
7use crate::id::{Name, NameId, NamespaceId, PrefixId};
8use crate::output::FullnameSerializer;
9use crate::xmlvalue::Prefixes;
10use crate::xotdata::{Node, Xot};
11use crate::{xmlname, Value};
12
13/// ## Names, namespaces and prefixes.
14///
15/// Xot does not let you use names, prefixes and URIs directly. Instead you use
16/// the types [`NameId`], [`NamespaceId`] and [`PrefixId`] to refer to these.
17///
18/// This has some advantages:
19///
20/// * It's faster to compare and hash names, namespaces and prefixes.
21///
22/// * It takes less memory to store a tree.
23///
24/// * You get type-checks and can't mix up names, namespaces and prefixes.
25///
26/// Names, namespaces and prefixes are shared in a single Xot, so are the same
27/// in multiple trees. This makes it safe to copy and move nodes between trees.
28/// If you care about the readability of the serialized XML you do need to
29/// ensure that each tree uses `xmlns` attributes to declare the namespaces it
30/// uses; otherwise prefixes are generated during serialization.
31///
32/// The minor drawback is that you need to use multiple steps to create a name,
33/// prefix or namespace for use, or to access the string value of a name,
/// prefix or namespace. This drawback may be an advantage at times, as typical
35/// code needs to use a single name, namespace or prefix multiple times, so
36/// assigning to a variable is more convenient than repeating strings.
37///
38/// There are also APIs that help with namespace access and manipulation in
39/// other sections: [`Xot::namespaces`], [`Xot::namespaces_mut`], and
40/// [`Xot::get_namespace`], [`Xot::set_namespace`] and
41/// [`Xot::remove_namespace`], and [`Xot::append_namespace_node`].
42impl Xot {
43 /// Look up name without a namespace.
44 ///
45 /// This is the immutable version of [`Xot::add_name`]; it returns
46 /// `None` if the name doesn't exist.
47 ///
48 /// ```rust
49 /// use xot::Xot;
50 ///
51 /// let mut xot = Xot::new();
52 /// assert!(xot.name("z").is_none());
53 ///
54 /// let name = xot.add_name("z");
55 /// assert_eq!(xot.name("z"), Some(name));
56 /// ```
57 pub fn name(&self, name: &str) -> Option<NameId> {
58 self.name_ns(name, self.no_namespace_id)
59 }
60
61 /// Add name without a namespace.
62 ///
63 /// If the name already exists, return its id, otherwise creates it.
64 ///
65 /// ```rust
66 /// use xot::Xot;
67 ///
68 /// let mut xot = Xot::new();
69 ///
70 /// let name = xot.add_name("a");
71 /// // the namespace is "" for no namespace
72 /// assert_eq!(xot.name_ns_str(name), ("a", ""));
73 ///
74 /// let root = xot.parse(r#"<doc/>"#)?;
75 /// let doc_el = xot.document_element(root).unwrap();
76 /// // add an element, using the name
77 /// let node = xot.append_element(doc_el, name)?;
78 ///
79 /// assert_eq!(xot.to_string(root)?, "<doc><a/></doc>");
80 ///
81 /// # Ok::<(), xot::Error>(())
82 /// ```
83 pub fn add_name(&mut self, name: &str) -> NameId {
84 self.add_name_ns(name, self.no_namespace_id)
85 }
86
87 /// Look up name with a namespace.
88 ///
89 /// ```rust
90 /// use xot::Xot;
91 ///
92 /// let mut xot = Xot::new();
93 ///
94 /// let ns = xot.add_namespace("http://example.com");
95 /// let name = xot.add_name_ns("a", ns);
96 /// assert_eq!(xot.name_ns_str(name), ("a", "http://example.com"));
97 ///
98 /// # Ok::<(), xot::Error>(())
99 /// ```
100 ///
101 /// Look up name of an element:
102 ///
103 /// ```rust
104 /// use xot::Xot;
105 ///
106 /// let mut xot = Xot::new();
107 /// let root = xot.parse(r#"<doc xmlns="http://example.com"><a/></doc>"#)?;
108 /// let doc_el = xot.document_element(root).unwrap();
109 ///
110 /// let doc_value = xot.element(doc_el).unwrap();
111 ///
112 /// // get the name of the element
113 /// let name = xot.name_ns_str(doc_value.name());
114 ///
115 /// # Ok::<(), xot::Error>(())
116 /// ```
117 pub fn name_ns(&self, name: &str, namespace_id: NamespaceId) -> Option<NameId> {
118 self.name_lookup.get_id(&Name::new(name, namespace_id))
119 }
120
121 /// Add name with a namespace.
122 ///
123 /// If the name already exists, return its id.
124 ///
125 /// ```rust
126 /// use xot::Xot;
127 ///
128 /// let mut xot = Xot::new();
129 ///
130 /// let ns = xot.add_namespace("http://example.com");
131 /// let name_a = xot.add_name_ns("a", ns);
132 ///
133 /// let root = xot.parse(r#"<doc xmlns="http://example.com"><a/></doc>"#)?;
134 /// let doc_el = xot.document_element(root).unwrap();
135 /// let a_el = xot.first_child(doc_el).unwrap();
136 ///
137 /// let doc_value = xot.element(doc_el).unwrap();
138 /// let a_value = xot.element(a_el).unwrap();
139 ///
140 /// // we know a is the right name, but doc is not
141 /// assert_eq!(a_value.name(), name_a);
142 /// assert_ne!(doc_value.name(), name_a);
143 ///
144 /// # Ok::<(), xot::Error>(())
145 /// ```
146 pub fn add_name_ns(&mut self, name: &str, namespace_id: NamespaceId) -> NameId {
147 self.name_lookup.get_id_mut(&Name::new(name, namespace_id))
148 }
149
150 /// Look up namespace.
151 ///
152 /// This is the immutable version of [`Xot::add_namespace`]; it returns
153 /// `None` if the namespace doesn't exist.
154 pub fn namespace(&self, namespace: &str) -> Option<NamespaceId> {
155 self.namespace_lookup.get_id(namespace)
156 }
157
158 /// Add namespace.
159 ///
160 /// If the namespace already exists, return its id.
161 pub fn add_namespace(&mut self, namespace: &str) -> NamespaceId {
162 self.namespace_lookup.get_id_mut(namespace)
163 }
164
165 /// Look up prefix.
166 ///
167 /// This is the immutable version of [`Xot::add_prefix`]; it returns
168 /// `None` if the prefix doesn't exist.
169 pub fn prefix(&self, prefix: &str) -> Option<PrefixId> {
170 self.prefix_lookup.get_id(prefix)
171 }
172
173 /// Add prefix.
174 ///
175 /// If the prefix already exists, return its id.
176 pub fn add_prefix(&mut self, prefix: &str) -> PrefixId {
177 self.prefix_lookup.get_id_mut(prefix)
178 }
179
180 /// No namespace
181 ///
182 /// Returns the namespace id used when an element or attribute
183 /// isn't in any namespace.
184 #[inline]
185 pub fn no_namespace(&self) -> NamespaceId {
186 self.no_namespace_id
187 }
188
189 /// Empty prefix
190 ///
191 /// Returns the prefix id used when an element or attribute
192 /// doesn't have a prefix.
193 #[inline]
194 pub fn empty_prefix(&self) -> PrefixId {
195 self.empty_prefix_id
196 }
197
198 /// XML prefix
199 ///
200 /// The prefix `xml` used for the XML namespace.
201 #[inline]
202 pub fn xml_prefix(&self) -> PrefixId {
203 self.xml_prefix_id
204 }
205
206 /// XML namespace
207 ///
208 /// Returns the namespace id used for the XML namespace.
209 ///
/// Also known as `http://www.w3.org/XML/1998/namespace`
211 #[inline]
212 pub fn xml_namespace(&self) -> NamespaceId {
213 self.xml_namespace_id
214 }
215
216 /// xml:space
217 ///
218 /// Returns the name id used for the `xml:space` attribute.
219 #[inline]
220 pub fn xml_space_name(&self) -> NameId {
221 self.xml_space_id
222 }
223
224 /// xml:id
///
226 /// Returns the name id used for the `xml:id` attribute.
227 #[inline]
228 pub fn xml_id_name(&self) -> NameId {
229 self.xml_id_id
230 }
231
232 /// Given a name id, and a context node (to provide namespace prefix
233 /// lookup), return a [`xmlname::RefName`]. If you import the trait
234 /// [`xmlname::NameStrInfo`] you can look up more information about the
235 /// name.
236 ///
237 /// ```rust
238 /// use xot::Xot;
239 /// use xot::xmlname::NameStrInfo;
240 ///
241 /// let mut xot = Xot::new();
242 /// let root = xot.parse(r#"<ex:doc xmlns:ex="http://example.com"><a/></ex:doc>"#)?;
243 /// let doc_el = xot.document_element(root).unwrap();
244 /// let a_el = xot.first_child(doc_el).unwrap();
245 ///
246 /// let doc_name = xot.name_ref(xot.node_name(doc_el).unwrap(), a_el)?;
247 ///
248 /// assert_eq!(doc_name.local_name(), "doc");
249 /// assert_eq!(doc_name.namespace(), "http://example.com");
250 /// assert_eq!(doc_name.prefix(), "ex");
251 /// assert_eq!(doc_name.full_name(), "ex:doc");
252 ///
253 /// let a_name = xot.name_ref(xot.node_name(a_el).unwrap(), a_el)?;
254 /// assert_eq!(a_name.local_name(), "a");
255 /// assert_eq!(a_name.namespace(), "");
256 /// assert_eq!(a_name.prefix(), "");
257 /// assert_eq!(a_name.full_name(), "a");
258 ///
259 /// # Ok::<(), xot::Error>(())
260 /// ```
261 pub fn name_ref(&self, name_id: NameId, context: Node) -> Result<xmlname::RefName, Error> {
262 xmlname::RefName::from_node(self, context, name_id)
263 }
264
/// Look up localname, namespace uri for name id
267 ///
268 /// If this name id is not in a namespace, the namespace uri is the empty
269 /// string.
270 ///
271 /// No namespace:
272 ///
273 /// ```rust
274 /// use xot::Xot;
275 ///
276 /// let mut xot = Xot::new();
277 /// let root = xot.parse(r#"<doc><a/></doc>"#)?;
278 /// let doc_el = xot.document_element(root).unwrap();
279 /// let a_el = xot.first_child(doc_el).unwrap();
280 ///
281 /// let a_value = xot.element(a_el).unwrap();
282 ///
283 /// let (localname, namespace) = xot.name_ns_str(a_value.name());
284 /// assert_eq!(localname, "a");
285 /// assert_eq!(namespace, "");
286 /// # Ok::<(), xot::Error>(())
287 /// ```
288 ///
289 /// With namespace:
290 /// ```rust
291 /// use xot::Xot;
292 ///
293 /// let mut xot = Xot::new();
294 /// let root = xot.parse(r#"<doc xmlns="http://example.com"><a/></doc>"#)?;
295 /// let doc_el = xot.document_element(root).unwrap();
296 /// let a_el = xot.first_child(doc_el).unwrap();
297 ///
298 /// let a_value = xot.element(a_el).unwrap();
299 ///
300 /// let (localname, namespace) = xot.name_ns_str(a_value.name());
301 /// assert_eq!(localname, "a");
302 /// assert_eq!(namespace, "http://example.com");
303 /// # Ok::<(), xot::Error>(())
304 /// ```
305 #[inline]
306 pub fn name_ns_str(&self, name: NameId) -> (&str, &str) {
307 let name = self.name_lookup.get_value(name);
308 let namespace = self.namespace_lookup.get_value(name.namespace_id);
309 (name.name.as_ref(), namespace)
310 }
311
312 /// Get the localname of a name.
313 #[inline]
314 pub fn local_name_str(&self, name: NameId) -> &str {
315 let name = self.name_lookup.get_value(name);
316 name.name.as_ref()
317 }
318
319 /// Get the namespace URI of a name
320 #[inline]
321 pub fn uri_str(&self, name: NameId) -> &str {
322 let name = self.name_lookup.get_value(name);
323 self.namespace_str(name.namespace_id)
324 }
325
326 /// Look up namespace uri for namespace id
327 ///
328 /// An empty string slice indicates the no namespace.
329 #[inline]
330 pub fn namespace_str(&self, namespace: NamespaceId) -> &str {
331 let namespace = self.namespace_lookup.get_value(namespace);
332 namespace
333 }
334
335 /// Look up string slice for prefix id
336 ///
337 /// If the prefix id is the empty prefix, the string slice is the empty string.
338 #[inline]
339 pub fn prefix_str(&self, prefix: PrefixId) -> &str {
340 let prefix = self.prefix_lookup.get_value(prefix);
341 prefix
342 }
343
344 /// Get the Namespace for a Name
345 ///
346 /// ```rust
347 /// use xot::Xot;
348 ///
349 /// let mut xot = Xot::new();
350 /// let ns = xot.add_namespace("http://example.com");
351 /// let name = xot.add_name_ns("a", ns);
352 ///
353 /// assert_eq!(xot.namespace_for_name(name), ns);
354 /// # Ok::<(), xot::Error>(())
355 /// ```
356 #[inline]
357 pub fn namespace_for_name(&self, name: NameId) -> NamespaceId {
358 self.name_lookup.get_value(name).namespace_id
359 }
360
361 /// Full name.
362 ///
363 /// Given a context node, determine the full name string of the given name.
364 ///
365 /// If the name doesn't have a namespace, that's identical to the localname.
366 /// If the name is in a namespace, a prefix is looked up. If no prefix
367 /// exists, that's an error.
368 ///
369 /// ```rust
370 /// use xot::Xot;
371 ///
372 /// // prefixed
373 /// let mut xot = Xot::new();
374 /// let doc = xot.parse(r#"<foo:doc xmlns:foo="http://example.com"/>"#)?;
375 /// let doc_el = xot.document_element(doc).unwrap();
376 /// let name = xot.node_name(doc_el).unwrap();
377 ///
/// let full_name = xot.full_name(doc_el, name)?;
380 /// assert_eq!(full_name, "foo:doc");
381 ///
382 /// // default namespace
383 /// let doc = xot.parse(r#"<doc xmlns="http://example.com"/>"#)?;
384 /// let doc_el = xot.document_element(doc).unwrap();
385 /// let name = xot.node_name(doc_el).unwrap();
386 /// let full_name = xot.full_name(doc_el, name)?;
387 /// assert_eq!(full_name, "doc");
388 ///
389 /// // no namespace
390 /// let doc = xot.parse(r#"<doc/>"#)?;
391 /// let doc_el = xot.document_element(doc).unwrap();
392 /// let name = xot.node_name(doc_el).unwrap();
393 /// let full_name = xot.full_name(doc_el, name)?;
394 /// assert_eq!(full_name, "doc");
395 ///
396 /// # Ok::<(), xot::Error>(())
397 /// ```
398 pub fn full_name(&self, node: Node, name: NameId) -> Result<String, Error> {
399 let namespace = self.namespace_for_name(name);
400 let local_name = self.local_name_str(name);
401 if namespace == self.no_namespace() {
402 return Ok(local_name.to_string());
403 }
404 // look up the prefix for the namespace
405 if let Some(prefix) = self.prefix_for_namespace(node, namespace) {
406 let prefix = self.prefix_str(prefix);
407 if !prefix.is_empty() {
408 Ok(format!("{}:{}", prefix, local_name))
409 } else {
410 Ok(local_name.to_string())
411 }
412 } else {
413 Err(Error::MissingPrefix(
414 self.namespace_str(namespace).to_string(),
415 ))
416 }
417 }
418
419 /// Given a node, give back the name id of this node.
420 ///
/// For elements and attributes that is their name, for processing
422 /// instructions this is a name based on the target attribute.
423 ///
424 /// For anything else, it's `None`.
425 pub fn node_name(&self, node: Node) -> Option<NameId> {
426 match self.value(node) {
427 Value::Element(element) => Some(element.name()),
428 Value::Text(..) => None,
429 Value::ProcessingInstruction(pi) => Some(pi.target()),
430 Value::Comment(..) => None,
431 Value::Document => None,
432 Value::Attribute(attribute) => Some(attribute.name()),
433 Value::Namespace(_) => None,
434 }
435 }
436
437 /// Given a node, give back the [`xmlname::RefName`] of this node.
438 ///
/// For elements and attributes that is their name, for processing
440 /// instructions this is a name based on the target attribute.
441 ///
442 /// For anything else, it's `None`.
443 ///
444 /// ```rust
445 /// use xot::Xot;
446 /// use xot::xmlname::NameStrInfo;
447 ///
448 /// let mut xot = Xot::new();
449 /// let root = xot.parse(r#"<ex:doc xmlns:ex="http://example.com" ex:b="B"><a/></ex:doc>"#)?;
450 /// let doc_el = xot.document_element(root).unwrap();
451 /// let a_el = xot.first_child(doc_el).unwrap();
452 ///
453 /// let doc_name = xot.node_name_ref(doc_el)?.unwrap();
454 /// assert_eq!(doc_name.local_name(), "doc");
455 /// assert_eq!(doc_name.namespace(), "http://example.com");
456 /// assert_eq!(doc_name.prefix(), "ex");
457 /// assert_eq!(doc_name.full_name(), "ex:doc");
458 ///
459 /// let a_name = xot.node_name_ref(a_el)?.unwrap();
460 /// assert_eq!(a_name.local_name(), "a");
461 /// assert_eq!(a_name.namespace(), "");
462 /// assert_eq!(a_name.prefix(), "");
463 /// assert_eq!(a_name.full_name(), "a");
464 ///
465 /// // it also works on attribute nodes
466 /// let b_attribute = xot.attributes(doc_el).nodes().next().unwrap();
467 /// let b_name = xot.node_name_ref(b_attribute)?.unwrap();
468 /// assert_eq!(b_name.local_name(), "b");
469 /// assert_eq!(b_name.namespace(), "http://example.com");
470 /// assert_eq!(b_name.prefix(), "ex");
471 /// assert_eq!(b_name.full_name(), "ex:b");
472 ///
473 /// # Ok::<(), xot::Error>(())
474 /// ```
475 pub fn node_name_ref(&self, node: Node) -> Result<Option<xmlname::RefName>, Error> {
476 if let Some(name) = self.node_name(node) {
477 Ok(Some(self.name_ref(name, node)?))
478 } else {
479 Ok(None)
480 }
481 }
482
483 /// Check whether a prefix is defined in node or its ancestors.
484 pub fn is_prefix_defined(&self, node: Node, prefix: PrefixId) -> bool {
485 for ancestor in self.ancestors(node) {
486 if self.namespaces(ancestor).contains_key(prefix) {
487 return true;
488 }
489 }
490 if self.base_prefixes().contains_key(&prefix) {
491 return true;
492 }
493 false
494 }
495
496 /// Find prefixes we inherit from ancestors and aren't defined locally
497 pub fn inherited_prefixes(&self, node: Node) -> Prefixes {
498 let prefixes = if let Some(node) = self.parent(node) {
499 self.prefixes_in_scope(node)
500 } else {
501 Prefixes::new()
502 };
503 // now filter these by namespaces actually required
504 let unresolved_namespaces = HashSet::from_iter(self.unresolved_namespaces(node));
505 prefixes
506 .into_iter()
507 .filter(|(_, ns)| unresolved_namespaces.contains(ns))
508 .collect::<Prefixes>()
509 }
510
511 /// Find prefix for a namespace in node or ancestors.
512 ///
513 /// Returns `None` if no prefix is defined for the namespace.
514 pub fn prefix_for_namespace(&self, node: Node, namespace: NamespaceId) -> Option<PrefixId> {
515 let mut seen = HashSet::default();
516
517 for ancestor in self.ancestors(node) {
518 for (key, value) in self.namespaces(ancestor).iter() {
519 if seen.contains(&key) {
520 return None;
521 }
522 seen.insert(key);
523 if *value == namespace {
524 return Some(key);
525 }
526 }
527 }
528 for (key, value) in self.base_prefixes() {
529 if seen.contains(&key) {
530 return None;
531 }
532 seen.insert(key);
533 if value == namespace {
534 return Some(key);
535 }
536 }
537 None
538 }
539
540 /// Find namespace for prefix in node or ancestors.
541 ///
542 /// Return `None` if no namespace is defined for the prefix.
543 pub fn namespace_for_prefix(&self, node: Node, prefix: PrefixId) -> Option<NamespaceId> {
544 for ancestor in self.ancestors(node) {
545 if let Some(namespace) = self.namespaces(ancestor).get(prefix) {
546 if *namespace == self.no_namespace() {
547 return None;
548 }
549 return Some(*namespace);
550 }
551 }
552 for (key, value) in self.base_prefixes() {
553 if key == prefix {
554 return Some(value);
555 }
556 }
557 None
558 }
559
560 /// Creating missing prefixes.
561 ///
562 /// Due to creation or moving subtrees you can end up with XML elements or
563 /// attributes that have names in a namespace without a prefix to define
564 /// the namespace in its ancestors.
565 ///
566 /// This function creates the missing prefixes on the given node. The
567 /// prefixes are named "n0", "n1", "n2", etc.
568 ///
569 /// You can use this function just before serializing the tree to XML
570 /// using [`Xot::write`] or [`Xot::to_string`].
571 pub fn create_missing_prefixes(&mut self, node: Node) -> Result<(), Error> {
572 let node = if self.is_document(node) {
573 self.document_element(node).unwrap()
574 } else {
575 node
576 };
577 if !self.is_element(node) {
578 return Err(Error::NotElement(node));
579 };
580 let mut fullname_serializer = FullnameSerializer::new(self, vec![]);
581 let mut missing_namespace_ids = HashSet::default();
582 for edge in self.traverse(node) {
583 match edge {
584 NodeEdge::Start(node) => {
585 let element = self.element(node);
586 if let Some(element) = element {
587 fullname_serializer.push(self.namespace_declarations(node));
588 let element_fullname =
589 fullname_serializer.element_fullname(element.name_id);
590 if element_fullname.is_err() {
591 let namespace_id = self.namespace_for_name(element.name_id);
592 missing_namespace_ids.insert(namespace_id);
593 }
594 for name_id in self.attributes(node).keys() {
595 let attribute_fullname =
596 fullname_serializer.attribute_fullname(name_id);
597 if attribute_fullname.is_err() {
598 let namespace_id = self.namespace_for_name(name_id);
599 missing_namespace_ids.insert(namespace_id);
600 }
601 }
602 }
603 }
604 NodeEdge::End(node) => {
605 if self.is_element(node) {
606 fullname_serializer.pop(self.has_namespace_declarations(node));
607 }
608 }
609 }
610 }
611 let mut prefixes_to_add = HashMap::default();
612 for (i, namespace_id) in missing_namespace_ids.iter().enumerate() {
613 let prefix = format!("n{}", i);
614 let prefix_id = self.add_prefix(&prefix);
615 prefixes_to_add.insert(prefix_id, namespace_id);
616 }
617 let mut namespaces = self.namespaces_mut(node);
618
619 for (prefix_id, namespace_id) in prefixes_to_add {
620 namespaces.insert(prefix_id, *namespace_id);
621 }
622 Ok(())
623 }
624
625 /// Deduplicate namespaces.
626 ///
627 /// Any namespace definition lower down that defines a prefix for a
628 /// namespace that is already known in an ancestor is removed.
629 ///
630 /// There is a special rule for attributes, as they can only be in a
631 /// namespace if they have an explicit prefix; the prefix is not removed if
632 /// it overlaps with a default namespace.
633 ///
634 /// With default namespaces:
635 ///
636 /// ```rust
637 /// use xot::Xot;
638 ///
639 /// let mut xot = Xot::new();
640 /// let root = xot.parse(r#"<doc xmlns="http://example.com"><a xmlns="http://example.com"/></doc>"#)?;
641 /// xot.deduplicate_namespaces(root);
642 ///
643 /// assert_eq!(xot.to_string(root)?, r#"<doc xmlns="http://example.com"><a/></doc>"#);
644 /// # Ok::<(), xot::Error>(())
645 /// ```
646 ///
647 /// With explicit prefixes:
648 ///
649 /// ```rust
650 /// use xot::Xot;
651 ///
652 /// let mut xot = Xot::new();
653 /// let root = xot.parse(r#"<ns:doc xmlns:ns="http://example.com"><ns:a xmlns:ns="http://example.com"/></ns:doc>"#)?;
654 ///
655 /// xot.deduplicate_namespaces(root);
656 ///
657 /// assert_eq!(xot.to_string(root)?, r#"<ns:doc xmlns:ns="http://example.com"><ns:a/></ns:doc>"#);
658 /// # Ok::<(), xot::Error>(())
659 /// ```
660 ///
661 /// This also works if you use different prefixes for the same namespace
662 /// URI:
663 ///
664 /// ```rust
665 /// use xot::Xot;
666 ///
667 /// let mut xot = Xot::new();
668 /// let root = xot.parse(r#"<ns:doc xmlns:ns="http://example.com"><other:a xmlns:other="http://example.com"/></ns:doc>"#)?;
669 ///
670 /// xot.deduplicate_namespaces(root);
671 ///
672 /// assert_eq!(xot.to_string(root)?, r#"<ns:doc xmlns:ns="http://example.com"><ns:a/></ns:doc>"#);
673 /// # Ok::<(), xot::Error>(())
674 /// ```
675 pub fn deduplicate_namespaces(&mut self, node: Node) {
676 let mut fullname_serializer = FullnameSerializer::new(self, vec![]);
677 let mut fixup_nodes = Vec::new();
678 let mut deduplicate_tracker = DeduplicateTracker::new();
679 // determine nodes we need to fix up
680 for edge in self.traverse(node) {
681 match edge {
682 NodeEdge::Start(node) => {
683 if self.is_element(node) {
684 // an attribute in a namespace *has* to have a non-empty
685 // prefix. This means we cannot remove a prefix if that
686 // prefix overlaps with a previously defined default
687 // namespace: that's fine for elements which fall
688 // in the default namespace, but not for attributes.
689 // The tracker keeps track of all this.
690 deduplicate_tracker.push(self, node);
691 // we don't need to remove the fixed up prefixes because
692 // as duplicates they will definitely exist.
693 // In fact if we remove them first the push will fail to create
694 // a new entry in the namespace stack, as prefixes can become empty
695 fullname_serializer.push(self.namespace_declarations(node));
696 }
697 }
698 NodeEdge::End(node) => {
699 if self.is_element(node) {
700 // to_prefix is only used to determine whether to pop
701 // so should be okay to send here
702 fullname_serializer.pop(self.has_namespace_declarations(node));
703 deduplicate_tracker.pop();
704 // if we already know a namespace, remove it
705 // we do this at the end so the deduplicate tracker
706 // has had a change to do its work for sub-elements
707 let namespaces = self.namespaces(node);
708 let to_remove = namespaces
709 .iter()
710 .filter_map(|(_, namespace_id)| {
711 if fullname_serializer.is_namespace_known(*namespace_id)
712 && deduplicate_tracker.is_safe_to_remove(*namespace_id)
713 {
714 Some(*namespace_id)
715 } else {
716 None
717 }
718 })
719 .collect::<Vec<_>>();
720 if !to_remove.is_empty() {
721 fixup_nodes.push((node, to_remove.clone()));
722 }
723 }
724 }
725 }
726 }
727 // now actually fix up the nodes, removing superfluous namespaces
728 // TODO: this whole thing is a bit a multi-step mess. Perhaps
729 // direct namespace node access would help.
730 let mut fixup_prefixes = Vec::new();
731 for (node, to_remove) in fixup_nodes {
732 let namespaces = self.namespaces(node);
733 for namespace_id in to_remove {
734 let prefixes_to_remove = namespaces
735 .iter()
736 .filter(|(_, ns)| **ns == namespace_id)
737 .map(|(prefix, _)| prefix);
738 fixup_prefixes.push((node, prefixes_to_remove.collect::<Vec<_>>()));
739 }
740 }
741 for (node, prefix) in fixup_prefixes {
742 let mut namespaces = self.namespaces_mut(node);
743 for prefix in prefix {
744 namespaces.remove(prefix);
745 }
746 }
747 }
748
749 pub(crate) fn prefixes_in_scope(&self, node: Node) -> Prefixes {
750 self.namespaces_in_scope(node).collect()
751 }
752
753 /// Get namespaces without prefix within node or its descendants.
754 ///
755 /// Any elements or attribute with namespaces that don't have a prefix
756 /// defined for them in the context of the node are reported.
757 pub fn unresolved_namespaces(&self, node: Node) -> Vec<NamespaceId> {
758 let mut namespaces = Vec::new();
759 let mut fullname_serializer = FullnameSerializer::new(self, vec![]);
760 for edge in self.traverse(node) {
761 match edge {
762 NodeEdge::Start(node) => {
763 let element = self.element(node);
764 if let Some(element) = element {
765 fullname_serializer.push(self.namespace_declarations(node));
766 let namespace_id = self.namespace_for_name(element.name());
767 if !fullname_serializer.is_namespace_known(namespace_id) {
768 namespaces.push(namespace_id);
769 }
770 for name in self.attributes(node).keys() {
771 let namespace_id = self.namespace_for_name(name);
772 if !fullname_serializer.is_namespace_known(namespace_id) {
773 namespaces.push(namespace_id);
774 }
775 }
776 }
777 }
778 NodeEdge::End(node) => {
779 if self.is_element(node) {
780 fullname_serializer.pop(self.has_namespace_declarations(node));
781 }
782 }
783 }
784 }
785 namespaces
786 }
787
788 /// Returns an iterator that yields all the prefix/namespace combinations.
789 ///
790 /// Once a prefix has been yielded, it's not yielded again, as the
791 /// overriding prefix has already been yielded.
792 pub fn namespaces_in_scope(
793 &self,
794 node: Node,
795 ) -> impl Iterator<Item = (PrefixId, NamespaceId)> + '_ {
796 namespace_traverse(self, node)
797 }
798
799 pub(crate) fn base_prefixes(&self) -> Prefixes {
800 let mut prefixes = Prefixes::new();
801 prefixes.insert(self.xml_prefix_id, self.xml_namespace_id);
802 prefixes
803 }
804}
805
806struct DeduplicateTracker {
807 stack: Vec<DeduplicateTrackerEntry>,
808}
809
810struct DeduplicateTrackerEntry {
811 default_namespace: Option<NamespaceId>,
812 in_use_by_attribute: bool,
813}
814
815impl DeduplicateTracker {
816 fn new() -> Self {
817 Self { stack: Vec::new() }
818 }
819
820 fn push(&mut self, xot: &Xot, node: Node) {
821 let namespaces = xot.namespaces(node);
822 let default_namespace = namespaces.get(xot.empty_prefix());
823 self.stack.push(DeduplicateTrackerEntry {
824 default_namespace: default_namespace.copied(),
825 in_use_by_attribute: false,
826 });
827 for attribute_name in xot.attributes(node).keys() {
828 self.attribute_name(xot, attribute_name);
829 }
830 }
831
832 fn pop(&mut self) {
833 self.stack.pop();
834 }
835
836 fn attribute_name(&mut self, xot: &Xot, name: NameId) {
837 let namespace = xot.namespace_for_name(name);
838 for entry in self.stack.iter_mut().rev() {
839 if entry.default_namespace == Some(namespace) {
840 entry.in_use_by_attribute = true;
841 return;
842 }
843 }
844 }
845
846 fn is_safe_to_remove(&self, namespace: NamespaceId) -> bool {
847 for entry in self.stack.iter().rev() {
848 if entry.default_namespace == Some(namespace) {
849 return !entry.in_use_by_attribute;
850 }
851 }
852 true
853 }
854}
855
856pub(crate) fn namespace_traverse(
857 xot: &Xot,
858 node: Node,
859) -> impl Iterator<Item = (PrefixId, NamespaceId)> + '_ {
860 gen!({
861 let mut seen: Vec<PrefixId> = Vec::new();
862
863 for ancestor in xot.ancestors(node) {
864 let namespaces = xot.namespaces(ancestor);
865 for (prefix_id, namespace_id) in namespaces.iter() {
866 if seen.contains(&prefix_id) {
867 continue;
868 }
869 let undeclaration =
870 xot.empty_prefix() == prefix_id && *namespace_id == xot.no_namespace();
871 seen.push(prefix_id);
872 if !undeclaration {
873 yield_!((prefix_id, *namespace_id));
874 }
875 }
876 }
877 for (prefix_id, namespace_id) in xot.base_prefixes() {
878 if seen.contains(&prefix_id) {
879 continue;
880 }
881 seen.push(prefix_id);
882 yield_!((prefix_id, namespace_id));
883 }
884 })
885 .into_iter()
886}
887
#[cfg(test)]
mod tests {
    use super::*;

    /// A nested redeclaration of `foo` overrides the outer one, a newly
    /// declared `bar` is added, and the `xml` prefix is always in scope.
    #[test]
    fn test_prefixes_in_scope() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<doc xmlns:foo="http://example.com"><a><b xmlns:foo="http://example.com/foo" xmlns:bar="http://example.com/bar" /></a></doc>"#)
            .unwrap();
        let doc_el = xot.document_element(root).unwrap();
        let a = xot.first_child(doc_el).unwrap();
        let b = xot.first_child(a).unwrap();

        // prefixes
        let foo = xot.prefix("foo").unwrap();
        let bar = xot.prefix("bar").unwrap();
        // namespaces
        let ns = xot.namespace("http://example.com").unwrap();
        let ns_foo = xot.namespace("http://example.com/foo").unwrap();
        let ns_bar = xot.namespace("http://example.com/bar").unwrap();
        // the binding that is expected everywhere
        let xml = (xot.xml_prefix(), xot.xml_namespace());

        // `doc` and `a` see the same bindings
        let outer_scope = Prefixes::from_iter(vec![(foo, ns), xml]);
        assert_eq!(xot.prefixes_in_scope(doc_el), outer_scope);
        assert_eq!(xot.prefixes_in_scope(a), outer_scope);

        // `b` overrides `foo` and adds `bar`
        let inner_scope = Prefixes::from_iter(vec![(foo, ns_foo), (bar, ns_bar), xml]);
        assert_eq!(xot.prefixes_in_scope(b), inner_scope);
    }
}