use crate::tree::NodeRef;
use crate::{Attribute, Attributes, ExpandedName};
use html5ever::{LocalName, Namespace, Prefix, QualName};
use std::collections::{HashMap, HashSet};
use super::{NsError, NsResult};
/// Options controlling how namespace prefixes are resolved by
/// `apply_xmlns_opts`.
///
/// The derived `Default` yields an empty prefix map and lenient
/// (`strict == false`) behaviour.
#[derive(Debug, Clone, Default)]
pub struct NsOptions {
/// Caller-supplied prefix-to-namespace mappings. They seed the lookup
/// table; an `xmlns:<prefix>` declaration found on the document's `html`
/// element replaces an entry with the same prefix.
pub namespaces: HashMap<String, Namespace>,
/// When `true`, encountering any prefix without a known namespace makes
/// `apply_xmlns_opts` return `NsError::UndefinedPrefix` instead of `Ok`.
pub strict: bool,
}
/// Resolves namespace prefixes in the tree rooted at `root` using the default
/// (lenient, no extra namespaces) options.
///
/// This is a convenience wrapper around `apply_xmlns_opts`; see that function
/// for the details of how prefixes are resolved.
pub fn apply_xmlns(root: &NodeRef) -> NsResult<NodeRef> {
    let default_options = NsOptions::default();
    apply_xmlns_opts(root, &default_options)
}
pub fn apply_xmlns_opts(root: &NodeRef, options: &NsOptions) -> NsResult<NodeRef> {
let xmlns_map = extract_xmlns_declarations(root, options);
let mut undefined_prefixes = HashSet::new();
let new_root = rebuild_tree(root, &xmlns_map, &mut undefined_prefixes);
if undefined_prefixes.is_empty() || !options.strict {
Ok(new_root)
} else {
let mut prefix_list: Vec<_> = undefined_prefixes.into_iter().collect();
prefix_list.sort();
Err(NsError::UndefinedPrefix(new_root, prefix_list))
}
}
#[deprecated(
since = "0.9.2",
note = "Use `apply_xmlns_opts` with `NsOptions { strict: true, .. }` instead"
)]
pub fn apply_xmlns_strict(root: &NodeRef) -> NsResult<NodeRef> {
apply_xmlns_opts(
root,
&NsOptions {
namespaces: HashMap::new(),
strict: true,
},
)
}
/// Collects the prefix-to-namespace table used for resolution.
///
/// Starts from a copy of `options.namespaces`, then adds every
/// `xmlns:<prefix>="<uri>"` attribute found on the first descendant element of
/// `root` whose local name is `html`. A declaration on that element replaces a
/// caller-supplied entry with the same prefix. Only that one element is
/// inspected.
fn extract_xmlns_declarations(root: &NodeRef, options: &NsOptions) -> HashMap<String, Namespace> {
    let mut declared = options.namespaces.clone();

    for node in root.descendants() {
        // Skip everything that is not an `html` element.
        let html_element = match node.as_element() {
            Some(element) if element.name.local.as_ref() == "html" => element,
            _ => continue,
        };

        let attrs = html_element.attributes.borrow();
        declared.extend(attrs.map.iter().filter_map(|(attr_name, attr)| {
            attr_name
                .local
                .as_ref()
                .strip_prefix("xmlns:")
                .map(|prefix| (prefix.to_string(), Namespace::from(attr.value.as_str())))
        }));

        // Only the first `html` element contributes declarations.
        break;
    }

    declared
}
fn rebuild_tree(
node: &NodeRef,
xmlns_map: &HashMap<String, Namespace>,
undefined_prefixes: &mut HashSet<String>,
) -> NodeRef {
use crate::tree::NodeData;
match node.data() {
NodeData::Element(element) => {
let new_name = process_qualified_name(&element.name, xmlns_map, undefined_prefixes);
let attrs = element.attributes.borrow();
let new_attrs = process_attributes(&attrs, xmlns_map, undefined_prefixes);
let new_node = NodeRef::new_element(new_name, new_attrs.map);
if let Some(ref template_contents) = element.template_contents {
if let Some(new_element) = new_node.as_element() {
if let Some(ref new_template_frag) = new_element.template_contents {
for child in template_contents.children() {
let new_child = rebuild_tree(&child, xmlns_map, undefined_prefixes);
new_template_frag.append(new_child);
}
}
}
}
for child in node.children() {
let new_child = rebuild_tree(&child, xmlns_map, undefined_prefixes);
new_node.append(new_child);
}
new_node
}
NodeData::Text(text) => NodeRef::new_text(text.borrow().clone()),
NodeData::Comment(comment) => NodeRef::new_comment(comment.borrow().clone()),
NodeData::ProcessingInstruction(pi) => {
let pi_data = pi.borrow();
NodeRef::new_processing_instruction(pi_data.0.clone(), pi_data.1.clone())
}
NodeData::Doctype(doctype) => NodeRef::new_doctype(
doctype.name.clone(),
doctype.public_id.clone(),
doctype.system_id.clone(),
),
NodeData::Document(_) => {
let new_doc = NodeRef::new_document();
for child in node.children() {
let new_child = rebuild_tree(&child, xmlns_map, undefined_prefixes);
new_doc.append(new_child);
}
new_doc
}
NodeData::DocumentFragment => {
let new_frag = NodeRef::new(NodeData::DocumentFragment);
for child in node.children() {
let new_child = rebuild_tree(&child, xmlns_map, undefined_prefixes);
new_frag.append(new_child);
}
new_frag
}
}
}
/// Resolves a parsed element name of the form `prefix:local`.
///
/// If the local name contains no `:` the name is returned unchanged.
/// Otherwise it is split at the first `:`; the part before becomes the prefix
/// and the part after becomes the new local name. The namespace is looked up
/// in `xmlns_map`. When the prefix is unknown it is recorded in
/// `undefined_prefixes` and the empty namespace (`ns!()`) is used.
fn process_qualified_name(
    name: &QualName,
    xmlns_map: &HashMap<String, Namespace>,
    undefined_prefixes: &mut HashSet<String>,
) -> QualName {
    let (prefix_str, local_part) = match name.local.as_ref().split_once(':') {
        Some(parts) => parts,
        // Unprefixed names need no resolution.
        None => return name.clone(),
    };

    let namespace = match xmlns_map.get(prefix_str) {
        Some(known) => known.clone(),
        None => {
            undefined_prefixes.insert(prefix_str.to_string());
            ns!()
        }
    };

    QualName::new(
        Some(Prefix::from(prefix_str)),
        namespace,
        LocalName::from(local_part),
    )
}
/// Produces a new attribute set with prefixed attribute names resolved.
///
/// * `xmlns` and `xmlns:*` attributes are dropped from the result.
/// * Attributes whose local name contains `:` are split at the first `:`;
///   the prefix is looked up in `xmlns_map`, and the attribute is re-keyed by
///   the resulting namespace and the remaining local name. Unknown prefixes
///   are recorded in `undefined_prefixes` and get the empty namespace.
/// * All other attributes are copied unchanged.
fn process_attributes(
    attrs: &Attributes,
    xmlns_map: &HashMap<String, Namespace>,
    undefined_prefixes: &mut HashSet<String>,
) -> Attributes {
    let mut resolved = indexmap::IndexMap::new();

    for (attr_name, attr) in &attrs.map {
        let raw_name = attr_name.local.as_ref();

        // Namespace declarations themselves are not carried over.
        if raw_name == "xmlns" || raw_name.starts_with("xmlns:") {
            continue;
        }

        match raw_name.split_once(':') {
            None => {
                resolved.insert(attr_name.clone(), attr.clone());
            }
            Some((prefix_str, local_part)) => {
                let namespace = match xmlns_map.get(prefix_str) {
                    Some(known) => known.clone(),
                    None => {
                        undefined_prefixes.insert(prefix_str.to_string());
                        ns!()
                    }
                };
                let key = ExpandedName::new(namespace, LocalName::from(local_part));
                let value = Attribute {
                    prefix: Some(Prefix::from(prefix_str)),
                    value: attr.value.clone(),
                };
                resolved.insert(key, value);
            }
        }
    }

    Attributes { map: resolved }
}
/// Unit tests for xmlns prefix resolution.
///
/// Lookups that the assertions depend on use `expect`/`unwrap` rather than
/// `if let`, so a test can never pass without running its assertions.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parse_html;
    use crate::traits::*;

    /// A prefix declared on `<html>` resolves to its namespace URI.
    #[test]
    #[cfg(feature = "namespaces")]
    fn apply_xmlns_with_defined_prefix() {
        let html = r#"<html xmlns:c="https://example.com/custom">
<body><c:widget id="test">Content</c:widget></body>
</html>"#;
        let doc = parse_html().one(html);
        let result = apply_xmlns(&doc).unwrap();
        let widget = result.select_first("widget").unwrap();
        assert_eq!(widget.local_name().as_ref(), "widget");
        assert_eq!(widget.prefix().unwrap().as_ref(), "c");
        assert_eq!(
            widget.namespace_uri().as_ref(),
            "https://example.com/custom"
        );
    }

    /// In lenient mode an undeclared prefix is kept and gets the empty namespace.
    #[test]
    #[cfg(feature = "namespaces")]
    fn apply_xmlns_lenient_undefined_prefix() {
        let html = r#"<html>
<body><c:widget>Content</c:widget></body>
</html>"#;
        let doc = parse_html().one(html);
        let result = apply_xmlns(&doc).unwrap();
        let widget = result.select_first("widget").unwrap();
        assert_eq!(widget.local_name().as_ref(), "widget");
        assert_eq!(widget.prefix().unwrap().as_ref(), "c");
        assert_eq!(widget.namespace_uri().as_ref(), "");
    }

    /// Strict mode reports every undeclared prefix and still returns the rebuilt tree.
    #[test]
    #[cfg(feature = "namespaces")]
    fn apply_xmlns_opts_strict_undefined_prefix() {
        let html = r#"<html>
<body><c:widget foo:bar="test">Content</c:widget></body>
</html>"#;
        let doc = parse_html().one(html);
        let options = NsOptions {
            namespaces: HashMap::new(),
            strict: true,
        };
        let err = apply_xmlns_opts(&doc, &options)
            .expect_err("Should return error for undefined prefixes");
        match err {
            NsError::UndefinedPrefix(new_doc, prefixes) => {
                assert_eq!(prefixes.len(), 2);
                assert!(prefixes.contains(&"c".to_string()));
                assert!(prefixes.contains(&"foo".to_string()));
                let widget = new_doc.select_first("widget").unwrap();
                assert_eq!(widget.local_name().as_ref(), "widget");
            }
            _ => unreachable!("Only UndefinedPrefix errors are possible from strict mode"),
        }
    }

    /// The deprecated strict entry point behaves like `apply_xmlns_opts` with `strict: true`.
    #[test]
    #[cfg(feature = "namespaces")]
    // The deprecated function is exercised on purpose to guard its behaviour.
    #[allow(deprecated)]
    fn apply_xmlns_strict_deprecated() {
        let html = r#"<html>
<body><c:widget foo:bar="test">Content</c:widget></body>
</html>"#;
        let doc = parse_html().one(html);
        let err = apply_xmlns_strict(&doc).expect_err("Should return error for undefined prefixes");
        match err {
            NsError::UndefinedPrefix(new_doc, prefixes) => {
                assert_eq!(prefixes.len(), 2);
                assert!(prefixes.contains(&"c".to_string()));
                assert!(prefixes.contains(&"foo".to_string()));
                let widget = new_doc.select_first("widget").unwrap();
                assert_eq!(widget.local_name().as_ref(), "widget");
            }
            _ => unreachable!("Only UndefinedPrefix errors are possible from apply_xmlns_strict"),
        }
    }

    /// Caller-supplied namespaces are used alongside declarations from `<html>`.
    #[test]
    #[cfg(feature = "namespaces")]
    fn apply_xmlns_opts_with_provided_namespaces() {
        let html = r#"<html xmlns:c="https://example.com/custom">
<body>
<svg:rect />
<c:widget>Content</c:widget>
</body>
</html>"#;
        let doc = parse_html().one(html);
        let mut namespaces = HashMap::new();
        namespaces.insert("svg".to_string(), ns!(svg));
        let options = NsOptions {
            namespaces,
            strict: false,
        };
        let result = apply_xmlns_opts(&doc, &options).unwrap();
        let rect = result.select_first("rect").unwrap();
        assert_eq!(rect.local_name().as_ref(), "rect");
        assert_eq!(rect.prefix().unwrap().as_ref(), "svg");
        assert_eq!(rect.namespace_uri().as_ref(), "http://www.w3.org/2000/svg");
        let widget = result.select_first("widget").unwrap();
        assert_eq!(widget.local_name().as_ref(), "widget");
        assert_eq!(widget.prefix().unwrap().as_ref(), "c");
        assert_eq!(
            widget.namespace_uri().as_ref(),
            "https://example.com/custom"
        );
    }

    /// A declaration on `<html>` wins over a caller-supplied entry for the same prefix.
    #[test]
    #[cfg(feature = "namespaces")]
    fn apply_xmlns_opts_html_overrides_options() {
        let html = r#"<html xmlns:custom="https://example.com/html-version">
<body><custom:widget>Content</custom:widget></body>
</html>"#;
        let doc = parse_html().one(html);
        let mut namespaces = HashMap::new();
        namespaces.insert(
            "custom".to_string(),
            Namespace::from("https://example.com/options-version"),
        );
        let options = NsOptions {
            namespaces,
            strict: false,
        };
        let result = apply_xmlns_opts(&doc, &options).unwrap();
        let widget = result.select_first("widget").unwrap();
        assert_eq!(
            widget.namespace_uri().as_ref(),
            "https://example.com/html-version"
        );
    }

    /// Prefixed elements inside `<template>` contents are rebuilt and resolved too.
    #[test]
    #[cfg(feature = "namespaces")]
    fn apply_xmlns_handles_template_contents() {
        let html = r#"<html xmlns:c="https://example.com/custom">
<body>
<template>
<c:widget>Template content</c:widget>
</template>
</body>
</html>"#;
        let doc = parse_html().one(html);
        let result = apply_xmlns(&doc).unwrap();
        // Fail loudly if the template is missing instead of skipping the checks.
        let template = result
            .select_first("template")
            .expect("Template element should be present after apply_xmlns");
        let elem_data = template
            .as_node()
            .as_element()
            .expect("Template node should be an element");
        let contents = elem_data
            .template_contents
            .as_ref()
            .expect("Template should have contents");
        let mut found_widget = false;
        for child in contents.descendants() {
            if let Some(element) = child.as_element() {
                if element.name.local.as_ref() == "widget" {
                    found_widget = true;
                    assert_eq!(element.name.prefix.as_ref().unwrap().as_ref(), "c");
                    assert_eq!(element.name.ns.as_ref(), "https://example.com/custom");
                }
            }
        }
        assert!(found_widget, "Should find widget in template contents");
    }

    /// Comment nodes survive the rebuild with their text intact.
    #[test]
    fn apply_xmlns_preserves_comments() {
        let html = r#"<html>
<!-- This is a comment -->
<body>Content</body>
</html>"#;
        let doc = parse_html().one(html);
        let result = apply_xmlns(&doc).unwrap();
        let mut found_comment = false;
        for node in result.descendants() {
            if let Some(comment) = node.as_comment() {
                assert_eq!(comment.borrow().trim(), "This is a comment");
                found_comment = true;
            }
        }
        assert!(found_comment, "Should preserve comments");
    }

    /// The doctype node is copied into the rebuilt document.
    #[test]
    fn apply_xmlns_preserves_doctype() {
        let html = r#"<!DOCTYPE html>
<html>
<body>Content</body>
</html>"#;
        let doc = parse_html().one(html);
        let result = apply_xmlns(&doc).unwrap();
        let mut found_doctype = false;
        for node in result.children() {
            if let Some(doctype) = node.as_doctype() {
                assert_eq!(doctype.name.as_str(), "html");
                found_doctype = true;
            }
        }
        assert!(found_doctype, "Should preserve DOCTYPE");
    }

    /// Prefixed attributes are re-keyed by namespace and local name, keeping prefix and value.
    #[test]
    #[cfg(feature = "namespaces")]
    fn apply_xmlns_processes_prefixed_attributes() {
        let html = r#"<html xmlns:data="https://example.com/data">
<body>
<div data:id="123" data:type="widget">Content</div>
</body>
</html>"#;
        let doc = parse_html().one(html);
        let result = apply_xmlns(&doc).unwrap();
        // Fail loudly if the div is missing instead of skipping the checks.
        let div = result
            .select_first("div")
            .expect("div element should be present after apply_xmlns");
        let attrs = div.attributes.borrow();
        let mut found_id = false;
        let mut found_type = false;
        for (name, attr) in &attrs.map {
            if name.local.as_ref() == "id" && name.ns.as_ref() == "https://example.com/data" {
                assert_eq!(attr.value, "123");
                assert_eq!(attr.prefix.as_ref().unwrap().as_ref(), "data");
                found_id = true;
            }
            if name.local.as_ref() == "type" && name.ns.as_ref() == "https://example.com/data" {
                assert_eq!(attr.value, "widget");
                assert_eq!(attr.prefix.as_ref().unwrap().as_ref(), "data");
                found_type = true;
            }
        }
        assert!(found_id, "Should find namespaced id attribute");
        assert!(found_type, "Should find namespaced type attribute");
    }

    /// Input without an explicit `<html>` tag is still processed successfully.
    #[test]
    fn apply_xmlns_without_html_element() {
        let html = r#"<body><div>Content</div></body>"#;
        let doc = parse_html().one(html);
        let result = apply_xmlns(&doc).unwrap();
        assert!(result.select_first("div").is_ok());
    }

    /// Pins the parser behaviour that forces the PI test below to insert its
    /// processing instruction by hand: parsing `<?xml ...?>` as HTML yields no
    /// ProcessingInstruction node.
    #[test]
    fn html5ever_pi_handling() {
        let html = r#"<?xml version="1.0"?><html><body>Test</body></html>"#;
        let doc = parse_html().one(html);
        let mut found_pi = false;
        for node in doc.descendants() {
            if node.as_processing_instruction().is_some() {
                found_pi = true;
                break;
            }
        }
        assert!(
            !found_pi,
            "HTML5 parser should not create ProcessingInstruction nodes"
        );
    }

    /// A manually inserted processing instruction is copied into the rebuilt tree.
    #[test]
    #[cfg(feature = "namespaces")]
    fn apply_xmlns_preserves_processing_instructions() {
        let html = r#"<html xmlns:c="https://example.com/custom">
<body><c:widget>Content</c:widget></body>
</html>"#;
        let doc = parse_html().one(html);
        let pi = NodeRef::new_processing_instruction(
            "xml-stylesheet".to_string(),
            "href=\"style.css\"".to_string(),
        );
        // The setup must succeed, otherwise the test would not exercise anything.
        let html_elem = doc
            .children()
            .next()
            .expect("Parsed document should have at least one child");
        html_elem.insert_before(pi.clone());
        let result = apply_xmlns(&doc).unwrap();
        let mut found_pi = false;
        for node in result.descendants() {
            if let Some(pi_data) = node.as_processing_instruction() {
                let (target, data) = &*pi_data.borrow();
                assert_eq!(target, "xml-stylesheet");
                assert_eq!(data, "href=\"style.css\"");
                found_pi = true;
                break;
            }
        }
        assert!(
            found_pi,
            "ProcessingInstruction should be preserved during apply_xmlns"
        );
        let widget = result.select_first("widget").unwrap();
        assert_eq!(
            widget.namespace_uri().as_ref(),
            "https://example.com/custom"
        );
    }

    /// A document fragment appended to `<body>` is rebuilt together with its children.
    #[test]
    fn apply_xmlns_preserves_document_fragments() {
        use crate::tree::NodeData;
        let html = r#"<html>
<body>Content</body>
</html>"#;
        let doc = parse_html().one(html);
        let frag = NodeRef::new(NodeData::DocumentFragment);
        let text_node = NodeRef::new_text("Fragment content".to_string());
        frag.append(text_node);
        // The setup must succeed, otherwise the test would not exercise anything.
        let body = doc
            .select_first("body")
            .expect("Parsed document should contain a body element");
        body.as_node().append(frag.clone());
        let result = apply_xmlns(&doc).unwrap();
        let mut found_frag = false;
        let mut found_text = false;
        for node in result.descendants() {
            if node.as_document_fragment().is_some() {
                found_frag = true;
                for child in node.children() {
                    if let Some(text) = child.as_text() {
                        assert_eq!(text.borrow().as_str(), "Fragment content");
                        found_text = true;
                    }
                }
            }
        }
        assert!(
            found_frag,
            "DocumentFragment should be preserved during apply_xmlns"
        );
        assert!(found_text, "DocumentFragment children should be preserved");
    }

    /// `xmlns:*` declarations are stripped from the rebuilt `<html>` element.
    #[test]
    fn apply_xmlns_removes_xmlns_attributes() {
        let html = r#"<html xmlns:c="https://example.com/custom">
<body><c:widget>Content</c:widget></body>
</html>"#;
        let doc = parse_html().one(html);
        let result = apply_xmlns(&doc).unwrap();
        // Fail loudly if the html element is missing instead of skipping the check.
        let html_elem = result
            .select_first("html")
            .expect("html element should be present after apply_xmlns");
        let attrs = html_elem.attributes.borrow();
        assert!(!attrs
            .map
            .iter()
            .any(|(name, _)| { name.local.as_ref().starts_with("xmlns:") }));
    }
}