use indexmap::IndexMap;
use quick_xml::{events::Event, Reader};
use sha2::{Digest, Sha256};
use std::collections::BTreeMap;
pub mod rules;
/// XML canonicalizer: parses a document, normalizes attribute order,
/// namespace declarations, child order and whitespace, and re-serializes it.
// The type name is not UpperCamelCase, so the naming lint is silenced here.
#[allow(non_camel_case_types)]
pub struct DB_C14N {
    /// Determinism settings supplied by the caller. Stored but not read by
    /// any method in this `impl`, hence the `dead_code` allowance.
    #[allow(dead_code)]
    config: super::determinism::DeterminismConfig,
    /// Version used when the input contains none of the namespace URIs
    /// recognised by `detect_version`.
    version: String,
}
impl DB_C14N {
pub fn new(config: super::determinism::DeterminismConfig) -> Self {
Self {
config: config,
version: "4.3".to_string(), }
}
/// Creates a canonicalizer with the given configuration and an explicit
/// fallback `version`, used when the version cannot be detected from the
/// document itself.
pub fn with_version(config: super::determinism::DeterminismConfig, version: String) -> Self {
    Self { config, version }
}
/// Determines the version to canonicalize against.
///
/// The raw document text is searched for each known namespace URI in turn;
/// the first URI found decides the version. When none is present the
/// version configured on `self` is returned.
fn detect_version(&self, xml: &str) -> String {
    // Probed in this order; the first hit wins.
    const KNOWN_NAMESPACES: [(&str, &str); 3] = [
        ("http://ddex.net/xml/ern/382", "3.8.2"),
        ("http://ddex.net/xml/ern/42", "4.2"),
        ("http://ddex.net/xml/ern/43", "4.3"),
    ];

    for (namespace_uri, version) in KNOWN_NAMESPACES.iter() {
        if xml.contains(*namespace_uri) {
            return (*version).to_string();
        }
    }
    self.version.clone()
}
/// Produces the canonical serialization of `xml`.
///
/// The version is detected from the raw text, then the document is parsed,
/// canonicalized for that version, and serialized.
///
/// # Errors
///
/// Returns the first error reported by parsing, canonicalization, or
/// serialization.
pub fn canonicalize(&self, xml: &str) -> Result<String, super::error::BuildError> {
    let version = self.detect_version(xml);
    self.parse_xml(xml)
        .and_then(|parsed| self.canonicalize_document(parsed, &version))
        .and_then(|normalized| self.serialize_canonical(normalized))
}
/// Returns the lowercase hexadecimal SHA-256 digest of the canonical form
/// of `xml`.
///
/// The document is canonicalized before hashing, so inputs that differ only
/// in ways canonicalization removes produce the same hash.
///
/// # Errors
///
/// Returns any error produced by [`Self::canonicalize`], for example when
/// `xml` cannot be parsed.
pub fn canonical_hash(&self, xml: &str) -> Result<String, super::error::BuildError> {
    // Hash the canonical bytes, not the raw input; hashing the raw text
    // would make the result depend on incidental formatting.
    let canonical = self.canonicalize(xml)?;
    let mut hasher = Sha256::new();
    hasher.update(canonical.as_bytes());
    let digest = hasher.finalize();
    Ok(format!("{:x}", digest))
}
fn parse_xml(&self, xml: &str) -> Result<XmlDocument, super::error::BuildError> {
let mut reader = Reader::from_str(xml);
reader.config_mut().trim_text(true);
let mut buf = Vec::new();
let mut element_stack: Vec<XmlElement> = Vec::new();
let mut text_content = String::new();
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) => {
if !text_content.trim().is_empty() {
if let Some(parent) = element_stack.last_mut() {
parent
.children
.push(XmlNode::Text(text_content.trim().to_string()));
}
text_content.clear();
}
let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
let mut attributes = IndexMap::new();
for attr in e.attributes() {
let attr = attr.map_err(|e| {
super::error::BuildError::XmlGeneration(format!(
"Attribute error: {}",
e
))
})?;
let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
let value = String::from_utf8_lossy(&attr.value).to_string();
attributes.insert(key, value);
}
let element = XmlElement {
name,
attributes,
children: Vec::new(),
};
element_stack.push(element);
}
Ok(Event::Empty(ref e)) => {
if !text_content.trim().is_empty() {
if let Some(parent) = element_stack.last_mut() {
parent
.children
.push(XmlNode::Text(text_content.trim().to_string()));
}
text_content.clear();
}
let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
let mut attributes = IndexMap::new();
for attr in e.attributes() {
let attr = attr.map_err(|e| {
super::error::BuildError::XmlGeneration(format!(
"Attribute error: {}",
e
))
})?;
let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
let value = String::from_utf8_lossy(&attr.value).to_string();
attributes.insert(key, value);
}
let element = XmlElement {
name,
attributes,
children: Vec::new(),
};
if let Some(parent) = element_stack.last_mut() {
parent.children.push(XmlNode::Element(element));
} else {
return Ok(XmlDocument { root: element });
}
}
Ok(Event::End(_)) => {
if !text_content.trim().is_empty() {
if let Some(parent) = element_stack.last_mut() {
parent
.children
.push(XmlNode::Text(text_content.trim().to_string()));
}
text_content.clear();
}
if let Some(completed_element) = element_stack.pop() {
if let Some(parent) = element_stack.last_mut() {
parent.children.push(XmlNode::Element(completed_element));
} else {
return Ok(XmlDocument {
root: completed_element,
});
}
}
}
Ok(Event::Text(e)) => {
text_content.push_str(&e.unescape().map_err(|e| {
super::error::BuildError::XmlGeneration(format!(
"Text unescape error: {}",
e
))
})?);
}
Ok(Event::Comment(e)) => {
let comment = String::from_utf8_lossy(&e).to_string();
if let Some(parent) = element_stack.last_mut() {
parent.children.push(XmlNode::Comment(comment));
}
}
Ok(Event::Eof) => break,
Err(e) => {
return Err(super::error::BuildError::XmlGeneration(format!(
"XML parse error: {}",
e
)))
}
_ => {} }
buf.clear();
}
Err(super::error::BuildError::XmlGeneration(
"No root element found".to_string(),
))
}
fn canonicalize_document(
&self,
mut doc: XmlDocument,
version: &str,
) -> Result<XmlDocument, super::error::BuildError> {
self.canonicalize_element(&mut doc.root, version)?;
Ok(doc)
}
/// Canonicalizes `element` and, recursively, everything beneath it.
///
/// Steps, in order: attributes are sorted by name, namespace declarations
/// are rewritten by `apply_namespace_prefix_locking`, children are reordered
/// by `sort_child_elements`, and then each child is visited: elements are
/// canonicalized recursively, text has its whitespace normalized, comments
/// are left untouched.
///
/// # Errors
///
/// Propagates the first error from any of the steps above.
fn canonicalize_element(
    &self,
    element: &mut XmlElement,
    version: &str,
) -> Result<(), super::error::BuildError> {
    // Keys are unique, so an in-place key sort yields the same order as
    // round-tripping through an ordered map.
    element.attributes.sort_keys();
    self.apply_namespace_prefix_locking(&mut element.attributes, version)?;
    self.sort_child_elements(&mut element.children, &element.name, version)?;

    for node in element.children.iter_mut() {
        match node {
            XmlNode::Element(nested) => self.canonicalize_element(nested, version)?,
            XmlNode::Text(content) => *content = self.normalize_whitespace(content),
            XmlNode::Comment(_) => {}
        }
    }
    Ok(())
}
/// Rewrites the namespace declarations among `attributes` through the
/// canonical namespace manager.
///
/// `xmlns` and `xmlns:*` attributes are separated from the rest and passed
/// (keyed by prefix, with the empty string for the default namespace) to
/// `CanonicalNamespaceManager::canonicalize_namespaces`. The resulting
/// declarations are written back first, followed by the remaining
/// attributes in their existing order.
///
/// # Errors
///
/// The visible body never returns an error; the `Result` is always `Ok`.
fn apply_namespace_prefix_locking(
    &self,
    attributes: &mut IndexMap<String, String>,
    version: &str,
) -> Result<(), super::error::BuildError> {
    let manager = rules::CanonicalNamespaceManager::new();

    // Split the attribute list into namespace declarations and everything else.
    let mut declared_namespaces = IndexMap::new();
    let mut plain_attributes = IndexMap::new();
    for (name, value) in attributes.iter() {
        if let Some(prefix) = name.strip_prefix("xmlns:") {
            declared_namespaces.insert(prefix.to_string(), value.clone());
        } else if name == "xmlns" {
            declared_namespaces.insert("".to_string(), value.clone());
        } else {
            plain_attributes.insert(name.clone(), value.clone());
        }
    }

    // Canonical declarations go first, then the untouched attributes.
    let mut rebuilt = IndexMap::new();
    for (prefix, uri) in manager.canonicalize_namespaces(&declared_namespaces, version) {
        let attribute_name = if prefix.is_empty() {
            "xmlns".to_string()
        } else {
            format!("xmlns:{}", prefix)
        };
        rebuilt.insert(attribute_name, uri);
    }
    rebuilt.extend(plain_attributes);

    *attributes = rebuilt;
    Ok(())
}
fn sort_child_elements(
&self,
children: &mut Vec<XmlNode>,
parent_name: &str,
version: &str,
) -> Result<(), super::error::BuildError> {
let manager = rules::CanonicalNamespaceManager::new();
if let Some(order) = manager.get_canonical_element_order(parent_name, version) {
let order_map: IndexMap<String, usize> = order
.iter()
.enumerate()
.map(|(i, name)| (name.clone(), i))
.collect();
children.sort_by(|a, b| match (a, b) {
(XmlNode::Element(elem_a), XmlNode::Element(elem_b)) => {
let order_a = order_map.get(&elem_a.name).unwrap_or(&usize::MAX);
let order_b = order_map.get(&elem_b.name).unwrap_or(&usize::MAX);
order_a
.cmp(order_b)
.then_with(|| elem_a.name.cmp(&elem_b.name))
}
(XmlNode::Element(_), _) => std::cmp::Ordering::Less,
(_, XmlNode::Element(_)) => std::cmp::Ordering::Greater,
_ => std::cmp::Ordering::Equal,
});
}
Ok(())
}
/// Collapses multi-line text into a single line.
///
/// The text is split at every carriage return and line feed, each piece is
/// trimmed, empty pieces are dropped, and the remainder is joined with the
/// separator below.
fn normalize_whitespace(&self, text: &str) -> String {
    let pieces: Vec<&str> = text
        .split(|c: char| c == '\r' || c == '\n')
        .map(str::trim)
        .filter(|piece| !piece.is_empty())
        .collect();
    pieces.join(" ")
}
/// Serializes `doc` to its canonical textual form.
///
/// The output starts with `rules::XML_DECLARATION` on its own line, followed
/// by the serialized root element. Trailing whitespace is then stripped from
/// every line, lines are joined with `\n`, and a single `\n` ends the result.
///
/// # Errors
///
/// Propagates errors from `serialize_element`, and returns
/// `BuildError::XmlGeneration` if the produced bytes are not valid UTF-8.
fn serialize_canonical(&self, doc: XmlDocument) -> Result<String, super::error::BuildError> {
    let mut bytes: Vec<u8> = Vec::new();
    bytes.extend_from_slice(rules::XML_DECLARATION.as_bytes());
    bytes.push(b'\n');
    self.serialize_element(&doc.root, &mut bytes, 0)?;

    let text = String::from_utf8(bytes).map_err(|e| {
        super::error::BuildError::XmlGeneration(format!("UTF-8 conversion error: {}", e))
    })?;

    let mut canonical = String::with_capacity(text.len() + 1);
    for (index, line) in text.lines().enumerate() {
        if index > 0 {
            canonical.push('\n');
        }
        canonical.push_str(line.trim_end());
    }
    canonical.push('\n');
    Ok(canonical)
}
/// Appends the serialized form of `element` to `output`, indented by
/// `indent_level` repetitions of the indent unit.
///
/// * An element without children is written as a self-closing tag.
/// * An element whose children are all text is written on one line with the
///   escaped text between the tags.
/// * Any other element puts each child on its own line, one level deeper:
///   child elements recursively, non-blank text trimmed and escaped,
///   comments wrapped in `<!--` and `-->`.
///
/// Attribute values are escaped for double-quoted attributes. Every element
/// ends with a newline.
///
/// # Errors
///
/// Propagates errors from recursive calls; the visible body produces none
/// of its own.
fn serialize_element(
    &self,
    element: &XmlElement,
    output: &mut Vec<u8>,
    indent_level: usize,
) -> Result<(), super::error::BuildError> {
    let own_indent = " ".repeat(indent_level);
    let tag_name = element.name.as_bytes();

    // Opening tag with attributes.
    output.extend_from_slice(own_indent.as_bytes());
    output.push(b'<');
    output.extend_from_slice(tag_name);
    for (attr_name, attr_value) in element.attributes.iter() {
        let escaped = html_escape::encode_double_quoted_attribute(attr_value);
        output.push(b' ');
        output.extend_from_slice(attr_name.as_bytes());
        output.extend_from_slice(b"=\"");
        output.extend_from_slice(escaped.as_bytes());
        output.push(b'"');
    }

    if element.children.is_empty() {
        output.extend_from_slice(b"/>");
        output.push(b'\n');
        return Ok(());
    }
    output.push(b'>');

    let has_non_text_child = element
        .children
        .iter()
        .any(|child| !matches!(child, XmlNode::Text(_)));

    if has_non_text_child {
        let nested_indent = " ".repeat(indent_level + 1);
        output.push(b'\n');
        for child in element.children.iter() {
            match child {
                XmlNode::Element(nested) => {
                    self.serialize_element(nested, output, indent_level + 1)?;
                }
                XmlNode::Text(text) => {
                    let trimmed = text.trim();
                    if !trimmed.is_empty() {
                        output.extend_from_slice(nested_indent.as_bytes());
                        output.extend_from_slice(html_escape::encode_text(trimmed).as_bytes());
                        output.push(b'\n');
                    }
                }
                XmlNode::Comment(comment) => {
                    output.extend_from_slice(nested_indent.as_bytes());
                    output.extend_from_slice(b"<!--");
                    output.extend_from_slice(comment.as_bytes());
                    output.extend_from_slice(b"-->");
                    output.push(b'\n');
                }
            }
        }
        // The closing tag sits on its own line at this element's indent.
        output.extend_from_slice(own_indent.as_bytes());
    } else {
        // Text-only content stays inline between the tags.
        for child in element.children.iter() {
            if let XmlNode::Text(text) = child {
                output.extend_from_slice(html_escape::encode_text(text).as_bytes());
            }
        }
    }

    output.extend_from_slice(b"</");
    output.extend_from_slice(tag_name);
    output.push(b'>');
    output.push(b'\n');
    Ok(())
}
}
/// In-memory representation of a parsed XML document.
struct XmlDocument {
    /// The single top-level element of the document.
    root: XmlElement,
}
/// One XML element: its tag name, attributes, and child nodes.
struct XmlElement {
    /// Tag name exactly as it appears in the source, including any prefix.
    name: String,
    /// Attributes keyed by name; iteration order is the serialization order.
    attributes: IndexMap<String, String>,
    /// Child nodes in serialization order.
    children: Vec<XmlNode>,
}
/// A child of an [`XmlElement`].
enum XmlNode {
    /// A nested element.
    Element(XmlElement),
    /// Character data, stored unescaped.
    Text(String),
    /// The body of a comment, without the `<!--` / `-->` delimiters.
    Comment(String),
}
#[cfg(test)]
mod tests;
/// Test helper: builds a canonicalizer from the default determinism
/// configuration.
#[cfg(test)]
pub fn create_test_canonicalizer() -> DB_C14N {
    DB_C14N::new(super::determinism::DeterminismConfig::default())
}
/// Test helper: builds a canonicalizer from the default determinism
/// configuration with `version` as its fallback version.
#[cfg(test)]
pub fn create_test_canonicalizer_with_version(version: &str) -> DB_C14N {
    let fallback_version = version.to_string();
    DB_C14N::with_version(
        super::determinism::DeterminismConfig::default(),
        fallback_version,
    )
}