use indexmap::IndexMap;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// An XML element held as a generic tree node: its name, namespace data,
/// attributes, children, comments and processing instructions, together with
/// the raw text it was created from.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct XmlFragment {
/// Raw XML text for this element. `to_canonical_xml` returns it unchanged
/// when `preserve_formatting` is `true`.
pub raw_content: String,
/// Element name without a prefix; `qualified_name` prepends
/// `namespace_prefix` when one is set.
pub element_name: String,
/// Namespace URI of the element, compared by `is_from_namespace`.
pub namespace_uri: Option<String>,
/// Prefix that `qualified_name` places before `element_name` as
/// `prefix:name`.
pub namespace_prefix: Option<String>,
/// Prefix -> URI declarations written as `xmlns:prefix` attributes, sorted
/// by prefix; an empty prefix key is written as plain `xmlns`.
pub namespace_declarations: IndexMap<String, String>,
/// Attribute name -> value pairs, written sorted by name.
pub attributes: IndexMap<String, String>,
/// Child elements, serialized one indent level deeper than this element.
pub children: Vec<XmlFragment>,
/// Text content. When `Some`, `to_canonical_xml` writes only this text
/// between the tags and does not emit children, comments or processing
/// instructions.
pub text_content: Option<String>,
/// Processing instructions written inside the element, ahead of comments
/// and children.
pub processing_instructions: Vec<ProcessingInstruction>,
/// Comments written inside the element, after processing instructions and
/// before children.
pub comments: Vec<Comment>,
/// Value stored by `set_position_hint`. It is not read by the code visible
/// here; presumably an ordering hint for callers (TODO confirm).
pub position_hint: Option<usize>,
/// When `true`, serialization returns `raw_content` instead of rebuilding
/// the element from the structured fields.
pub preserve_formatting: bool,
}
/// Placement tag carried by a [`Comment`]. In `XmlFragment::to_canonical_xml`
/// the variant only selects how far the comment line is indented; every
/// comment is written inside the element regardless of the variant.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub enum CommentPosition {
/// Written at the element's own indentation.
Before,
/// Written one space deeper than the element.
FirstChild,
/// Written one space deeper than the element.
LastChild,
/// Written at the element's own indentation.
After,
/// Written with no indentation at all.
Inline,
}
/// An XML comment plus optional location metadata and free-form hints.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Comment {
/// Comment text without the `<!--` / `-->` delimiters (`to_xml` adds them).
pub content: String,
/// Placement tag; used by `XmlFragment::to_canonical_xml` to pick the
/// comment's indentation.
pub position: CommentPosition,
/// Optional XPath supplied through `with_location`; not read by the code
/// visible here.
pub xpath: Option<String>,
/// Optional line number supplied through `with_location`; not read by the
/// code visible here (NOTE(review): 0- or 1-based is not established here).
pub line_number: Option<usize>,
/// Optional column number supplied through `with_location`; not read by the
/// code visible here.
pub column_number: Option<usize>,
/// When `true`, `canonical_content` and `to_xml` use `content` exactly as
/// stored instead of trimming / sanitizing it.
pub preserve_formatting: bool,
/// Key -> value hints added with `with_hint`; not interpreted by the code
/// visible here.
pub processing_hints: IndexMap<String, String>,
}
/// An XML processing instruction of the form `<?target data?>`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ProcessingInstruction {
/// Name written immediately after `<?`.
pub target: String,
/// Optional payload; when present it is written after the target,
/// separated by a single space.
pub data: Option<String>,
}
/// Container for preserved XML data: fragments keyed by a location string,
/// document-level namespaces, processing instructions and comments, plus a
/// free-form JSON map.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct Extensions {
/// Fragments keyed by a location string. `get_fragments_matching` selects
/// entries by key prefix.
pub fragments: IndexMap<String, XmlFragment>,
/// Namespace prefix -> URI pairs registered with `add_global_namespace`.
pub global_namespaces: IndexMap<String, String>,
/// Document-level processing instructions, in insertion order.
pub document_processing_instructions: Vec<ProcessingInstruction>,
/// Document-level comments, in insertion order.
pub document_comments: Vec<Comment>,
/// Arbitrary JSON values accessed through `insert` / `get`.
/// `#[serde(flatten)]` makes serde read and write these entries at the same
/// level as the named fields rather than under a `legacy_data` key.
#[serde(flatten)]
pub legacy_data: HashMap<String, serde_json::Value>,
}
impl XmlFragment {
    /// Creates a fragment with no namespace information.
    ///
    /// All collections start empty, `text_content` and `position_hint` are
    /// `None`, and `preserve_formatting` is `false`.
    pub fn new(element_name: String, raw_content: String) -> Self {
        Self::with_namespace(element_name, None, None, raw_content)
    }

    /// Creates a fragment with the given namespace URI and prefix.
    ///
    /// Apart from the namespace fields the result is identical to what
    /// [`XmlFragment::new`] produces.
    pub fn with_namespace(
        element_name: String,
        namespace_uri: Option<String>,
        namespace_prefix: Option<String>,
        raw_content: String,
    ) -> Self {
        Self {
            raw_content,
            element_name,
            namespace_uri,
            namespace_prefix,
            namespace_declarations: IndexMap::new(),
            attributes: IndexMap::new(),
            children: Vec::new(),
            text_content: None,
            processing_instructions: Vec::new(),
            comments: Vec::new(),
            position_hint: None,
            preserve_formatting: false,
        }
    }

    /// Returns `prefix:element_name` when a prefix is set, otherwise just the
    /// element name.
    pub fn qualified_name(&self) -> String {
        match self.namespace_prefix {
            Some(ref prefix) => format!("{}:{}", prefix, self.element_name),
            None => self.element_name.clone(),
        }
    }

    /// Returns `true` when the fragment has a namespace URI equal to
    /// `namespace_uri`; a fragment without a URI never matches.
    pub fn is_from_namespace(&self, namespace_uri: &str) -> bool {
        self.namespace_uri.as_deref() == Some(namespace_uri)
    }

    /// Appends `child` to the end of the child list.
    pub fn add_child(&mut self, child: XmlFragment) {
        self.children.push(child);
    }

    /// Sets attribute `name` to `value`, replacing any existing value.
    pub fn add_attribute(&mut self, name: String, value: String) {
        self.attributes.insert(name, value);
    }

    /// Declares namespace `prefix` -> `uri`, replacing any existing
    /// declaration for that prefix. An empty prefix is the default namespace.
    pub fn add_namespace_declaration(&mut self, prefix: String, uri: String) {
        self.namespace_declarations.insert(prefix, uri);
    }

    /// Stores `position` as the fragment's position hint.
    pub fn set_position_hint(&mut self, position: usize) {
        self.position_hint = Some(position);
    }

    /// Marks the fragment so that serialization returns `raw_content` as-is.
    pub fn preserve_formatting(&mut self) {
        self.preserve_formatting = true;
    }

    /// Serializes the fragment to XML text.
    ///
    /// * With `preserve_formatting` set, `raw_content` is returned verbatim
    ///   (no indentation is applied).
    /// * Otherwise the element is rebuilt: namespace declarations sorted by
    ///   prefix, then attributes sorted by name, both with escaped values.
    /// * If `text_content` is `Some`, only the escaped text is written between
    ///   the tags; children, comments and processing instructions are not
    ///   emitted in that case.
    /// * An element with no text, children, comments or processing
    ///   instructions is written self-closed.
    ///
    /// `indent_level` is the number of leading spaces for this element; each
    /// nesting level adds one.
    pub fn to_canonical_xml(&self, indent_level: usize) -> String {
        if self.preserve_formatting {
            return self.raw_content.clone();
        }

        let indent = " ".repeat(indent_level);
        let tag = self.qualified_name();
        let mut xml = format!("{}<{}", indent, tag);

        let mut sorted_ns: Vec<_> = self.namespace_declarations.iter().collect();
        sorted_ns.sort_by_key(|(prefix, _)| prefix.as_str());
        for (prefix, uri) in sorted_ns {
            // Namespace URIs land inside a double-quoted attribute just like
            // ordinary attribute values, so they need the same escaping.
            let uri = html_escape::encode_double_quoted_attribute(uri);
            if prefix.is_empty() {
                xml.push_str(&format!(" xmlns=\"{}\"", uri));
            } else {
                xml.push_str(&format!(" xmlns:{}=\"{}\"", prefix, uri));
            }
        }

        let mut sorted_attrs: Vec<_> = self.attributes.iter().collect();
        sorted_attrs.sort_by_key(|(name, _)| name.as_str());
        for (name, value) in sorted_attrs {
            xml.push_str(&format!(
                " {}=\"{}\"",
                name,
                html_escape::encode_double_quoted_attribute(value)
            ));
        }

        if let Some(ref text) = self.text_content {
            xml.push('>');
            xml.push_str(&html_escape::encode_text(text));
            xml.push_str(&format!("</{}>", tag));
        } else if self.children.is_empty()
            && self.processing_instructions.is_empty()
            && self.comments.is_empty()
        {
            xml.push_str("/>");
        } else {
            xml.push_str(">\n");

            for pi in &self.processing_instructions {
                xml.push_str(&format!("{} <?{}", indent, pi.target));
                if let Some(ref data) = pi.data {
                    xml.push(' ');
                    xml.push_str(data);
                }
                xml.push_str("?>\n");
            }

            for comment in &self.comments {
                // The position only controls how far the comment is indented.
                let comment_indent = match comment.position {
                    CommentPosition::Before | CommentPosition::After => indent.clone(),
                    CommentPosition::FirstChild | CommentPosition::LastChild => {
                        format!("{} ", indent)
                    }
                    CommentPosition::Inline => String::new(),
                };
                xml.push_str(&format!("{}{}\n", comment_indent, comment.to_xml()));
            }

            for child in &self.children {
                xml.push_str(&child.to_canonical_xml(indent_level + 1));
                xml.push('\n');
            }

            xml.push_str(&format!("{}</{}>", indent, tag));
        }

        xml
    }
}
impl Comment {
    /// Creates a comment with the given text and position and no location
    /// metadata; `preserve_formatting` starts as `false` and no hints are set.
    pub fn new(content: String, position: CommentPosition) -> Self {
        Self::with_location(content, position, None, None, None)
    }

    /// Creates a comment that also records where it was found.
    pub fn with_location(
        content: String,
        position: CommentPosition,
        xpath: Option<String>,
        line_number: Option<usize>,
        column_number: Option<usize>,
    ) -> Self {
        Self {
            content,
            position,
            xpath,
            line_number,
            column_number,
            preserve_formatting: false,
            processing_hints: IndexMap::new(),
        }
    }

    /// Creates a document-level comment; these use `CommentPosition::Before`.
    pub fn document_comment(content: String) -> Self {
        Self::new(content, CommentPosition::Before)
    }

    /// Builder-style switch: keep `content` exactly as stored when rendering.
    pub fn preserve_formatting(mut self) -> Self {
        self.preserve_formatting = true;
        self
    }

    /// Builder-style helper that records a processing hint, replacing any
    /// existing value for `key`.
    pub fn with_hint(mut self, key: String, value: String) -> Self {
        self.processing_hints.insert(key, value);
        self
    }

    /// Returns the comment text: verbatim when `preserve_formatting` is set,
    /// otherwise with leading and trailing whitespace removed.
    pub fn canonical_content(&self) -> String {
        if self.preserve_formatting {
            self.content.clone()
        } else {
            self.content.trim().to_string()
        }
    }

    /// Renders the comment as `<!--...-->`.
    ///
    /// With `preserve_formatting` set the content is inserted unchanged, so
    /// the caller is responsible for it being legal comment text. Otherwise
    /// the content is trimmed and made safe for an XML comment: adjacent
    /// hyphens are separated by a space and a trailing hyphen is padded, so
    /// the output never contains `--` inside the comment body and never ends
    /// in `--->`.
    pub fn to_xml(&self) -> String {
        let content = if self.preserve_formatting {
            self.content.clone()
        } else {
            Self::break_hyphen_runs(self.content.trim())
        };
        format!("<!--{}-->", content)
    }

    /// Inserts a space between every pair of adjacent hyphens and after a
    /// trailing hyphen. A plain `replace("--", "- -")` is not enough because
    /// it leaves `--` behind for odd-length runs such as `---`.
    fn break_hyphen_runs(content: &str) -> String {
        let mut sanitized = String::with_capacity(content.len() + 2);
        let mut previous_was_hyphen = false;
        for ch in content.chars() {
            if ch == '-' && previous_was_hyphen {
                sanitized.push(' ');
            }
            sanitized.push(ch);
            previous_was_hyphen = ch == '-';
        }
        if previous_was_hyphen {
            // A body ending in '-' would fuse with the closing "-->".
            sanitized.push(' ');
        }
        sanitized
    }
}
impl ProcessingInstruction {
pub fn new(target: String, data: Option<String>) -> Self {
Self { target, data }
}
}
impl Extensions {
pub fn new() -> Self {
Self {
fragments: IndexMap::new(),
global_namespaces: IndexMap::new(),
document_processing_instructions: Vec::new(),
document_comments: Vec::new(),
legacy_data: HashMap::new(),
}
}
pub fn add_fragment(&mut self, location: String, fragment: XmlFragment) {
self.fragments.insert(location, fragment);
}
pub fn get_fragment(&self, location: &str) -> Option<&XmlFragment> {
self.fragments.get(location)
}
pub fn get_fragments_matching(&self, pattern: &str) -> Vec<(&String, &XmlFragment)> {
self.fragments
.iter()
.filter(|(location, _)| location.starts_with(pattern))
.collect()
}
pub fn add_global_namespace(&mut self, prefix: String, uri: String) {
self.global_namespaces.insert(prefix, uri);
}
pub fn add_document_processing_instruction(&mut self, pi: ProcessingInstruction) {
self.document_processing_instructions.push(pi);
}
pub fn add_document_comment(&mut self, comment: String) {
self.document_comments
.push(Comment::document_comment(comment));
}
pub fn add_document_comment_structured(&mut self, comment: Comment) {
self.document_comments.push(comment);
}
pub fn insert(&mut self, key: String, value: serde_json::Value) {
self.legacy_data.insert(key, value);
}
pub fn get(&self, key: &str) -> Option<&serde_json::Value> {
self.legacy_data.get(key)
}
pub fn is_empty(&self) -> bool {
self.fragments.is_empty()
&& self.global_namespaces.is_empty()
&& self.document_processing_instructions.is_empty()
&& self.document_comments.is_empty()
&& self.legacy_data.is_empty()
}
pub fn count(&self) -> usize {
self.fragments.len()
+ self.global_namespaces.len()
+ self.document_processing_instructions.len()
+ self.document_comments.len()
+ self.legacy_data.len()
}
pub fn merge(&mut self, other: Extensions) {
for (location, fragment) in other.fragments {
self.fragments.insert(location, fragment);
}
for (prefix, uri) in other.global_namespaces {
self.global_namespaces.insert(prefix, uri);
}
self.document_processing_instructions
.extend(other.document_processing_instructions);
self.document_comments.extend(other.document_comments);
for (key, value) in other.legacy_data {
self.legacy_data.insert(key, value);
}
}
pub fn clear(&mut self) {
self.fragments.clear();
self.global_namespaces.clear();
self.document_processing_instructions.clear();
self.document_comments.clear();
self.legacy_data.clear();
}
}
pub mod utils {
use super::*;
/// Builds a slash-separated location key: the path segments, then the
/// namespace URI when one is given, then the element name.
///
/// An empty `element_path` yields a key that starts with `/`.
pub fn generate_location_key(
    element_path: &[&str],
    namespace_uri: Option<&str>,
    element_name: &str,
) -> String {
    let mut key = element_path.join("/");
    if let Some(ns) = namespace_uri {
        key.push('/');
        key.push_str(ns);
    }
    key.push('/');
    key.push_str(element_name);
    key
}
/// Returns `true` when `namespace_uri` is one of the namespaces this module
/// treats as known: the ERN 3.8.2 / 4.2 / 4.3 and AVS URIs, plus the W3C
/// XMLSchema-instance URI. The comparison is an exact string match.
pub fn is_ddex_namespace(namespace_uri: &str) -> bool {
    matches!(
        namespace_uri,
        "http://ddex.net/xml/ern/382"
            | "http://ddex.net/xml/ern/42"
            | "http://ddex.net/xml/ern/43"
            | "http://ddex.net/xml/avs"
            | "http://www.w3.org/2001/XMLSchema-instance"
    )
}
/// Returns the namespace prefix of a qualified name (`prefix:local`).
///
/// Yields `None` when the name contains no `:` or when the part before the
/// first `:` is empty. Splitting on the first colon — rather than taking the
/// first piece of `split(':')` — is what keeps an unprefixed name such as
/// `Release` from being reported as its own prefix.
pub fn extract_namespace_prefix(qualified_name: &str) -> Option<&str> {
    qualified_name
        .split_once(':')
        .map(|(prefix, _)| prefix)
        .filter(|prefix| !prefix.is_empty())
}
/// Returns the part of `qualified_name` after its last `:`; a name without a
/// colon is returned unchanged.
pub fn extract_local_name(qualified_name: &str) -> &str {
    match qualified_name.rsplit_once(':') {
        Some((_, local)) => local,
        None => qualified_name,
    }
}
/// Checks the minimum requirements for a fragment: a non-empty element name
/// and non-empty raw content.
///
/// # Errors
///
/// Returns a message naming the first field found empty; the element name is
/// checked before the raw content.
pub fn validate_xml_fragment(fragment: &XmlFragment) -> Result<(), String> {
    let problem = if fragment.element_name.is_empty() {
        Some("Element name cannot be empty")
    } else if fragment.raw_content.is_empty() {
        Some("Raw content cannot be empty")
    } else {
        None
    };

    match problem {
        Some(message) => Err(message.to_string()),
        None => Ok(()),
    }
}
}
// Unit tests for the fragment types, the `Extensions` container and the
// `utils` helpers defined in this file.
#[cfg(test)]
mod tests {
use super::*;
// `XmlFragment::new` stores the name and raw content as given, and the
// qualified name of an unprefixed fragment is just the element name.
#[test]
fn test_xml_fragment_creation() {
let fragment = XmlFragment::new(
"customElement".to_string(),
"<customElement>content</customElement>".to_string(),
);
assert_eq!(fragment.element_name, "customElement");
assert_eq!(
fragment.raw_content,
"<customElement>content</customElement>"
);
assert_eq!(fragment.qualified_name(), "customElement");
}
// A prefixed fragment reports `prefix:name` and matches its namespace URI.
#[test]
fn test_xml_fragment_with_namespace() {
let fragment = XmlFragment::with_namespace(
"customElement".to_string(),
Some("http://example.com/custom".to_string()),
Some("custom".to_string()),
"<custom:customElement>content</custom:customElement>".to_string(),
);
assert_eq!(fragment.qualified_name(), "custom:customElement");
assert!(fragment.is_from_namespace("http://example.com/custom"));
}
// A new container is empty; adding one fragment makes it non-empty with a
// count of 1.
#[test]
fn test_extensions_container() {
let mut extensions = Extensions::new();
assert!(extensions.is_empty());
let fragment = XmlFragment::new("test".to_string(), "<test/>".to_string());
extensions.add_fragment("message/test".to_string(), fragment);
assert!(!extensions.is_empty());
assert_eq!(extensions.count(), 1);
}
// With `preserve_formatting` left off, the element is rebuilt from its
// attribute and text content rather than copied from `raw_content`.
#[test]
fn test_canonical_xml_generation() {
let mut fragment = XmlFragment::new(
"customElement".to_string(),
"<customElement attr=\"value\">text</customElement>".to_string(),
);
fragment.add_attribute("attr".to_string(), "value".to_string());
fragment.text_content = Some("text".to_string());
let xml = fragment.to_canonical_xml(0);
assert!(xml.contains("<customElement attr=\"value\">text</customElement>"));
}
// The location key is path segments, namespace URI and element name joined
// with `/`.
#[test]
fn test_location_key_generation() {
let key = utils::generate_location_key(
&["message", "header"],
Some("http://example.com/ns"),
"customElement",
);
assert_eq!(key, "message/header/http://example.com/ns/customElement");
}
// Known DDEX URIs are recognized; an unrelated URI is not.
#[test]
fn test_ddex_namespace_detection() {
assert!(utils::is_ddex_namespace("http://ddex.net/xml/ern/43"));
assert!(utils::is_ddex_namespace("http://ddex.net/xml/avs"));
assert!(!utils::is_ddex_namespace("http://example.com/custom"));
}
}
#[cfg(test)]
pub mod test_data;
#[cfg(test)]
mod comprehensive_tests;