mod escape;
mod ns_common;
pub(crate) mod ns_exclusive;
pub(crate) mod ns_inclusive;
mod prefix;
pub(crate) mod serialize;
mod xml_base;
use std::collections::HashSet;
use roxmltree::{Document, Node};
use ns_exclusive::ExclusiveNsRenderer;
use ns_inclusive::InclusiveNsRenderer;
use serialize::{C14nConfig, serialize_canonical};
/// The canonicalization variants this module can dispatch to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum C14nMode {
    /// Inclusive canonicalization, identified by the `REC-xml-c14n-20010315` URIs.
    Inclusive1_0,
    /// Inclusive canonicalization, identified by the `xml-c14n11` URIs.
    Inclusive1_1,
    /// Exclusive canonicalization, identified by the `xml-exc-c14n#` URIs.
    Exclusive1_0,
}

/// A selected canonicalization algorithm: the mode, whether comments are
/// retained, and a set of namespace prefixes (the empty string stands for the
/// default namespace, see [`C14nAlgorithm::with_prefix_list`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct C14nAlgorithm {
    mode: C14nMode,
    with_comments: bool,
    inclusive_prefixes: HashSet<String>,
}

impl C14nAlgorithm {
    /// Returns the canonicalization mode.
    pub fn mode(&self) -> C14nMode {
        self.mode
    }

    /// Returns the stored comment flag (`true` for the `#WithComments` URIs).
    pub fn with_comments(&self) -> bool {
        self.with_comments
    }

    /// Returns the stored prefix set; empty unless
    /// [`with_prefix_list`](Self::with_prefix_list) was called.
    pub fn inclusive_prefixes(&self) -> &HashSet<String> {
        &self.inclusive_prefixes
    }

    /// Builds an algorithm with the given mode and comment flag and an empty
    /// prefix set.
    pub fn new(mode: C14nMode, with_comments: bool) -> Self {
        let inclusive_prefixes = HashSet::new();
        Self {
            mode,
            with_comments,
            inclusive_prefixes,
        }
    }

    /// Looks up an algorithm by its identifier URI.
    ///
    /// The comparison is an exact string match. Returns `None` for any URI
    /// that is not one of the six known identifiers. The resulting prefix set
    /// is always empty.
    pub fn from_uri(uri: &str) -> Option<Self> {
        // (identifier, mode, with_comments) for every supported algorithm.
        const KNOWN: [(&str, C14nMode, bool); 6] = [
            (
                "http://www.w3.org/TR/2001/REC-xml-c14n-20010315",
                C14nMode::Inclusive1_0,
                false,
            ),
            (
                "http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments",
                C14nMode::Inclusive1_0,
                true,
            ),
            (
                "http://www.w3.org/2006/12/xml-c14n11",
                C14nMode::Inclusive1_1,
                false,
            ),
            (
                "http://www.w3.org/2006/12/xml-c14n11#WithComments",
                C14nMode::Inclusive1_1,
                true,
            ),
            (
                "http://www.w3.org/2001/10/xml-exc-c14n#",
                C14nMode::Exclusive1_0,
                false,
            ),
            (
                "http://www.w3.org/2001/10/xml-exc-c14n#WithComments",
                C14nMode::Exclusive1_0,
                true,
            ),
        ];

        KNOWN
            .iter()
            .find(|entry| entry.0 == uri)
            .map(|&(_, mode, with_comments)| Self::new(mode, with_comments))
    }

    /// Replaces the prefix set with the whitespace-separated tokens of
    /// `prefix_list` and returns the updated algorithm.
    ///
    /// The token `#default` is stored as the empty string. Any previously
    /// stored prefixes are discarded, not merged.
    pub fn with_prefix_list(mut self, prefix_list: &str) -> Self {
        let mut prefixes = HashSet::new();
        for token in prefix_list.split_whitespace() {
            let prefix = match token {
                "#default" => "",
                named => named,
            };
            prefixes.insert(prefix.to_owned());
        }
        self.inclusive_prefixes = prefixes;
        self
    }

    /// Returns the identifier URI for this mode / comment-flag combination.
    ///
    /// This is the inverse of [`from_uri`](Self::from_uri); the prefix set
    /// does not influence the result.
    pub fn uri(&self) -> &'static str {
        match self.mode {
            C14nMode::Inclusive1_0 if self.with_comments => {
                "http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments"
            }
            C14nMode::Inclusive1_0 => "http://www.w3.org/TR/2001/REC-xml-c14n-20010315",
            C14nMode::Inclusive1_1 if self.with_comments => {
                "http://www.w3.org/2006/12/xml-c14n11#WithComments"
            }
            C14nMode::Inclusive1_1 => "http://www.w3.org/2006/12/xml-c14n11",
            C14nMode::Exclusive1_0 if self.with_comments => {
                "http://www.w3.org/2001/10/xml-exc-c14n#WithComments"
            }
            C14nMode::Exclusive1_0 => "http://www.w3.org/2001/10/xml-exc-c14n#",
        }
    }
}
/// Errors returned by the canonicalization entry points in this module.
///
/// `Display` and `std::error::Error` are derived through `thiserror`; the
/// `#[error(...)]` strings below are the user-visible messages.
#[derive(Debug, thiserror::Error)]
pub enum C14nError {
/// The input could not be turned into a document. `canonicalize_xml` uses
/// this for both invalid UTF-8 and XML parse failures; the payload is the
/// underlying error message.
#[error("XML parse error: {0}")]
Parse(String),
/// A node reference was not valid.
/// NOTE(review): not constructed anywhere in this file; presumably raised
/// by the serializer or by callers. Confirm.
#[error("invalid node reference")]
InvalidNode,
/// The requested algorithm is not supported; the payload is interpolated
/// into the message.
/// NOTE(review): not constructed in this file (`from_uri` returns `None`
/// instead); presumably callers wrap the unknown URI here. Confirm.
#[error("unsupported algorithm: {0}")]
UnsupportedAlgorithm(String),
/// An I/O failure. `#[from]` generates `From<std::io::Error>`, so `?`
/// converts `std::io::Error` into this variant automatically.
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
}
/// Canonicalizes `doc` into `output` using the algorithm described by `algo`.
///
/// The mode decides two things that are handed to `serialize_canonical`:
///
/// * the serializer configuration: `inherit_xml_attrs` is enabled for both
///   inclusive modes and disabled for exclusive mode; `fixup_xml_base` is
///   enabled only for `Inclusive1_1`;
/// * the namespace renderer: `InclusiveNsRenderer` for the inclusive modes,
///   `ExclusiveNsRenderer` (built from `algo`'s prefix set) for exclusive mode.
///
/// `node_set` and the comment flag are forwarded unchanged.
/// NOTE(review): `node_set` presumably selects which nodes are emitted, with
/// `None` meaning the whole document. Confirm against `serialize_canonical`.
///
/// # Errors
///
/// Returns whatever error `serialize_canonical` reports.
pub fn canonicalize(
    doc: &Document,
    node_set: Option<&dyn Fn(Node) -> bool>,
    algo: &C14nAlgorithm,
    output: &mut Vec<u8>,
) -> Result<(), C14nError> {
    // Per-mode serializer switches: (inherit_xml_attrs, fixup_xml_base).
    let (inherit_xml_attrs, fixup_xml_base) = match algo.mode {
        C14nMode::Inclusive1_0 => (true, false),
        C14nMode::Inclusive1_1 => (true, true),
        C14nMode::Exclusive1_0 => (false, false),
    };
    let config = C14nConfig {
        inherit_xml_attrs,
        fixup_xml_base,
    };
    let keep_comments = algo.with_comments;

    // The two inclusive modes share a renderer; only exclusive mode needs the
    // prefix list.
    match algo.mode {
        C14nMode::Inclusive1_0 | C14nMode::Inclusive1_1 => {
            let renderer = InclusiveNsRenderer;
            serialize_canonical(doc, node_set, keep_comments, &renderer, config, output)
        }
        C14nMode::Exclusive1_0 => {
            let renderer = ExclusiveNsRenderer::new(&algo.inclusive_prefixes);
            serialize_canonical(doc, node_set, keep_comments, &renderer, config, output)
        }
    }
}
/// Parses `xml` and canonicalizes the resulting document with `algo`,
/// returning the canonical bytes.
///
/// This calls [`canonicalize`] with no node-set predicate (`None`).
///
/// # Errors
///
/// * [`C14nError::Parse`] if `xml` is not valid UTF-8 or cannot be parsed as
///   XML.
/// * Any error returned by [`canonicalize`].
pub fn canonicalize_xml(xml: &[u8], algo: &C14nAlgorithm) -> Result<Vec<u8>, C14nError> {
    let text = match std::str::from_utf8(xml) {
        Ok(text) => text,
        Err(e) => return Err(C14nError::Parse(format!("invalid UTF-8: {e}"))),
    };

    let doc = match Document::parse(text) {
        Ok(doc) => doc,
        Err(e) => return Err(C14nError::Parse(e.to_string())),
    };

    let mut canonical = Vec::new();
    canonicalize(&doc, None, algo, &mut canonical)?;
    Ok(canonical)
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Decodes canonical output for comparison against string literals.
    fn utf8(bytes: Vec<u8>) -> String {
        String::from_utf8(bytes).expect("utf8")
    }

    /// Every supported identifier survives `from_uri` followed by `uri`.
    #[test]
    fn from_uri_roundtrip() {
        const KNOWN: [&str; 6] = [
            "http://www.w3.org/TR/2001/REC-xml-c14n-20010315",
            "http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments",
            "http://www.w3.org/2006/12/xml-c14n11",
            "http://www.w3.org/2006/12/xml-c14n11#WithComments",
            "http://www.w3.org/2001/10/xml-exc-c14n#",
            "http://www.w3.org/2001/10/xml-exc-c14n#WithComments",
        ];
        for expected in KNOWN {
            let parsed = C14nAlgorithm::from_uri(expected).expect(expected);
            assert_eq!(parsed.uri(), expected);
        }
    }

    /// An unrecognised identifier yields `None`.
    #[test]
    fn unknown_uri_returns_none() {
        let parsed = C14nAlgorithm::from_uri("http://example.com/unknown");
        assert!(parsed.is_none());
    }

    /// The prefix list is split on whitespace and `#default` maps to `""`.
    #[test]
    fn prefix_list_parsing() {
        let algo = C14nAlgorithm::new(C14nMode::Exclusive1_0, false)
            .with_prefix_list("foo bar #default baz");
        let prefixes = &algo.inclusive_prefixes;
        for expected in ["foo", "bar", "baz", ""] {
            assert!(prefixes.contains(expected));
        }
        assert_eq!(prefixes.len(), 4);
    }

    /// Attributes are emitted in sorted order and empty elements are expanded.
    #[test]
    fn canonicalize_xml_basic() {
        let input = b"<root b=\"2\" a=\"1\"><empty/></root>";
        let algo = C14nAlgorithm::new(C14nMode::Inclusive1_0, false);
        let canonical = utf8(canonicalize_xml(input, &algo).expect("c14n"));
        assert_eq!(canonical, r#"<root a="1" b="2"><empty></empty></root>"#);
    }

    /// Same expectations as `canonicalize_xml_basic`, using the 1.1 mode.
    #[test]
    fn c14n_1_1_basic() {
        let input = b"<root b=\"2\" a=\"1\"><empty/></root>";
        let algo = C14nAlgorithm::new(C14nMode::Inclusive1_1, false);
        let canonical = utf8(canonicalize_xml(input, &algo).expect("c14n 1.1"));
        assert_eq!(canonical, r#"<root a="1" b="2"><empty></empty></root>"#);
    }

    /// With the comment flag set, comments are kept in the output.
    #[test]
    fn c14n_1_1_with_comments() {
        let input = b"<root><!-- comment -->text</root>";
        let algo = C14nAlgorithm::new(C14nMode::Inclusive1_1, true);
        let canonical = utf8(canonicalize_xml(input, &algo).expect("c14n 1.1 with comments"));
        assert_eq!(canonical, "<root><!-- comment -->text</root>");
    }

    /// With the comment flag cleared, comments are dropped from the output.
    #[test]
    fn c14n_1_1_without_comments() {
        let input = b"<root><!-- comment -->text</root>";
        let algo = C14nAlgorithm::new(C14nMode::Inclusive1_1, false);
        let canonical = utf8(canonicalize_xml(input, &algo).expect("c14n 1.1 without comments"));
        assert_eq!(canonical, "<root>text</root>");
    }

    /// For a document that only declares namespaces, the 1.0 and 1.1 modes
    /// produce identical bytes.
    #[test]
    fn c14n_1_1_namespaces() {
        let input = b"<root xmlns:a=\"http://a\" xmlns:b=\"http://b\"><child/></root>";
        let v1_0 = C14nAlgorithm::new(C14nMode::Inclusive1_0, false);
        let v1_1 = C14nAlgorithm::new(C14nMode::Inclusive1_1, false);
        let out_1_0 = canonicalize_xml(input, &v1_0).expect("1.0");
        let out_1_1 = canonicalize_xml(input, &v1_1).expect("1.1");
        assert_eq!(out_1_0, out_1_1);
    }

    /// Pins the current output for a subset rooted at `<child>`: the
    /// ancestor's `xml:id` appears in the result.
    ///
    /// NOTE(review): Canonical XML 1.1 (section 2.4) states that `xml:id` is
    /// not a simple inheritable attribute, which suggests the opposite
    /// expectation. Confirm this assertion is intended before relying on it.
    #[test]
    fn c14n_1_1_xml_id_inherited_in_subset() {
        use roxmltree::Document;
        use std::collections::HashSet;

        let source = r#"<root xml:id="r1"><child>text</child></root>"#;
        let doc = Document::parse(source).expect("parse");
        let subtree_root = doc.root_element().first_element_child().expect("child");

        // The subset is `<child>` together with everything beneath it.
        let selected: HashSet<_> = subtree_root.descendants().map(|n| n.id()).collect();
        let in_subset = move |n: roxmltree::Node| selected.contains(&n.id());

        let algo = C14nAlgorithm::new(C14nMode::Inclusive1_1, false);
        let mut buffer = Vec::new();
        canonicalize(&doc, Some(&in_subset), &algo, &mut buffer).expect("c14n 1.1 subset");

        let result = utf8(buffer);
        assert!(
            result.contains(r#"xml:id="r1""#),
            "xml:id should be inherited in C14N 1.1 subset; got: {result}"
        );
    }
}