Skip to main content

xml_sec/c14n/
mod.rs

1//! XML Canonicalization (C14N).
2//!
3//! Implements:
4//! - [Canonical XML 1.0](https://www.w3.org/TR/xml-c14n/) (inclusive)
5//! - [Canonical XML 1.1](https://www.w3.org/TR/xml-c14n11/) (inclusive; xml:id propagation and xml:base fixup)
6//! - [Exclusive XML Canonicalization 1.0](https://www.w3.org/TR/xml-exc-c14n/) (exclusive)
7//!
8//! # Example
9//!
10//! ```
11//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
12//! use xml_sec::c14n::{C14nAlgorithm, C14nMode, canonicalize_xml};
13//!
14//! let xml = b"<root b=\"2\" a=\"1\"><empty/></root>";
15//! let algo = C14nAlgorithm::new(C14nMode::Inclusive1_0, false);
16//! let canonical = canonicalize_xml(xml, &algo)?;
17//! assert_eq!(
18//!     String::from_utf8(canonical)?,
19//!     "<root a=\"1\" b=\"2\"><empty></empty></root>"
20//! );
21//! # Ok(())
22//! # }
23//! ```
24
25mod escape;
26mod ns_common;
27pub(crate) mod ns_exclusive;
28pub(crate) mod ns_inclusive;
29mod prefix;
30pub(crate) mod serialize;
31mod xml_base;
32
33use std::collections::HashSet;
34
35use roxmltree::{Document, Node};
36
37use ns_exclusive::ExclusiveNsRenderer;
38use ns_inclusive::InclusiveNsRenderer;
39use serialize::{C14nConfig, serialize_canonical};
40
/// Canonicalization flavour, independent of whether comments are kept.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum C14nMode {
    /// Inclusive C14N 1.0: every in-scope namespace is rendered.
    Inclusive1_0,
    /// Inclusive C14N 1.1: as 1.0, with xml:id propagation and xml:base fixup.
    Inclusive1_1,
    /// Exclusive C14N 1.0: only visibly-utilized namespaces are rendered.
    Exclusive1_0,
}

/// A complete C14N algorithm selection: mode, comments flag and, for
/// exclusive C14N, the set of forced prefixes.
///
/// Usually built from the algorithm URI carried by a
/// `<CanonicalizationMethod>` or `<Transform>` element (see
/// [`C14nAlgorithm::from_uri`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct C14nAlgorithm {
    mode: C14nMode,
    with_comments: bool,
    /// Exclusive C14N only: prefixes forced by an InclusiveNamespaces
    /// PrefixList. The `"#default"` token is stored as `""` (see
    /// `with_prefix_list()`).
    inclusive_prefixes: HashSet<String>,
}

impl C14nAlgorithm {
    /// Build an algorithm from a mode and a comments flag, with an empty
    /// inclusive-prefix set.
    pub fn new(mode: C14nMode, with_comments: bool) -> Self {
        Self {
            mode,
            with_comments,
            inclusive_prefixes: HashSet::new(),
        }
    }

    /// Look up an algorithm by its identifier URI.
    ///
    /// Returns `None` when `uri` is not one of the six recognized
    /// identifiers. The comparison is an exact string match. The resulting
    /// algorithm has an empty inclusive-prefix set.
    pub fn from_uri(uri: &str) -> Option<Self> {
        let algo = match uri {
            "http://www.w3.org/TR/2001/REC-xml-c14n-20010315" => {
                Self::new(C14nMode::Inclusive1_0, false)
            }
            "http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments" => {
                Self::new(C14nMode::Inclusive1_0, true)
            }
            "http://www.w3.org/2006/12/xml-c14n11" => Self::new(C14nMode::Inclusive1_1, false),
            "http://www.w3.org/2006/12/xml-c14n11#WithComments" => {
                Self::new(C14nMode::Inclusive1_1, true)
            }
            "http://www.w3.org/2001/10/xml-exc-c14n#" => Self::new(C14nMode::Exclusive1_0, false),
            "http://www.w3.org/2001/10/xml-exc-c14n#WithComments" => {
                Self::new(C14nMode::Exclusive1_0, true)
            }
            _ => return None,
        };
        Some(algo)
    }

    /// Replace the InclusiveNamespaces PrefixList (exclusive C14N only).
    ///
    /// `prefix_list` is split on whitespace; the token `"#default"` is
    /// stored as the empty string `""`. Any previously set prefixes are
    /// discarded.
    ///
    /// Only meaningful for [`C14nMode::Exclusive1_0`]. For inclusive modes,
    /// the prefix list is ignored during canonicalization.
    pub fn with_prefix_list(mut self, prefix_list: &str) -> Self {
        let mut prefixes = HashSet::new();
        for token in prefix_list.split_whitespace() {
            let prefix = match token {
                "#default" => "",
                other => other,
            };
            prefixes.insert(prefix.to_owned());
        }
        self.inclusive_prefixes = prefixes;
        self
    }

    /// The canonicalization mode.
    pub fn mode(&self) -> C14nMode {
        self.mode
    }

    /// `true` when comment nodes are kept in the output.
    pub fn with_comments(&self) -> bool {
        self.with_comments
    }

    /// Prefixes forced via InclusiveNamespaces PrefixList (exclusive C14N).
    pub fn inclusive_prefixes(&self) -> &HashSet<String> {
        &self.inclusive_prefixes
    }

    /// The identifier URI for this mode / comments combination.
    ///
    /// The inclusive-prefix set does not influence the result.
    pub fn uri(&self) -> &'static str {
        match self.mode {
            C14nMode::Inclusive1_0 if self.with_comments => {
                "http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments"
            }
            C14nMode::Inclusive1_0 => "http://www.w3.org/TR/2001/REC-xml-c14n-20010315",
            C14nMode::Inclusive1_1 if self.with_comments => {
                "http://www.w3.org/2006/12/xml-c14n11#WithComments"
            }
            C14nMode::Inclusive1_1 => "http://www.w3.org/2006/12/xml-c14n11",
            C14nMode::Exclusive1_0 if self.with_comments => {
                "http://www.w3.org/2001/10/xml-exc-c14n#WithComments"
            }
            C14nMode::Exclusive1_0 => "http://www.w3.org/2001/10/xml-exc-c14n#",
        }
    }
}
143
144/// Error type for C14N operations.
145#[derive(Debug, thiserror::Error)]
146pub enum C14nError {
147    /// XML parsing error.
148    #[error("XML parse error: {0}")]
149    Parse(String),
150    /// Invalid node reference.
151    #[error("invalid node reference")]
152    InvalidNode,
153    /// Algorithm not yet implemented.
154    #[error("unsupported algorithm: {0}")]
155    UnsupportedAlgorithm(String),
156    /// I/O error.
157    #[error("I/O error: {0}")]
158    Io(#[from] std::io::Error),
159}
160
161/// Canonicalize an XML document or document subset.
162///
163/// - `doc`: parsed roxmltree document (read-only DOM).
164/// - `node_set`: optional predicate controlling which nodes appear in output.
165///   `None` means the entire document.
166/// - `algo`: algorithm parameters (mode, comments, prefix list).
167/// - `output`: byte buffer receiving canonical XML.
168pub fn canonicalize(
169    doc: &Document,
170    node_set: Option<&dyn Fn(Node) -> bool>,
171    algo: &C14nAlgorithm,
172    output: &mut Vec<u8>,
173) -> Result<(), C14nError> {
174    // inherit_xml_attrs: Inclusive C14N inherits xml:* attrs from ancestors
175    // per §2.4. Exclusive C14N explicitly omits this per Exc-C14N §3.
176    // fixup_xml_base: C14N 1.1 resolves relative xml:base URIs via RFC 3986.
177    match algo.mode {
178        C14nMode::Inclusive1_0 => {
179            let renderer = InclusiveNsRenderer;
180            let config = C14nConfig {
181                inherit_xml_attrs: true,
182                fixup_xml_base: false,
183            };
184            serialize_canonical(doc, node_set, algo.with_comments, &renderer, config, output)
185        }
186        C14nMode::Inclusive1_1 => {
187            let renderer = InclusiveNsRenderer;
188            let config = C14nConfig {
189                inherit_xml_attrs: true,
190                fixup_xml_base: true,
191            };
192            serialize_canonical(doc, node_set, algo.with_comments, &renderer, config, output)
193        }
194        C14nMode::Exclusive1_0 => {
195            let renderer = ExclusiveNsRenderer::new(&algo.inclusive_prefixes);
196            let config = C14nConfig {
197                inherit_xml_attrs: false,
198                fixup_xml_base: false,
199            };
200            serialize_canonical(doc, node_set, algo.with_comments, &renderer, config, output)
201        }
202    }
203}
204
205/// Convenience: parse XML bytes and canonicalize the whole document.
206///
207/// Input must be valid UTF-8 (XML 1.0 documents are UTF-8 or declare their
208/// encoding; roxmltree only accepts UTF-8). Returns `C14nError::Parse` for
209/// invalid UTF-8 or malformed XML.
210pub fn canonicalize_xml(xml: &[u8], algo: &C14nAlgorithm) -> Result<Vec<u8>, C14nError> {
211    let xml_str =
212        std::str::from_utf8(xml).map_err(|e| C14nError::Parse(format!("invalid UTF-8: {e}")))?;
213    let doc = Document::parse(xml_str).map_err(|e| C14nError::Parse(e.to_string()))?;
214    let mut output = Vec::new();
215    canonicalize(&doc, None, algo, &mut output)?;
216    Ok(output)
217}
218
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Canonicalize a whole document and return the result as a `String`.
    ///
    /// `label` is used as the panic message if canonicalization fails.
    fn canonical_string(xml: &[u8], mode: C14nMode, with_comments: bool, label: &str) -> String {
        let algo = C14nAlgorithm::new(mode, with_comments);
        let bytes = canonicalize_xml(xml, &algo).expect(label);
        String::from_utf8(bytes).expect("utf8")
    }

    /// Every recognized URI maps to an algorithm that reports the same URI.
    #[test]
    fn from_uri_roundtrip() {
        let known = [
            "http://www.w3.org/TR/2001/REC-xml-c14n-20010315",
            "http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments",
            "http://www.w3.org/2006/12/xml-c14n11",
            "http://www.w3.org/2006/12/xml-c14n11#WithComments",
            "http://www.w3.org/2001/10/xml-exc-c14n#",
            "http://www.w3.org/2001/10/xml-exc-c14n#WithComments",
        ];
        for uri in known {
            let parsed = C14nAlgorithm::from_uri(uri).expect(uri);
            assert_eq!(parsed.uri(), uri);
        }
    }

    /// URIs outside the recognized set are rejected.
    #[test]
    fn unknown_uri_returns_none() {
        assert!(C14nAlgorithm::from_uri("http://example.com/unknown").is_none());
    }

    /// The prefix list is split on whitespace and `#default` becomes `""`.
    #[test]
    fn prefix_list_parsing() {
        let algo = C14nAlgorithm::new(C14nMode::Exclusive1_0, false)
            .with_prefix_list("foo bar #default baz");
        let prefixes = algo.inclusive_prefixes();
        // "" is the normalized form of #default.
        for expected in ["foo", "bar", "baz", ""] {
            assert!(prefixes.contains(expected));
        }
        assert_eq!(prefixes.len(), 4);
    }

    /// Attributes are sorted and empty elements are expanded.
    #[test]
    fn canonicalize_xml_basic() {
        let out = canonical_string(
            b"<root b=\"2\" a=\"1\"><empty/></root>",
            C14nMode::Inclusive1_0,
            false,
            "c14n",
        );
        assert_eq!(out, r#"<root a="1" b="2"><empty></empty></root>"#);
    }

    /// C14N 1.1 serialization is identical to 1.0 for full documents.
    #[test]
    fn c14n_1_1_basic() {
        let out = canonical_string(
            b"<root b=\"2\" a=\"1\"><empty/></root>",
            C14nMode::Inclusive1_1,
            false,
            "c14n 1.1",
        );
        assert_eq!(out, r#"<root a="1" b="2"><empty></empty></root>"#);
    }

    /// With the comments flag set, comment nodes are kept verbatim.
    #[test]
    fn c14n_1_1_with_comments() {
        let out = canonical_string(
            b"<root><!-- comment -->text</root>",
            C14nMode::Inclusive1_1,
            true,
            "c14n 1.1 with comments",
        );
        assert_eq!(out, "<root><!-- comment -->text</root>");
    }

    /// Without the comments flag, comment nodes are dropped.
    #[test]
    fn c14n_1_1_without_comments() {
        let out = canonical_string(
            b"<root><!-- comment -->text</root>",
            C14nMode::Inclusive1_1,
            false,
            "c14n 1.1 without comments",
        );
        assert_eq!(out, "<root>text</root>");
    }

    /// C14N 1.1 renders all in-scope namespaces like 1.0.
    #[test]
    fn c14n_1_1_namespaces() {
        let xml = b"<root xmlns:a=\"http://a\" xmlns:b=\"http://b\"><child/></root>";
        let v1_0 = canonicalize_xml(xml, &C14nAlgorithm::new(C14nMode::Inclusive1_0, false))
            .expect("1.0");
        let v1_1 = canonicalize_xml(xml, &C14nAlgorithm::new(C14nMode::Inclusive1_1, false))
            .expect("1.1");
        // For full documents, 1.0 and 1.1 produce identical output.
        assert_eq!(v1_0, v1_1);
    }

    /// Pins the current behaviour: in a C14N 1.1 document subset, `xml:id`
    /// from an omitted ancestor shows up on the subset's apex element.
    ///
    /// NOTE(review): Canonical XML 1.1 §2.4 appears to exclude `xml:id` from
    /// the inheritable xml:* attributes (only xml:lang / xml:space are
    /// inherited, xml:base is fixed up) — confirm this expectation against
    /// the spec before relying on it.
    #[test]
    fn c14n_1_1_xml_id_inherited_in_subset() {
        use roxmltree::Document;
        use std::collections::HashSet;

        let source = r#"<root xml:id="r1"><child>text</child></root>"#;
        let doc = Document::parse(source).expect("parse");
        let child = doc.root_element().first_element_child().expect("child");

        // Subset = child plus everything below it; root itself is excluded.
        // `descendants()` yields the node itself followed by its descendants.
        let selected: HashSet<_> = child.descendants().map(|n| n.id()).collect();
        let in_subset = move |n: roxmltree::Node| selected.contains(&n.id());

        let algo = C14nAlgorithm::new(C14nMode::Inclusive1_1, false);
        let mut buf = Vec::new();
        canonicalize(&doc, Some(&in_subset), &algo, &mut buf).expect("c14n 1.1 subset");
        let result = String::from_utf8(buf).expect("utf8");

        // xml:id="r1" should be inherited from root onto child
        assert!(
            result.contains(r#"xml:id="r1""#),
            "xml:id should be inherited in C14N 1.1 subset; got: {result}"
        );
    }
}