Skip to main content

xml_canonical/
lib.rs

//! XML Canonicalization (C14N) library for Rust.
//!
//! This library provides implementations of the W3C XML Canonicalization standards:
//!
//! - **Canonical XML 1.0 (C14N)** - Standard canonicalization that preserves all namespace
//!   declarations in scope.
//! - **Exclusive Canonical XML 1.0 (Exc-C14N)** - Canonicalization that only outputs
//!   "visibly utilized" namespaces, commonly used for XML Digital Signatures.
//!
//! # Quick Start
//!
//! ```
//! use xml_canonical::{canonicalize, Algorithm};
//!
//! let xml = r#"<root xmlns="http://example.com" id="1"><child/></root>"#;
//!
//! // Using the simple API
//! let canonical = canonicalize(xml, Algorithm::C14n).unwrap();
//!
//! // Using Exclusive C14N
//! let exclusive = canonicalize(xml, Algorithm::ExcC14n).unwrap();
//! ```
//!
//! # Algorithms
//!
//! ## Canonical XML 1.0
//!
//! Standard canonicalization that:
//! - Outputs all namespace declarations that are in scope
//! - Sorts namespace declarations (default first, then by prefix)
//! - Sorts attributes (by namespace URI, then local name)
//! - Normalizes attribute values
//! - Converts empty elements to start-end tag pairs
//!
//! ## Exclusive Canonical XML 1.0
//!
//! Exclusive canonicalization that:
//! - Only outputs namespaces that are "visibly utilized"
//! - A namespace is visibly utilized if the element or an attribute uses that prefix
//! - Supports `InclusiveNamespacesPrefixList` for forcing certain namespaces
//!
//! # Advanced Usage
//!
//! For more control, use the processor types directly:
//!
//! ```
//! use xml_canonical::{Document, C14nProcessor, C14nOptions};
//!
//! let xml = "<root><!-- comment --></root>";
//! let doc = Document::parse(xml).unwrap();
//!
//! // Canonicalize with comments included
//! let processor = C14nProcessor::new(C14nOptions::with_comments());
//! let result = processor.process(&doc).unwrap();
//! assert!(result.contains("<!-- comment -->"));
//! ```
//!
//! # Subset Canonicalization
//!
//! You can canonicalize a subset of nodes using node IDs:
//!
//! ```
//! use xml_canonical::{Document, C14nProcessor, C14nOptions};
//! use std::collections::HashSet;
//!
//! let xml = "<root><a/><b/><c/></root>";
//! let doc = Document::parse(xml).unwrap();
//!
//! // Create a node set (you would typically use XPath to select nodes)
//! let mut node_set = HashSet::new();
//! node_set.insert(0); // Document node
//! // Add other nodes as needed...
//!
//! let options = C14nOptions::new().with_node_set(node_set);
//! let processor = C14nProcessor::new(options);
//! let result = processor.process(&doc).unwrap();
//! ```
78
79#![warn(missing_docs)]
80#![warn(rust_2018_idioms)]
81
82mod attribute;
83mod c14n;
84mod error;
85mod exc_c14n;
86mod namespace;
87mod node;
88mod writer;
89
90// Re-export main types
91pub use c14n::{canonicalize as c14n_canonicalize, canonicalize_with_comments, C14nOptions, C14nProcessor};
92pub use error::{Error, Result};
93pub use exc_c14n::{
94    canonicalize_exclusive, canonicalize_exclusive_with_comments,
95    canonicalize_exclusive_with_prefixes, ExcC14nOptions, ExcC14nProcessor,
96};
97pub use node::{Document, Node, NodeId};
98
99use std::io::Write;
100
/// Canonicalization algorithm variants.
///
/// Each variant combines a canonicalization family (Canonical XML 1.0 or
/// Exclusive Canonical XML 1.0) with a choice of whether comments are kept.
/// The type is `Copy` and hashable, so it can be passed by value and used as
/// a key in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Algorithm {
    /// Canonical XML 1.0 without comments.
    C14n,
    /// Canonical XML 1.0 with comments.
    C14nWithComments,
    /// Exclusive Canonical XML 1.0 without comments.
    ExcC14n,
    /// Exclusive Canonical XML 1.0 with comments.
    ExcC14nWithComments,
}

impl Algorithm {
    /// Returns the URI identifier for this algorithm.
    ///
    /// The returned string is a static literal; the "with comments" variants
    /// use the same base URI as their comment-free counterparts with a
    /// `WithComments` fragment appended.
    #[must_use]
    pub fn uri(&self) -> &'static str {
        match self {
            Algorithm::C14n => "http://www.w3.org/TR/2001/REC-xml-c14n-20010315",
            Algorithm::C14nWithComments => {
                "http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments"
            }
            Algorithm::ExcC14n => "http://www.w3.org/2001/10/xml-exc-c14n#",
            Algorithm::ExcC14nWithComments => {
                "http://www.w3.org/2001/10/xml-exc-c14n#WithComments"
            }
        }
    }

    /// Returns whether this algorithm includes comments.
    ///
    /// `true` for [`Algorithm::C14nWithComments`] and
    /// [`Algorithm::ExcC14nWithComments`], `false` otherwise.
    #[must_use]
    pub fn includes_comments(&self) -> bool {
        matches!(
            self,
            Algorithm::C14nWithComments | Algorithm::ExcC14nWithComments
        )
    }

    /// Returns whether this is an exclusive canonicalization algorithm.
    ///
    /// `true` for [`Algorithm::ExcC14n`] and
    /// [`Algorithm::ExcC14nWithComments`], `false` otherwise.
    #[must_use]
    pub fn is_exclusive(&self) -> bool {
        matches!(self, Algorithm::ExcC14n | Algorithm::ExcC14nWithComments)
    }
}
142
143/// Canonicalizes an XML string using the specified algorithm.
144///
145/// # Arguments
146///
147/// * `xml` - The XML string to canonicalize
148/// * `algorithm` - The canonicalization algorithm to use
149///
150/// # Returns
151///
152/// The canonicalized XML as a string, or an error if parsing fails.
153///
154/// # Example
155///
156/// ```
157/// use xml_canonical::{canonicalize, Algorithm};
158///
159/// let xml = r#"<root id="1" class="main"/>"#;
160/// let result = canonicalize(xml, Algorithm::C14n).unwrap();
161/// assert_eq!(result, r#"<root class="main" id="1"></root>"#);
162/// ```
163pub fn canonicalize(xml: &str, algorithm: Algorithm) -> Result<String> {
164    match algorithm {
165        Algorithm::C14n => c14n_canonicalize(xml),
166        Algorithm::C14nWithComments => canonicalize_with_comments(xml),
167        Algorithm::ExcC14n => canonicalize_exclusive(xml),
168        Algorithm::ExcC14nWithComments => canonicalize_exclusive_with_comments(xml),
169    }
170}
171
172/// Canonicalizes an XML string to a writer using the specified algorithm.
173///
174/// # Arguments
175///
176/// * `xml` - The XML string to canonicalize
177/// * `algorithm` - The canonicalization algorithm to use
178/// * `writer` - The writer to output the canonical form to
179///
180/// # Returns
181///
182/// `Ok(())` on success, or an error if parsing or writing fails.
183///
184/// # Example
185///
186/// ```
187/// use xml_canonical::{canonicalize_to, Algorithm};
188///
189/// let xml = "<root/>";
190/// let mut output = Vec::new();
191/// canonicalize_to(xml, Algorithm::C14n, &mut output).unwrap();
192/// assert_eq!(String::from_utf8(output).unwrap(), "<root></root>");
193/// ```
194pub fn canonicalize_to<W: Write>(xml: &str, algorithm: Algorithm, writer: W) -> Result<()> {
195    let doc = Document::parse(xml)?;
196
197    match algorithm {
198        Algorithm::C14n => {
199            C14nProcessor::new(C14nOptions::new()).process_to_writer(&doc, writer)
200        }
201        Algorithm::C14nWithComments => {
202            C14nProcessor::new(C14nOptions::with_comments()).process_to_writer(&doc, writer)
203        }
204        Algorithm::ExcC14n => {
205            ExcC14nProcessor::new(ExcC14nOptions::new()).process_to_writer(&doc, writer)
206        }
207        Algorithm::ExcC14nWithComments => {
208            ExcC14nProcessor::new(ExcC14nOptions::with_comments()).process_to_writer(&doc, writer)
209        }
210    }
211}
212
213/// Canonicalizes an XML string with exclusive C14N and inclusive namespace prefixes.
214///
215/// # Arguments
216///
217/// * `xml` - The XML string to canonicalize
218/// * `inclusive_prefixes` - List of namespace prefixes to always include
219/// * `with_comments` - Whether to include comments in the output
220///
221/// # Returns
222///
223/// The canonicalized XML as a string, or an error if parsing fails.
224///
225/// # Example
226///
227/// ```
228/// use xml_canonical::canonicalize_exclusive_with_options;
229///
230/// let xml = r#"<root xmlns:keep="http://keep.com" xmlns:drop="http://drop.com"/>"#;
231/// let result = canonicalize_exclusive_with_options(
232///     xml,
233///     &["keep".to_string()],
234///     false
235/// ).unwrap();
236/// assert!(result.contains("xmlns:keep"));
237/// assert!(!result.contains("xmlns:drop"));
238/// ```
239pub fn canonicalize_exclusive_with_options(
240    xml: &str,
241    inclusive_prefixes: &[String],
242    with_comments: bool,
243) -> Result<String> {
244    let doc = Document::parse(xml)?;
245
246    let options = if with_comments {
247        ExcC14nOptions::with_comments()
248    } else {
249        ExcC14nOptions::new()
250    }
251    .with_inclusive_prefixes(inclusive_prefixes.to_vec());
252
253    ExcC14nProcessor::new(options).process(&doc)
254}
255
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs Canonical XML 1.0 (without comments) on `xml`, panicking on error.
    fn c14n(xml: &str) -> String {
        canonicalize(xml, Algorithm::C14n).unwrap()
    }

    #[test]
    fn test_algorithm_uris() {
        let expected = [
            (
                Algorithm::C14n,
                "http://www.w3.org/TR/2001/REC-xml-c14n-20010315",
            ),
            (
                Algorithm::C14nWithComments,
                "http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments",
            ),
            (Algorithm::ExcC14n, "http://www.w3.org/2001/10/xml-exc-c14n#"),
            (
                Algorithm::ExcC14nWithComments,
                "http://www.w3.org/2001/10/xml-exc-c14n#WithComments",
            ),
        ];

        for (algorithm, uri) in expected.iter() {
            assert_eq!(algorithm.uri(), *uri);
        }
    }

    #[test]
    fn test_algorithm_includes_comments() {
        let expected = [
            (Algorithm::C14n, false),
            (Algorithm::C14nWithComments, true),
            (Algorithm::ExcC14n, false),
            (Algorithm::ExcC14nWithComments, true),
        ];

        for (algorithm, with_comments) in expected.iter() {
            assert_eq!(algorithm.includes_comments(), *with_comments);
        }
    }

    #[test]
    fn test_algorithm_is_exclusive() {
        let expected = [
            (Algorithm::C14n, false),
            (Algorithm::C14nWithComments, false),
            (Algorithm::ExcC14n, true),
            (Algorithm::ExcC14nWithComments, true),
        ];

        for (algorithm, exclusive) in expected.iter() {
            assert_eq!(algorithm.is_exclusive(), *exclusive);
        }
    }

    #[test]
    fn test_canonicalize_c14n() {
        assert_eq!(
            c14n(r#"<root z="1" a="2"/>"#),
            r#"<root a="2" z="1"></root>"#
        );
    }

    #[test]
    fn test_canonicalize_c14n_with_comments() {
        let canonical =
            canonicalize("<root><!-- comment --></root>", Algorithm::C14nWithComments).unwrap();
        assert!(canonical.contains("<!-- comment -->"));
    }

    #[test]
    fn test_canonicalize_exc_c14n() {
        let canonical = canonicalize(
            r#"<root xmlns:unused="http://unused.com"/>"#,
            Algorithm::ExcC14n,
        )
        .unwrap();
        assert!(!canonical.contains("unused"));
    }

    #[test]
    fn test_canonicalize_to_writer() {
        let mut sink: Vec<u8> = Vec::new();
        canonicalize_to("<root/>", Algorithm::C14n, &mut sink).unwrap();

        let text = String::from_utf8(sink).unwrap();
        assert_eq!(text, "<root></root>");
    }

    #[test]
    fn test_canonicalize_exclusive_with_options() {
        let xml = r#"<root xmlns:a="http://a.com" xmlns:b="http://b.com"/>"#;

        // Only prefix `a` is forced into the output; `b` is not listed.
        let forced = [String::from("a")];
        let canonical = canonicalize_exclusive_with_options(xml, &forced, false).unwrap();

        assert!(canonical.contains(r#"xmlns:a="http://a.com""#));
        assert!(!canonical.contains("xmlns:b"));
    }

    #[test]
    fn test_document_parse_and_access() {
        let doc = Document::parse("<root><child/></root>").unwrap();

        let root = doc.get(doc.document_element().unwrap()).unwrap();

        assert!(root.is_element());
    }

    #[test]
    fn test_empty_element_expansion() {
        assert_eq!(c14n("<root/>"), "<root></root>");
    }

    #[test]
    fn test_attribute_sorting_comprehensive() {
        let input = r#"<root xmlns:b="http://b.com" xmlns:a="http://a.com" xmlns="http://default.com" z="1" a="2"/>"#;

        // Default namespace first, then a, then b, then attributes
        let expected = r#"<root xmlns="http://default.com" xmlns:a="http://a.com" xmlns:b="http://b.com" a="2" z="1"></root>"#;
        assert_eq!(c14n(input), expected);
    }

    #[test]
    fn test_text_escaping() {
        assert_eq!(
            c14n("<root>&lt;&gt;&amp;</root>"),
            "<root>&lt;&gt;&amp;</root>"
        );
    }

    #[test]
    fn test_cdata_conversion() {
        assert_eq!(
            c14n("<root><![CDATA[<hello>]]></root>"),
            "<root>&lt;hello&gt;</root>"
        );
    }

    #[test]
    fn test_xml_declaration_removed() {
        assert_eq!(
            c14n(r#"<?xml version="1.0" encoding="UTF-8"?><root/>"#),
            "<root></root>"
        );
    }

    #[test]
    fn test_processing_instruction_preserved() {
        let canonical = c14n(r#"<?target data?><root/>"#);
        assert!(canonical.contains("<?target data?>"));
    }

    #[test]
    fn test_nested_namespaces() {
        let canonical = c14n(
            r#"<a xmlns="http://a.com"><b xmlns="http://b.com"><c xmlns="http://a.com"/></b></a>"#,
        );

        // Each namespace change should be declared
        let expected_tags = [
            r#"<a xmlns="http://a.com">"#,
            r#"<b xmlns="http://b.com">"#,
            r#"<c xmlns="http://a.com">"#,
        ];
        for tag in expected_tags.iter() {
            assert!(canonical.contains(tag));
        }
    }

    #[test]
    fn test_whitespace_preservation() {
        assert_eq!(c14n("<root>  text  </root>"), "<root>  text  </root>");
    }
}