1mod escape;
26mod ns_common;
27pub(crate) mod ns_exclusive;
28pub(crate) mod ns_inclusive;
29mod prefix;
30pub(crate) mod serialize;
31mod xml_base;
32
33use std::collections::HashSet;
34
35use roxmltree::{Document, Node};
36
37use ns_exclusive::ExclusiveNsRenderer;
38use ns_inclusive::InclusiveNsRenderer;
39use serialize::{C14nConfig, serialize_canonical};
40
/// Canonicalization variant selected by a [`C14nAlgorithm`].
///
/// Every variant corresponds to two algorithm URIs, one plain and one ending
/// in `WithComments`; [`C14nAlgorithm::from_uri`] holds the full mapping.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum C14nMode {
    /// Inclusive canonicalization 1.0
    /// (`http://www.w3.org/TR/2001/REC-xml-c14n-20010315`).
    Inclusive1_0,
    /// Inclusive canonicalization 1.1 (`http://www.w3.org/2006/12/xml-c14n11`).
    Inclusive1_1,
    /// Exclusive canonicalization 1.0 (`http://www.w3.org/2001/10/xml-exc-c14n#`).
    Exclusive1_0,
}
51
/// A complete canonicalization algorithm selection: the mode, whether
/// comments are kept, and the prefix list used by exclusive mode.
///
/// Build one with [`C14nAlgorithm::new`] or [`C14nAlgorithm::from_uri`], then
/// optionally attach a prefix list with [`C14nAlgorithm::with_prefix_list`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct C14nAlgorithm {
    /// Which canonicalization variant to run.
    mode: C14nMode,
    /// `true` when the `WithComments` form of the algorithm was selected.
    with_comments: bool,
    /// Prefixes supplied through `with_prefix_list`; the empty string stands
    /// for the `#default` token. `canonicalize` only hands this set to the
    /// namespace renderer in `Exclusive1_0` mode.
    inclusive_prefixes: HashSet<String>,
}
64
65impl C14nAlgorithm {
66 pub fn mode(&self) -> C14nMode {
68 self.mode
69 }
70
71 pub fn with_comments(&self) -> bool {
73 self.with_comments
74 }
75
76 pub fn inclusive_prefixes(&self) -> &HashSet<String> {
78 &self.inclusive_prefixes
79 }
80
81 pub fn new(mode: C14nMode, with_comments: bool) -> Self {
83 Self {
84 mode,
85 with_comments,
86 inclusive_prefixes: HashSet::new(),
87 }
88 }
89
90 pub fn from_uri(uri: &str) -> Option<Self> {
92 let (mode, with_comments) = match uri {
93 "http://www.w3.org/TR/2001/REC-xml-c14n-20010315" => (C14nMode::Inclusive1_0, false),
94 "http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments" => {
95 (C14nMode::Inclusive1_0, true)
96 }
97 "http://www.w3.org/2006/12/xml-c14n11" => (C14nMode::Inclusive1_1, false),
98 "http://www.w3.org/2006/12/xml-c14n11#WithComments" => (C14nMode::Inclusive1_1, true),
99 "http://www.w3.org/2001/10/xml-exc-c14n#" => (C14nMode::Exclusive1_0, false),
100 "http://www.w3.org/2001/10/xml-exc-c14n#WithComments" => (C14nMode::Exclusive1_0, true),
101 _ => return None,
102 };
103 Some(Self {
104 mode,
105 with_comments,
106 inclusive_prefixes: HashSet::new(),
107 })
108 }
109
110 pub fn with_prefix_list(mut self, prefix_list: &str) -> Self {
116 self.inclusive_prefixes = prefix_list
117 .split_whitespace()
118 .map(|p| {
119 if p == "#default" {
120 String::new()
121 } else {
122 p.to_string()
123 }
124 })
125 .collect();
126 self
127 }
128
129 pub fn uri(&self) -> &'static str {
131 match (self.mode, self.with_comments) {
132 (C14nMode::Inclusive1_0, false) => "http://www.w3.org/TR/2001/REC-xml-c14n-20010315",
133 (C14nMode::Inclusive1_0, true) => {
134 "http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments"
135 }
136 (C14nMode::Inclusive1_1, false) => "http://www.w3.org/2006/12/xml-c14n11",
137 (C14nMode::Inclusive1_1, true) => "http://www.w3.org/2006/12/xml-c14n11#WithComments",
138 (C14nMode::Exclusive1_0, false) => "http://www.w3.org/2001/10/xml-exc-c14n#",
139 (C14nMode::Exclusive1_0, true) => "http://www.w3.org/2001/10/xml-exc-c14n#WithComments",
140 }
141 }
142}
143
/// Errors reported by the canonicalization entry points.
#[derive(Debug, thiserror::Error)]
pub enum C14nError {
    /// The input could not be decoded or parsed as XML; the payload is the
    /// human-readable description shown after `XML parse error:`.
    #[error("XML parse error: {0}")]
    Parse(String),
    /// A node reference was invalid.
    #[error("invalid node reference")]
    InvalidNode,
    /// The requested algorithm is not supported; the payload is interpolated
    /// into the message (presumably the algorithm identifier; confirm at the
    /// construction sites).
    #[error("unsupported algorithm: {0}")]
    UnsupportedAlgorithm(String),
    /// An underlying I/O operation failed; converted automatically from
    /// `std::io::Error` via `#[from]`.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
}
160
161pub fn canonicalize(
169 doc: &Document,
170 node_set: Option<&dyn Fn(Node) -> bool>,
171 algo: &C14nAlgorithm,
172 output: &mut Vec<u8>,
173) -> Result<(), C14nError> {
174 match algo.mode {
178 C14nMode::Inclusive1_0 => {
179 let renderer = InclusiveNsRenderer;
180 let config = C14nConfig {
181 inherit_xml_attrs: true,
182 fixup_xml_base: false,
183 };
184 serialize_canonical(doc, node_set, algo.with_comments, &renderer, config, output)
185 }
186 C14nMode::Inclusive1_1 => {
187 let renderer = InclusiveNsRenderer;
188 let config = C14nConfig {
189 inherit_xml_attrs: true,
190 fixup_xml_base: true,
191 };
192 serialize_canonical(doc, node_set, algo.with_comments, &renderer, config, output)
193 }
194 C14nMode::Exclusive1_0 => {
195 let renderer = ExclusiveNsRenderer::new(&algo.inclusive_prefixes);
196 let config = C14nConfig {
197 inherit_xml_attrs: false,
198 fixup_xml_base: false,
199 };
200 serialize_canonical(doc, node_set, algo.with_comments, &renderer, config, output)
201 }
202 }
203}
204
205pub fn canonicalize_xml(xml: &[u8], algo: &C14nAlgorithm) -> Result<Vec<u8>, C14nError> {
211 let xml_str =
212 std::str::from_utf8(xml).map_err(|e| C14nError::Parse(format!("invalid UTF-8: {e}")))?;
213 let doc = Document::parse(xml_str).map_err(|e| C14nError::Parse(e.to_string()))?;
214 let mut output = Vec::new();
215 canonicalize(&doc, None, algo, &mut output)?;
216 Ok(output)
217}
218
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Every algorithm identifier `C14nAlgorithm::from_uri` must accept.
    const KNOWN_URIS: [&str; 6] = [
        "http://www.w3.org/TR/2001/REC-xml-c14n-20010315",
        "http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments",
        "http://www.w3.org/2006/12/xml-c14n11",
        "http://www.w3.org/2006/12/xml-c14n11#WithComments",
        "http://www.w3.org/2001/10/xml-exc-c14n#",
        "http://www.w3.org/2001/10/xml-exc-c14n#WithComments",
    ];

    /// Canonicalizes `xml` under `mode` and returns the result as a `String`.
    ///
    /// `what` is the panic message used when canonicalization fails.
    fn canonical_string(xml: &[u8], mode: C14nMode, with_comments: bool, what: &str) -> String {
        let algo = C14nAlgorithm::new(mode, with_comments);
        let bytes = canonicalize_xml(xml, &algo).expect(what);
        String::from_utf8(bytes).expect("utf8")
    }

    #[test]
    fn from_uri_roundtrip() {
        for uri in KNOWN_URIS {
            let algo = C14nAlgorithm::from_uri(uri).expect(uri);
            assert_eq!(algo.uri(), uri);
        }
    }

    #[test]
    fn unknown_uri_returns_none() {
        let parsed = C14nAlgorithm::from_uri("http://example.com/unknown");
        assert!(parsed.is_none());
    }

    #[test]
    fn prefix_list_parsing() {
        let algo = C14nAlgorithm::new(C14nMode::Exclusive1_0, false)
            .with_prefix_list("foo bar #default baz");
        // `#default` is stored as the empty string.
        for expected in ["foo", "bar", "baz", ""] {
            assert!(algo.inclusive_prefixes.contains(expected));
        }
        assert_eq!(algo.inclusive_prefixes.len(), 4);
    }

    #[test]
    fn canonicalize_xml_basic() {
        // Attributes are sorted and the empty element is expanded.
        let canonical = canonical_string(
            b"<root b=\"2\" a=\"1\"><empty/></root>",
            C14nMode::Inclusive1_0,
            false,
            "c14n",
        );
        assert_eq!(canonical, r#"<root a="1" b="2"><empty></empty></root>"#);
    }

    #[test]
    fn c14n_1_1_basic() {
        let canonical = canonical_string(
            b"<root b=\"2\" a=\"1\"><empty/></root>",
            C14nMode::Inclusive1_1,
            false,
            "c14n 1.1",
        );
        assert_eq!(canonical, r#"<root a="1" b="2"><empty></empty></root>"#);
    }

    #[test]
    fn c14n_1_1_with_comments() {
        let canonical = canonical_string(
            b"<root><!-- comment -->text</root>",
            C14nMode::Inclusive1_1,
            true,
            "c14n 1.1 with comments",
        );
        assert_eq!(canonical, "<root><!-- comment -->text</root>");
    }

    #[test]
    fn c14n_1_1_without_comments() {
        let canonical = canonical_string(
            b"<root><!-- comment -->text</root>",
            C14nMode::Inclusive1_1,
            false,
            "c14n 1.1 without comments",
        );
        assert_eq!(canonical, "<root>text</root>");
    }

    #[test]
    fn c14n_1_1_namespaces() {
        // For this input the 1.0 and 1.1 outputs must be byte-identical.
        let xml = b"<root xmlns:a=\"http://a\" xmlns:b=\"http://b\"><child/></root>";
        let v1_0 = canonicalize_xml(xml, &C14nAlgorithm::new(C14nMode::Inclusive1_0, false))
            .expect("1.0");
        let v1_1 = canonicalize_xml(xml, &C14nAlgorithm::new(C14nMode::Inclusive1_1, false))
            .expect("1.1");
        assert_eq!(v1_0, v1_1);
    }

    #[test]
    fn c14n_1_1_xml_id_inherited_in_subset() {
        use roxmltree::Document;
        use std::collections::HashSet;

        // NOTE(review): Canonical XML 1.1 names only xml:lang and xml:space as
        // simple inheritable attributes and says xml:id is not one. Confirm
        // that inheriting xml:id into the subset is the intended behavior.
        let doc = Document::parse(r#"<root xml:id="r1"><child>text</child></root>"#)
            .expect("parse");
        let child = doc.root_element().first_element_child().expect("child");

        // The subset is `child` together with everything beneath it.
        let subset: HashSet<_> = child.descendants().map(|n| n.id()).collect();
        let in_subset = move |n: roxmltree::Node| subset.contains(&n.id());

        let algo = C14nAlgorithm::new(C14nMode::Inclusive1_1, false);
        let mut bytes = Vec::new();
        canonicalize(&doc, Some(&in_subset), &algo, &mut bytes).expect("c14n 1.1 subset");
        let result = String::from_utf8(bytes).expect("utf8");

        assert!(
            result.contains(r#"xml:id="r1""#),
            "xml:id should be inherited in C14N 1.1 subset; got: {result}"
        );
    }
}