1fn canon_attr_map<'a>(a: &'a xml::attribute::OwnedAttribute) -> (xml::name::Name<'a>, String) {
2 let attribute_re = regex::Regex::new(r"[ \r\n\t]").unwrap();
3
4 (
5 a.name.borrow(),
6 attribute_re
7 .replace_all(&a.value, " ")
8 .replace("&", "&")
9 .replace("<", "<")
10 .replace("\"", """)
11 .replace("\r", "
")
12 )
13}
14
15pub fn canonical_rfc3076(events: &[xml::reader::XmlEvent], include_comments: bool, offset: usize, exclusive: bool) -> Result<String, String> {
16 let mut output = Vec::new();
17 let mut output_writer = xml::writer::EventWriter::new_with_config(
18 &mut output,
19 xml::writer::EmitterConfig {
20 perform_indent: false,
21 perform_escaping: false,
22 write_document_declaration: false,
23 autopad_comments: false,
24 cdata_to_characters: true,
25 line_separator: std::borrow::Cow::Borrowed("\n"),
26 normalize_empty_elements: false,
27 ..std::default::Default::default()
28 },
29 );
30
31 let mut level: usize = 0;
32 let mut xml_ns_attrs = vec![];
33 let mut exc_ns_stack = xml::namespace::NamespaceStack::default();
34 for (i, event) in events.iter().enumerate() {
35 match match event {
36 xml::reader::XmlEvent::StartDocument { .. } => Ok(()),
37 xml::reader::XmlEvent::EndDocument { .. } => Ok(()),
38 xml::reader::XmlEvent::ProcessingInstruction {
39 name, data
40 } => {
41 if i >= offset {
42 output_writer.write(xml::writer::XmlEvent::ProcessingInstruction {
43 name,
44 data: data.as_deref(),
45 })
46 } else {
47 Ok(())
48 }
49 }
50 xml::reader::XmlEvent::StartElement {
51 name, attributes, namespace
52 } => {
53 let new_xms_ns_attrs =
54 attributes
55 .iter()
56 .filter(|a| a.name.namespace.as_deref() == Some("http://www.w3.org/XML/1998/namespace"))
57 .collect::<Vec<_>>();
58 let cur_xml_ns_attrs = xml_ns_attrs.clone();
59 xml_ns_attrs.push(new_xms_ns_attrs);
60
61 if i >= offset {
62 level += 1;
63
64 let attribute_prefixes = attributes.iter().map(|a| a.name.prefix.as_deref()).collect::<Vec<_>>();
65 exc_ns_stack.push_empty();
66 exc_ns_stack.extend(namespace.0.iter().filter(|n| {
67 name.prefix.as_deref() == Some(&n.0) || attribute_prefixes.contains(&Some(n.1))
68 }).map(|n| (n.0.as_str(), n.1.as_str())).collect::<Vec<_>>());
69
70 let mut mapped_attr = if i == offset && !exclusive {
71 let existing_xms_ns_attrs = attributes.iter().filter_map(|a| if a.name.namespace.as_deref() == Some("http://www.w3.org/XML/1998/namespace") {
72 Some(a.name.local_name.clone())
73 } else {
74 None
75 }).collect::<Vec<_>>();
76 attributes.iter().chain(
77 cur_xml_ns_attrs.iter().flatten()
78 .filter(|a| !existing_xms_ns_attrs.contains(&a.name.local_name))
79 .map(|a| *a)
80 ).map(canon_attr_map).collect()
81 } else {
82 attributes.iter().map(canon_attr_map).collect::<Vec<_>>()
83 };
84 mapped_attr.sort_by(|a, b| {
85 match (a.0.prefix, b.0.prefix) {
86 (None, None) => {
87 a.0.local_name.cmp(b.0.local_name)
88 }
89 (None, Some(_)) => {
90 std::cmp::Ordering::Less
91 }
92 (Some(_), Some(_)) => {
93 let a_ns = a.0.namespace.unwrap_or_default();
94 let b_ns = b.0.namespace.unwrap_or_default();
95 if a_ns == b_ns {
96 a.0.local_name.cmp(b.0.local_name)
97 } else {
98 a_ns.cmp(b_ns)
99 }
100 }
101 (Some(_), None) => {
102 std::cmp::Ordering::Greater
103 }
104 }
105 });
106 output_writer.write(xml::writer::XmlEvent::StartElement {
107 name: name.borrow(),
108 attributes: mapped_attr.iter().map(|a| xml::attribute::Attribute {
109 name: a.0,
110 value: &a.1,
111 }).collect(),
112 namespace: if exclusive {
113 std::borrow::Cow::Owned(exc_ns_stack.squash())
114 } else {
115 std::borrow::Cow::Borrowed(&namespace)
116 },
117 })
118 } else {
119 Ok(())
120 }
121 }
122 xml::reader::XmlEvent::EndElement {
123 name
124 } => {
125 xml_ns_attrs.pop();
126
127 if i >= offset {
128 level -= 1;
129
130 exc_ns_stack.try_pop();
131
132 match output_writer.write(xml::writer::XmlEvent::EndElement {
133 name: Some(name.borrow()),
134 }) {
135 Ok(_) => {
136 if level == 0 {
137 break;
138 }
139 Ok(())
140 }
141 Err(e) => Err(e)
142 }
143 } else {
144 Ok(())
145 }
146 }
147 xml::reader::XmlEvent::CData(data) => {
148 if i >= offset {
149 output_writer.write(xml::writer::XmlEvent::Characters(
150 &data
151 .replace("\r\n", "\n")
152 .replace("&", "&")
153 .replace("<", "<")
154 .replace(">", ">")
155 .replace("\r", "
")
156 ))
157 } else {
158 Ok(())
159 }
160 }
161 xml::reader::XmlEvent::Comment(data) => {
162 if i >= offset && include_comments {
163 output_writer.write(xml::writer::XmlEvent::Comment(
164 &data.replace("\r\n", "\n")
165 ))
166 } else {
167 Ok(())
168 }
169 }
170 xml::reader::XmlEvent::Whitespace(data) => {
171 if i >= offset && include_comments {
172 output_writer.write(xml::writer::XmlEvent::Characters(
173 &data.replace("\r\n", "\n")
174 ))
175 } else {
176 Ok(())
177 }
178 }
179 xml::reader::XmlEvent::Characters(data) => {
180 if i >= offset {
181 output_writer.write(xml::writer::XmlEvent::Characters(
182 &data
183 .replace("\r\n", "\n")
184 .replace("&", "&")
185 .replace("<", "<")
186 .replace(">", ">")
187 .replace("\r", "
")
188 ))
189 } else {
190 Ok(())
191 }
192 }
193 } {
194 Ok(_) => {}
195 Err(e) => return Err(e.to_string())
196 }
197 }
198
199 Ok(String::from_utf8_lossy(&output).to_string())
200}
201
#[cfg(test)]
mod tests {
    // Whitespace inside the raw strings below is significant: every text
    // node of an input document must appear unchanged in the expected output.

    /// Input shared by `c14n_7` and `c14n_9`.
    const LOCAL_DOC: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
        <n0:local xmlns:n0="foo:bar"
                  xmlns:n3="ftp://example.org">
            <n1:elem2 xmlns:n1="http://example.net"
                      xml:lang="en">
                <n3:stuff xmlns:n3="ftp://example.org"/>
            </n1:elem2>
        </n0:local>"#;

    /// Input shared by `c14n_8` and `c14n_10`.
    const PDU_DOC: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
        <n2:pdu xmlns:n1="http://example.com"
                xmlns:n2="http://foo.example"
                xml:lang="fr"
                xml:space="retain">

            <n1:elem2 xmlns:n1="http://example.net"
                      xml:lang="en">
                <n3:stuff xmlns:n3="ftp://example.org"/>
            </n1:elem2>
        </n2:pdu>"#;

    /// Exclusive canonical form of `elem2`, identical for both documents.
    const EXCLUSIVE_ELEM2: &str = r#"<n1:elem2 xmlns:n1="http://example.net" xml:lang="en">
                <n3:stuff xmlns:n3="ftp://example.org"></n3:stuff>
            </n1:elem2>"#;

    /// Parses `source` with the reader settings shared by every test:
    /// comments and whitespace are kept, adjacent text is not merged and
    /// whitespace outside the root element is ignored.
    fn parse(source: &str) -> Vec<xml::reader::XmlEvent> {
        xml::reader::EventReader::new_with_config(
            source.as_bytes(),
            xml::ParserConfig::new()
                .ignore_comments(false)
                .trim_whitespace(false)
                .coalesce_characters(false)
                .ignore_root_level_whitespace(true),
        )
        .into_iter()
        .collect::<Result<Vec<_>, _>>()
        .unwrap()
    }

    /// Index of the first start tag whose local name is `local_name`.
    fn start_of(events: &[xml::reader::XmlEvent], local_name: &str) -> usize {
        events
            .iter()
            .position(|evt| {
                matches!(
                    evt,
                    xml::reader::XmlEvent::StartElement { name, .. }
                        if name.local_name == local_name
                )
            })
            .expect("start element not found in test document")
    }

    /// Canonicalises `source` without comments, starting at the element named
    /// `start_element`, or at the first event when it is `None`.
    fn canonicalize(source: &str, start_element: Option<&str>, exclusive: bool) -> String {
        let events = parse(source);
        let offset = start_element.map_or(0, |name| start_of(&events, name));
        super::canonical_rfc3076(&events, false, offset, exclusive).unwrap()
    }

    #[test]
    fn c14n_1() {
        let source_xml = r#"
<?xml version="1.0" encoding="ISO-8859-1"?>

<Envelope>
<!-- some comment -->
    <Body>
        Olá mundo
    </Body>

</Envelope>
"#;
        let canon_xml = r#"<Envelope>

    <Body>
        Olá mundo
    </Body>

</Envelope>"#;
        assert_eq!(canonicalize(source_xml, None, false), canon_xml);
    }

    #[test]
    fn c14n_2() {
        let source_xml = r#"<DigestMethod Algorithm="http:...#sha1" />"#;
        let canon_xml = r#"<DigestMethod Algorithm="http:...#sha1"></DigestMethod>"#;
        assert_eq!(canonicalize(source_xml, None, false), canon_xml);
    }

    #[test]
    fn c14n_3() {
        let source_xml = r#"<e1 a='one'
    b = 'two' />"#;
        let canon_xml = r#"<e1 a="one" b="two"></e1>"#;
        assert_eq!(canonicalize(source_xml, None, false), canon_xml);
    }

    #[test]
    fn c14n_4() {
        // The line break and the space that follows it each become one space.
        let source_xml = r#"<e2 C=' letter
 A ' />"#;
        let canon_xml = r#"<e2 C=" letter  A "></e2>"#;
        assert_eq!(canonicalize(source_xml, None, false), canon_xml);
    }

    #[test]
    fn c14n_5() {
        let source_xml = r#"<e b:attr="sorted" xmlns:b="http://www.ietf.org" attr="I'm" attr2="all" a:attr="out" a:attr2="now" xmlns="http://example.org" xmlns:a="http://www.w3.org" ></e>"#;
        let canon_xml = r#"<e xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I'm" attr2="all" b:attr="sorted" a:attr="out" a:attr2="now"></e>"#;
        assert_eq!(canonicalize(source_xml, None, false), canon_xml);
    }

    #[test]
    fn c14n_6() {
        let source_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<Envelope xmlns="http://www.example.com">
    <Part xmlns:ab="http://www.ab.com">
        <Doc Id="P666">
            ...
        </Doc>
        <Signature xmlns="http://www.w3.org/2000/09/xmldsig#">
            <SignedInfo>
                <Reference URI="P666" />
            </SignedInfo>
            ...
        </Signature>
    </Part>
</Envelope>"#;
        let canon_xml = r#"<Doc xmlns="http://www.example.com" xmlns:ab="http://www.ab.com" Id="P666">
            ...
        </Doc>"#;
        assert_eq!(canonicalize(source_xml, Some("Doc"), false), canon_xml);
    }

    #[test]
    fn c14n_7() {
        let canon_xml = r#"<n1:elem2 xmlns:n0="foo:bar" xmlns:n1="http://example.net" xmlns:n3="ftp://example.org" xml:lang="en">
                <n3:stuff></n3:stuff>
            </n1:elem2>"#;
        assert_eq!(canonicalize(LOCAL_DOC, Some("elem2"), false), canon_xml);
    }

    #[test]
    fn c14n_8() {
        let canon_xml = r#"<n1:elem2 xmlns:n1="http://example.net" xmlns:n2="http://foo.example" xml:lang="en" xml:space="retain">
                <n3:stuff xmlns:n3="ftp://example.org"></n3:stuff>
            </n1:elem2>"#;
        assert_eq!(canonicalize(PDU_DOC, Some("elem2"), false), canon_xml);
    }

    #[test]
    fn c14n_9() {
        assert_eq!(canonicalize(LOCAL_DOC, Some("elem2"), true), EXCLUSIVE_ELEM2);
    }

    #[test]
    fn c14n_10() {
        assert_eq!(canonicalize(PDU_DOC, Some("elem2"), true), EXCLUSIVE_ELEM2);
    }
}