Skip to main content

clayers_xml/
c14n.rs

1use bergshamra_c14n::C14nMode;
2
3use crate::{ContentHash, Error};
4
/// Canonicalization mode wrapping the W3C C14N algorithm variants.
///
/// Each variant selects one W3C canonicalization algorithm. The `WithComments`
/// variants are the flavours of the same algorithm that keep comments in the
/// canonical output.
///
/// The type is a plain field-less enum, so it is cheap to copy, compare and
/// use as a key in hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CanonicalizationMode {
    /// Canonical XML 1.0 (inclusive).
    Inclusive,
    /// Canonical XML 1.0 with comments (inclusive).
    InclusiveWithComments,
    /// Canonical XML 1.1 (inclusive).
    Inclusive11,
    /// Canonical XML 1.1 with comments (inclusive).
    Inclusive11WithComments,
    /// Exclusive Canonical XML 1.0.
    Exclusive,
    /// Exclusive Canonical XML 1.0 with comments.
    ExclusiveWithComments,
}
21
22impl CanonicalizationMode {
23    fn to_bergshamra(self) -> C14nMode {
24        match self {
25            Self::Inclusive => C14nMode::Inclusive,
26            Self::InclusiveWithComments => C14nMode::InclusiveWithComments,
27            Self::Inclusive11 => C14nMode::Inclusive11,
28            Self::Inclusive11WithComments => C14nMode::Inclusive11WithComments,
29            Self::Exclusive => C14nMode::Exclusive,
30            Self::ExclusiveWithComments => C14nMode::ExclusiveWithComments,
31        }
32    }
33}
34
35/// Canonicalize an XML string using the specified mode.
36///
37/// Bridge: xot tree -> serialize to string -> bergshamra C14N -> bytes.
38///
39/// # Errors
40///
41/// Returns `Error::Canonicalization` if the XML is malformed or C14N fails.
42pub fn canonicalize(xml: &str, mode: CanonicalizationMode) -> Result<Vec<u8>, Error> {
43    let prefixes: &[String] = &[];
44    bergshamra_c14n::canonicalize(xml, mode.to_bergshamra(), None, prefixes)
45        .map_err(|e| Error::Canonicalization(e.to_string()))
46}
47
48/// Canonicalize an XML string using inclusive C14N and return the raw bytes.
49///
50/// # Errors
51///
52/// Returns `Error::Canonicalization` if the XML is malformed or C14N fails.
53pub fn canonicalize_str(xml: &str) -> Result<Vec<u8>, Error> {
54    canonicalize(xml, CanonicalizationMode::Inclusive)
55}
56
57/// Canonicalize an XML string and compute its `ContentHash`.
58///
59/// # Errors
60///
61/// Returns `Error::Canonicalization` if the XML is malformed or C14N fails.
62pub fn canonicalize_and_hash(xml: &str, mode: CanonicalizationMode) -> Result<ContentHash, Error> {
63    let canonical = canonicalize(xml, mode)?;
64    Ok(ContentHash::from_canonical(&canonical))
65}
66
// Public API surface (used by ast-grep for structural verification).
//
// `#[cfg(any())]` is a predicate that is never true, so this module is stripped
// before it is compiled and contributes nothing to the build. That is also why
// the body-less `fn` signature below is accepted: it only has to parse.
//
// Keep the signature in sync with the real `canonicalize` function in this file.
// NOTE(review): only `canonicalize` is mirrored here; confirm whether
// `canonicalize_str` and `canonicalize_and_hash` are intentionally omitted.
#[cfg(any())]
mod _api {
    use super::*;
    pub fn canonicalize(xml: &str, mode: CanonicalizationMode) -> Result<Vec<u8>, Error>;
}
73
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: the `a` namespace is declared on the root element but only
    /// used by the child element.
    const SIMPLE_XML: &str = r#"<root xmlns:a="urn:a"><a:child>text</a:child></root>"#;

    /// Canonicalizing the same input twice must yield identical bytes.
    #[test]
    fn canonicalize_str_deterministic() {
        let first_run = canonicalize_str(SIMPLE_XML).expect("c14n failed");
        let second_run = canonicalize_str(SIMPLE_XML).expect("c14n failed");
        assert_eq!(first_run, second_run);
    }

    /// Exclusive C14N only renders the namespaces an element visibly uses,
    /// so for the namespaced fixture its output must differ from inclusive C14N.
    #[test]
    fn inclusive_vs_exclusive_differ_for_namespaced() {
        let inclusive =
            canonicalize(SIMPLE_XML, CanonicalizationMode::Inclusive).expect("inc failed");
        let exclusive =
            canonicalize(SIMPLE_XML, CanonicalizationMode::Exclusive).expect("exc failed");
        assert_ne!(inclusive, exclusive);
    }

    /// The prefixed hash is expected to be `sha256:` followed by 64 characters.
    #[test]
    fn canonicalize_and_hash_returns_valid_hash() {
        let hash = canonicalize_and_hash(SIMPLE_XML, CanonicalizationMode::Inclusive)
            .expect("hash failed");
        let rendered = hash.to_prefixed();

        assert!(rendered.starts_with("sha256:"));
        // 7 characters of prefix + 64 characters of digest = 71.
        assert_eq!(rendered.len(), "sha256:".len() + 64);
    }

    /// Smoke test: every supported mode accepts a trivial document.
    #[test]
    fn all_six_modes_dont_panic() {
        for mode in [
            CanonicalizationMode::Inclusive,
            CanonicalizationMode::InclusiveWithComments,
            CanonicalizationMode::Inclusive11,
            CanonicalizationMode::Inclusive11WithComments,
            CanonicalizationMode::Exclusive,
            CanonicalizationMode::ExclusiveWithComments,
        ] {
            let result = canonicalize("<root>hello</root>", mode);
            assert!(result.is_ok(), "mode {mode:?} failed: {result:?}");
        }
    }
}
121}