Skip to main content

xml_sec/xmldsig/
types.rs

1//! Core types for the XMLDSig transform pipeline.
2//!
3//! These types flow between URI dereference, transforms, and digest computation.
4//!
5//! These types are consumed by URI dereference, the transform chain (P1-014,
6//! P1-015), and reference processing (P1-018).
7
8use std::collections::HashSet;
9
10use roxmltree::{Document, Node, NodeId};
11
12// roxmltree 0.21 uses `Node<'a, 'input: 'a>`. We tie both lifetimes together
13// with a single `'a` by requiring `'input = 'a` at every use site (`Node<'a, 'a>`).
14// This is safe because our NodeSet borrows the Document which owns the input.
15
16/// Data flowing between transforms in the verification/signing pipeline.
17///
18/// Transforms consume and produce either a node set (XML-level) or raw bytes
19/// (after canonicalization or base64 decode).
20pub enum TransformData<'a> {
21    /// A set of nodes from the parsed XML document.
22    NodeSet(NodeSet<'a>),
23    /// Raw bytes (e.g., after canonicalization).
24    Binary(Vec<u8>),
25}
26
27impl std::fmt::Debug for TransformData<'_> {
28    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
29        match self {
30            Self::NodeSet(_) => f.debug_tuple("NodeSet").field(&"...").finish(),
31            Self::Binary(b) => f.debug_tuple("Binary").field(&b.len()).finish(),
32        }
33    }
34}
35
36impl<'a> TransformData<'a> {
37    /// Convert to `NodeSet`, returning an error if this is `Binary` data.
38    pub fn into_node_set(self) -> Result<NodeSet<'a>, TransformError> {
39        match self {
40            Self::NodeSet(ns) => Ok(ns),
41            Self::Binary(_) => Err(TransformError::TypeMismatch {
42                expected: "NodeSet",
43                got: "Binary",
44            }),
45        }
46    }
47
48    /// Convert to binary bytes, returning an error if this is a `NodeSet`.
49    pub fn into_binary(self) -> Result<Vec<u8>, TransformError> {
50        match self {
51            Self::Binary(b) => Ok(b),
52            Self::NodeSet(_) => Err(TransformError::TypeMismatch {
53                expected: "Binary",
54                got: "NodeSet",
55            }),
56        }
57    }
58}
59
60/// A set of nodes from a roxmltree document.
61///
62/// Represents "which nodes are included" for canonicalization and transforms.
63/// Two modes:
64/// - **Whole document**: `included` is `None`, meaning all nodes are in the set
65///   (minus any in `excluded`).
66/// - **Subset**: `included` is `Some(ids)`, meaning only those node IDs are in
67///   the set (minus any in `excluded`).
68pub struct NodeSet<'a> {
69    /// Reference to the parsed document.
70    doc: &'a Document<'a>,
71    /// If `None`, all nodes are included. If `Some`, only these nodes.
72    included: Option<HashSet<NodeId>>,
73    /// Nodes explicitly excluded (e.g., `<Signature>` subtree for enveloped transform).
74    excluded: HashSet<NodeId>,
75    /// Whether comment nodes are included. For empty URI dereference (whole
76    /// document), comments are excluded per XMLDSig spec.
77    with_comments: bool,
78}
79
80impl<'a> NodeSet<'a> {
81    /// Create a node set representing the entire document without comments.
82    ///
83    /// Per XMLDSig §4.3.3.2: "An empty URI [...] is a reference to the document
84    /// [...] and the comment nodes are not included."
85    pub fn entire_document_without_comments(doc: &'a Document<'a>) -> Self {
86        Self {
87            doc,
88            included: None,
89            excluded: HashSet::new(),
90            with_comments: false,
91        }
92    }
93
94    /// Create a node set representing the entire document with comments.
95    ///
96    /// Used for `#xpointer(/)` which, unlike empty URI, includes comment nodes.
97    pub fn entire_document_with_comments(doc: &'a Document<'a>) -> Self {
98        Self {
99            doc,
100            included: None,
101            excluded: HashSet::new(),
102            with_comments: true,
103        }
104    }
105
106    /// Create a node set rooted at `element`, containing that element and all
107    /// of its descendant nodes (elements, text, and, for this constructor,
108    /// comment nodes).
109    ///
110    /// Note: in `roxmltree`, attributes and namespaces are not separate nodes
111    /// and therefore are not tracked individually in this `NodeSet`. During
112    /// canonicalization, any attributes and namespace declarations belonging to
113    /// the included elements are serialized as part of those elements.
114    pub fn subtree(element: Node<'a, 'a>) -> Self {
115        let mut ids = HashSet::new();
116        collect_subtree_ids(element, &mut ids);
117        Self {
118            doc: element.document(),
119            included: Some(ids),
120            excluded: HashSet::new(),
121            with_comments: true,
122        }
123    }
124
125    /// Reference to the underlying document.
126    pub fn document(&self) -> &'a Document<'a> {
127        self.doc
128    }
129
130    /// Check whether a node is in this set.
131    ///
132    /// Returns `false` for nodes from a different document than this set's
133    /// owning document (prevents cross-document NodeId collisions).
134    pub fn contains(&self, node: Node<'_, '_>) -> bool {
135        // Guard: reject nodes from a different document. NodeIds are
136        // per-document indices — the same index from another document
137        // would reference a completely different node.
138        if !std::ptr::eq(node.document() as *const _, self.doc as *const _) {
139            return false;
140        }
141
142        let id = node.id();
143
144        // Check exclusion first
145        if self.excluded.contains(&id) {
146            return false;
147        }
148
149        // Filter comments if not included
150        if !self.with_comments && node.is_comment() {
151            return false;
152        }
153
154        // Check inclusion
155        match &self.included {
156            None => true,
157            Some(ids) => ids.contains(&id),
158        }
159    }
160
161    /// Exclude a node and all its descendants from this set.
162    ///
163    /// No-op for nodes from a different document.
164    pub fn exclude_subtree(&mut self, node: Node<'_, '_>) {
165        // Guard: only exclude nodes from our document
166        if !std::ptr::eq(node.document() as *const _, self.doc as *const _) {
167            return;
168        }
169        collect_subtree_ids(node, &mut self.excluded);
170    }
171
172    /// Whether comments are included in this node set.
173    pub fn with_comments(&self) -> bool {
174        self.with_comments
175    }
176}
177
178/// Collect a node and all its descendants into a set of `NodeId`s.
179///
180/// Uses an explicit stack instead of recursion to avoid stack overflow
181/// on deeply nested XML (attacker-controlled input in SAML contexts).
182fn collect_subtree_ids(node: Node<'_, '_>, ids: &mut HashSet<NodeId>) {
183    let mut stack = vec![node];
184    while let Some(current) = stack.pop() {
185        ids.insert(current.id());
186        for child in current.children() {
187            stack.push(child);
188        }
189    }
190    // In roxmltree, attributes and namespaces are not nodes and do not
191    // appear in `children()` traversal; they're accessed via
192    // node.attributes(). We therefore track the NodeIds of all descendant
193    // nodes reachable via `children()` (elements, text, comments,
194    // processing instructions, etc.). During C14N, the serializer checks
195    // whether an element is in the node set and then serializes all of
196    // that element's attributes/namespaces as part of the element, so
197    // separate attribute/namespace identifiers are unnecessary.
198}
199
200/// Errors during transform processing.
201///
202/// New variants may be added as more transforms are implemented (Base64, XPath).
203#[derive(Debug, thiserror::Error)]
204#[non_exhaustive]
205pub enum TransformError {
206    /// Data type mismatch between transforms.
207    #[error("type mismatch: expected {expected}, got {got}")]
208    TypeMismatch {
209        /// Expected data type.
210        expected: &'static str,
211        /// Actual data type.
212        got: &'static str,
213    },
214
215    /// Element not found by ID.
216    #[error("element not found by ID: {0}")]
217    ElementNotFound(String),
218
219    /// Unsupported URI scheme or format.
220    #[error("unsupported URI: {0}")]
221    UnsupportedUri(String),
222
223    /// Unsupported transform algorithm.
224    #[error("unsupported transform: {0}")]
225    UnsupportedTransform(String),
226
227    /// Canonicalization error during transform.
228    #[error("C14N error: {0}")]
229    C14n(String),
230
231    /// The Signature node passed to the enveloped transform belongs to a
232    /// different `Document` than the input `NodeSet`.
233    #[error("enveloped-signature transform: invalid Signature node for this document")]
234    CrossDocumentSignatureNode,
235}