xml_sec/xmldsig/types.rs
//! Core types for the XMLDSig transform pipeline.
//!
//! These types flow between URI dereference, the transform chain (P1-014,
//! P1-015), reference processing (P1-018), and digest computation.
7
8use std::collections::HashSet;
9
10use roxmltree::{Document, Node, NodeId};
11
// roxmltree 0.21 uses `Node<'a, 'input: 'a>`. We collapse both lifetimes into a
// single `'a` by writing `Node<'a, 'a>` (i.e. `'input = 'a`) at every use site.
// This works because `NodeSet` only borrows the `Document` for `'a`, and the
// `'input: 'a` bound guarantees the input text lives at least that long.
15
16/// Data flowing between transforms in the verification/signing pipeline.
17///
18/// Transforms consume and produce either a node set (XML-level) or raw bytes
19/// (after canonicalization or base64 decode).
20pub enum TransformData<'a> {
21 /// A set of nodes from the parsed XML document.
22 NodeSet(NodeSet<'a>),
23 /// Raw bytes (e.g., after canonicalization).
24 Binary(Vec<u8>),
25}
26
27impl std::fmt::Debug for TransformData<'_> {
28 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
29 match self {
30 Self::NodeSet(_) => f.debug_tuple("NodeSet").field(&"...").finish(),
31 Self::Binary(b) => f.debug_tuple("Binary").field(&b.len()).finish(),
32 }
33 }
34}
35
36impl<'a> TransformData<'a> {
37 /// Convert to `NodeSet`, returning an error if this is `Binary` data.
38 pub fn into_node_set(self) -> Result<NodeSet<'a>, TransformError> {
39 match self {
40 Self::NodeSet(ns) => Ok(ns),
41 Self::Binary(_) => Err(TransformError::TypeMismatch {
42 expected: "NodeSet",
43 got: "Binary",
44 }),
45 }
46 }
47
48 /// Convert to binary bytes, returning an error if this is a `NodeSet`.
49 pub fn into_binary(self) -> Result<Vec<u8>, TransformError> {
50 match self {
51 Self::Binary(b) => Ok(b),
52 Self::NodeSet(_) => Err(TransformError::TypeMismatch {
53 expected: "Binary",
54 got: "NodeSet",
55 }),
56 }
57 }
58}
59
60/// A set of nodes from a roxmltree document.
61///
62/// Represents "which nodes are included" for canonicalization and transforms.
63/// Two modes:
64/// - **Whole document**: `included` is `None`, meaning all nodes are in the set
65/// (minus any in `excluded`).
66/// - **Subset**: `included` is `Some(ids)`, meaning only those node IDs are in
67/// the set (minus any in `excluded`).
68pub struct NodeSet<'a> {
69 /// Reference to the parsed document.
70 doc: &'a Document<'a>,
71 /// If `None`, all nodes are included. If `Some`, only these nodes.
72 included: Option<HashSet<NodeId>>,
73 /// Nodes explicitly excluded (e.g., `<Signature>` subtree for enveloped transform).
74 excluded: HashSet<NodeId>,
75 /// Whether comment nodes are included. For empty URI dereference (whole
76 /// document), comments are excluded per XMLDSig spec.
77 with_comments: bool,
78}
79
80impl<'a> NodeSet<'a> {
81 /// Create a node set representing the entire document without comments.
82 ///
83 /// Per XMLDSig §4.3.3.2: "An empty URI [...] is a reference to the document
84 /// [...] and the comment nodes are not included."
85 pub fn entire_document_without_comments(doc: &'a Document<'a>) -> Self {
86 Self {
87 doc,
88 included: None,
89 excluded: HashSet::new(),
90 with_comments: false,
91 }
92 }
93
94 /// Create a node set representing the entire document with comments.
95 ///
96 /// Used for `#xpointer(/)` which, unlike empty URI, includes comment nodes.
97 pub fn entire_document_with_comments(doc: &'a Document<'a>) -> Self {
98 Self {
99 doc,
100 included: None,
101 excluded: HashSet::new(),
102 with_comments: true,
103 }
104 }
105
106 /// Create a node set rooted at `element`, containing that element and all
107 /// of its descendant nodes (elements, text, and, for this constructor,
108 /// comment nodes).
109 ///
110 /// Note: in `roxmltree`, attributes and namespaces are not separate nodes
111 /// and therefore are not tracked individually in this `NodeSet`. During
112 /// canonicalization, any attributes and namespace declarations belonging to
113 /// the included elements are serialized as part of those elements.
114 pub fn subtree(element: Node<'a, 'a>) -> Self {
115 let mut ids = HashSet::new();
116 collect_subtree_ids(element, &mut ids);
117 Self {
118 doc: element.document(),
119 included: Some(ids),
120 excluded: HashSet::new(),
121 with_comments: true,
122 }
123 }
124
125 /// Reference to the underlying document.
126 pub fn document(&self) -> &'a Document<'a> {
127 self.doc
128 }
129
130 /// Check whether a node is in this set.
131 ///
132 /// Returns `false` for nodes from a different document than this set's
133 /// owning document (prevents cross-document NodeId collisions).
134 pub fn contains(&self, node: Node<'_, '_>) -> bool {
135 // Guard: reject nodes from a different document. NodeIds are
136 // per-document indices — the same index from another document
137 // would reference a completely different node.
138 if !std::ptr::eq(node.document() as *const _, self.doc as *const _) {
139 return false;
140 }
141
142 let id = node.id();
143
144 // Check exclusion first
145 if self.excluded.contains(&id) {
146 return false;
147 }
148
149 // Filter comments if not included
150 if !self.with_comments && node.is_comment() {
151 return false;
152 }
153
154 // Check inclusion
155 match &self.included {
156 None => true,
157 Some(ids) => ids.contains(&id),
158 }
159 }
160
161 /// Exclude a node and all its descendants from this set.
162 ///
163 /// No-op for nodes from a different document.
164 pub fn exclude_subtree(&mut self, node: Node<'_, '_>) {
165 // Guard: only exclude nodes from our document
166 if !std::ptr::eq(node.document() as *const _, self.doc as *const _) {
167 return;
168 }
169 collect_subtree_ids(node, &mut self.excluded);
170 }
171
172 /// Whether comments are included in this node set.
173 pub fn with_comments(&self) -> bool {
174 self.with_comments
175 }
176}
177
178/// Collect a node and all its descendants into a set of `NodeId`s.
179///
180/// Uses an explicit stack instead of recursion to avoid stack overflow
181/// on deeply nested XML (attacker-controlled input in SAML contexts).
182fn collect_subtree_ids(node: Node<'_, '_>, ids: &mut HashSet<NodeId>) {
183 let mut stack = vec![node];
184 while let Some(current) = stack.pop() {
185 ids.insert(current.id());
186 for child in current.children() {
187 stack.push(child);
188 }
189 }
190 // In roxmltree, attributes and namespaces are not nodes and do not
191 // appear in `children()` traversal; they're accessed via
192 // node.attributes(). We therefore track the NodeIds of all descendant
193 // nodes reachable via `children()` (elements, text, comments,
194 // processing instructions, etc.). During C14N, the serializer checks
195 // whether an element is in the node set and then serializes all of
196 // that element's attributes/namespaces as part of the element, so
197 // separate attribute/namespace identifiers are unnecessary.
198}
199
200/// Errors during transform processing.
201///
202/// New variants may be added as more transforms are implemented (Base64, XPath).
203#[derive(Debug, thiserror::Error)]
204#[non_exhaustive]
205pub enum TransformError {
206 /// Data type mismatch between transforms.
207 #[error("type mismatch: expected {expected}, got {got}")]
208 TypeMismatch {
209 /// Expected data type.
210 expected: &'static str,
211 /// Actual data type.
212 got: &'static str,
213 },
214
215 /// Element not found by ID.
216 #[error("element not found by ID: {0}")]
217 ElementNotFound(String),
218
219 /// Unsupported URI scheme or format.
220 #[error("unsupported URI: {0}")]
221 UnsupportedUri(String),
222
223 /// Unsupported transform algorithm.
224 #[error("unsupported transform: {0}")]
225 UnsupportedTransform(String),
226
227 /// Canonicalization error during transform.
228 #[error("C14N error: {0}")]
229 C14n(String),
230
231 /// The Signature node passed to the enveloped transform belongs to a
232 /// different `Document` than the input `NodeSet`.
233 #[error("enveloped-signature transform: invalid Signature node for this document")]
234 CrossDocumentSignatureNode,
235}