winreg_artifacts/path_expansion.rs
1//! Unified registry path-expansion engine.
2//!
3//! Glob (`*`/`**`), control-set (`CurrentControlSet`), and multi-user
4//! (`HKU\%%sid%%`) resolution are the **same operation**: a catalog path with
5//! one or more **variable segments**, each ranging over an **enumerable
6//! domain**, expanded into concrete paths — each tagged with the [`Binding`]s
7//! that record which domain element produced it. Only the domain differs:
8//!
9//! - [`Wildcard::Subkey`] (`*` / `**`) → the subkeys of a node (intra-hive).
10//! - [`Wildcard::ControlSet`] (`CurrentControlSet`) → the `ControlSet00N` set
11//! selected by `Select\Current` (intra-SYSTEM-hive).
12//! - [`Wildcard::User`] (`HKU\%%sid%%` / NtUser) → the per-user profile hives
13//! (cross-file; bound by the caller, [`crate::catalog_scan::scan_users`]).
14//!
15//! This module owns the intra-hive walk for the `Subkey` and `ControlSet`
16//! domains; the `User` domain is bound one level up because it selects *which
17//! hive file* to walk. The proven glob matching/caps live here unchanged — the
18//! engine wraps them as the `Subkey` domain source rather than rewriting them.
19
20use winreg_core::key::Key;
21
22/// The domain a variable path segment ranges over.
23#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
24pub enum Wildcard {
25 /// `*` / `**` — ranges over the subkeys of a node (intra-hive).
26 Subkey,
27 /// `CurrentControlSet` — ranges over the active `ControlSet00N`
28 /// (intra-SYSTEM-hive), selected by `Select\Current`.
29 ControlSet,
30 /// `HKU\%%sid%%` / per-user NtUser — ranges over the profile hives
31 /// (cross-file). Bound by the multi-user scan, not by this engine.
32 User,
33}
34
35/// One variable resolution, carried on each hit for provenance.
36///
37/// For example `{Subkey, "{CLSID…}"}`, `{ControlSet, "ControlSet002"}`, or
38/// `{User, "S-1-5-21-…-1001"}`.
39#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)]
40pub struct Binding {
41 /// Which domain this binding came from.
42 pub kind: Wildcard,
43 /// The concrete domain element selected (child-key name, control-set name,
44 /// or user SID/profile).
45 pub value: String,
46}
47
48impl Binding {
49 /// Construct a binding for `kind` selecting `value`.
50 pub fn new(kind: Wildcard, value: impl Into<String>) -> Self {
51 Self {
52 kind,
53 value: value.into(),
54 }
55 }
56}
57
58/// One component of an expansion template.
59#[derive(Debug, Clone, PartialEq, Eq)]
60pub enum Segment {
61 /// An exact key name to descend into.
62 Literal(String),
63 /// A variable segment ranging over [`Wildcard`]'s domain. The string is the
64 /// match pattern: a glob (`*`, `*ControlSet*`) for `Subkey`/`ControlSet`,
65 /// otherwise contextual.
66 Variable(Wildcard, String),
67}
68
/// Deepest key-tree level a recursive (`**`) descent is allowed to reach.
///
/// Hives are untrusted input and may be crafted with pathological nesting.
/// Capping the depth keeps a malicious image from driving unbounded
/// stack/heap growth through recursion.
pub(crate) const MAX_GLOB_DEPTH: usize = 64_usize;
74
/// Upper bound on how many concrete keys one template may expand to.
///
/// This limits breadth: a hive holding millions of sibling keys under a `*`
/// cannot force a single template into an unbounded result set
/// (allocation-bomb defence).
pub(crate) const MAX_GLOB_MATCHES: usize = 4096_usize;
80
/// The concrete `ControlSet00N` names that a `CurrentControlSet` segment
/// expands to, as resolved from `SYSTEM\Select\Current`.
///
/// In the normal case this is exactly one active set. When `Select\Current`
/// is missing or unreadable the resolution degrades to `ControlSet001` (see
/// [`resolve_control_sets`]).
#[derive(Debug, Clone)]
pub struct ControlSetResolver {
    /// Names the alias stands for, listed in the order they are expanded.
    /// Expected to hold at least one element.
    pub sets: Vec<String>,
}
92
93/// Read `SYSTEM\Select\Current` (a `REG_DWORD`, value `N`) and resolve the
94/// `CurrentControlSet` alias to `ControlSet00N`.
95///
96/// Uses `Current` (the control set that was *running*), never `Default`. If
97/// `Select\Current` is absent, unreadable, or zero, falls back to
98/// `ControlSet001` — degrade, never panic. Reads are bounds-checked against the
99/// untrusted hive via winreg-core's value API.
100#[must_use]
101pub fn resolve_control_sets(root: &Key<'_>) -> ControlSetResolver {
102 let n = current_control_set_number(root).unwrap_or(1);
103 ControlSetResolver {
104 sets: vec![format!("ControlSet{n:03}")],
105 }
106}
107
108/// Read the active control-set number from `Select\Current`, or `None` when the
109/// key/value is absent, unreadable, or zero.
110fn current_control_set_number(root: &Key<'_>) -> Option<u32> {
111 let select = root.subkey("Select").ok()??;
112 let current = select.value("Current").ok()??;
113 // `as_u32` is bounds-checked and infallible on short data (returns 0).
114 let n = current.as_u32().ok()?;
115 if n == 0 {
116 None
117 } else {
118 Some(n)
119 }
120}
121
122/// Expand `segments` against the key tree rooted at `root`, invoking `emit` with
123/// `(bindings, concrete_path, &matched_key)` for every concrete key that matches
124/// the whole template.
125///
126/// `controlset` supplies the `ControlSet00N` names a [`Wildcard::ControlSet`]
127/// segment ranges over; it may be `None` when the template contains no
128/// `ControlSet` segment. `User` bindings are not produced here — the multi-user
129/// scan binds them when it selects the hive.
130pub fn expand(
131 root: &Key<'_>,
132 segments: &[Segment],
133 controlset: Option<&ControlSetResolver>,
134 emit: &mut dyn FnMut(&[Binding], &str, &Key<'_>),
135) {
136 let mut bindings: Vec<Binding> = Vec::new();
137 let mut matched = 0usize;
138 walk(
139 root,
140 segments,
141 controlset,
142 "",
143 0,
144 &mut matched,
145 &mut bindings,
146 emit,
147 );
148}
149
150/// Recursive template walk shared by every domain source.
151#[allow(clippy::too_many_arguments)]
152fn walk(
153 key: &Key<'_>,
154 segments: &[Segment],
155 controlset: Option<&ControlSetResolver>,
156 prefix: &str,
157 depth: usize,
158 matched: &mut usize,
159 bindings: &mut Vec<Binding>,
160 emit: &mut dyn FnMut(&[Binding], &str, &Key<'_>),
161) {
162 if *matched >= MAX_GLOB_MATCHES || depth > MAX_GLOB_DEPTH {
163 return;
164 }
165 let Some((head, rest)) = segments.split_first() else {
166 // All segments consumed — `key` is itself the concrete match.
167 *matched += 1;
168 emit(bindings, prefix, key);
169 return;
170 };
171
172 match head {
173 Segment::Literal(name) => {
174 let Ok(children) = key.subkeys() else { return };
175 for child in children {
176 if child.name().eq_ignore_ascii_case(name) {
177 let child_prefix = join_path(prefix, &child.name());
178 walk(
179 &child,
180 rest,
181 controlset,
182 &child_prefix,
183 depth + 1,
184 matched,
185 bindings,
186 emit,
187 );
188 break;
189 }
190 }
191 }
192 Segment::Variable(Wildcard::ControlSet, _) => {
193 // Domain = the active control set(s) from Select\Current. Default to
194 // ControlSet001 when no resolver was supplied (degrade, never panic).
195 let fallback = ControlSetResolver {
196 sets: vec!["ControlSet001".to_string()],
197 };
198 let resolver = controlset.unwrap_or(&fallback);
199 let Ok(children) = key.subkeys() else { return };
200 for set_name in &resolver.sets {
201 if *matched >= MAX_GLOB_MATCHES {
202 return;
203 }
204 for child in &children {
205 if child.name().eq_ignore_ascii_case(set_name) {
206 let child_prefix = join_path(prefix, &child.name());
207 bindings.push(Binding::new(Wildcard::ControlSet, child.name()));
208 walk(
209 child,
210 rest,
211 controlset,
212 &child_prefix,
213 depth + 1,
214 matched,
215 bindings,
216 emit,
217 );
218 bindings.pop();
219 break;
220 }
221 }
222 }
223 }
224 Segment::Variable(Wildcard::Subkey, pattern) => {
225 if pattern.contains("**") {
226 // `**` matches zero levels: try the remaining pattern here…
227 walk(
228 key, rest, controlset, prefix, depth, matched, bindings, emit,
229 );
230 // …and any number of levels: descend into every child, keeping `**`.
231 let Ok(children) = key.subkeys() else { return };
232 for child in children {
233 if *matched >= MAX_GLOB_MATCHES {
234 return;
235 }
236 let child_prefix = join_path(prefix, &child.name());
237 bindings.push(Binding::new(Wildcard::Subkey, child.name()));
238 walk(
239 &child,
240 segments,
241 controlset,
242 &child_prefix,
243 depth + 1,
244 matched,
245 bindings,
246 emit,
247 );
248 bindings.pop();
249 }
250 } else {
251 let Ok(children) = key.subkeys() else { return };
252 for child in children {
253 if *matched >= MAX_GLOB_MATCHES {
254 return;
255 }
256 if segment_matches(pattern, &child.name()) {
257 let child_prefix = join_path(prefix, &child.name());
258 bindings.push(Binding::new(Wildcard::Subkey, child.name()));
259 walk(
260 &child,
261 rest,
262 controlset,
263 &child_prefix,
264 depth + 1,
265 matched,
266 bindings,
267 emit,
268 );
269 bindings.pop();
270 }
271 }
272 }
273 }
274 // `User` is bound by the multi-user scan, never reached intra-hive.
275 Segment::Variable(Wildcard::User, _) => {} // cov:unreachable: User segments are stripped to a User binding by scan_users before expand() is called.
276 }
277}
278
/// Append `name` to the hive-relative `prefix`, separating the two with `\`.
///
/// An empty `prefix` means "at the root", so `name` is returned on its own
/// with no leading separator.
fn join_path(prefix: &str, name: &str) -> String {
    match prefix {
        "" => name.to_owned(),
        parent => [parent, name].join("\\"),
    }
}
287
288/// Match a single path component against a glob `pattern` that may contain `*`
289/// wildcards anywhere (case-insensitive). `*` matches any run of characters.
290fn segment_matches(pattern: &str, name: &str) -> bool {
291 let pat: Vec<char> = pattern.to_ascii_lowercase().chars().collect();
292 let txt: Vec<char> = name.to_ascii_lowercase().chars().collect();
293 glob_match(&pat, &txt)
294}
295
/// Iterative glob matcher over char slices in which `*` is the only
/// metacharacter (no backtracking blow-up).
///
/// Only the most recent `*` is remembered. On a mismatch that star absorbs one
/// more character of `txt` and matching resumes just after it.
fn glob_match(pat: &[char], txt: &[char]) -> bool {
    let mut pat_idx = 0usize;
    let mut txt_idx = 0usize;
    // (position of the last `*` seen in `pat`, position in `txt` where the
    // text following that star currently starts).
    let mut resume: Option<(usize, usize)> = None;

    while txt_idx < txt.len() {
        match pat.get(pat_idx) {
            Some(&'*') => {
                // Start by letting the star match nothing.
                resume = Some((pat_idx, txt_idx));
                pat_idx += 1;
            }
            Some(&expected) if expected == txt[txt_idx] => {
                pat_idx += 1;
                txt_idx += 1;
            }
            _ => match resume {
                Some((star_idx, restart)) => {
                    // Mismatch: widen the last star by one character and retry.
                    let widened = restart + 1;
                    resume = Some((star_idx, widened));
                    pat_idx = star_idx + 1;
                    txt_idx = widened;
                }
                None => return false,
            },
        }
    }
    // Text exhausted: whatever is left of the pattern must be stars only.
    pat[pat_idx..].iter().all(|&c| c == '*')
}
321
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)] // panicking helpers are acceptable in tests
mod tests {
    use super::*;

    /// `*` may sit at the start, middle, or end of a segment pattern.
    #[test]
    fn segment_match_handles_midsegment_wildcard() {
        assert!(segment_matches("*ControlSet*", "ControlSet001"));
        assert!(segment_matches("*", "anything"));
        assert!(segment_matches("ABC*", "abcdef"));
        assert!(!segment_matches("ABC*", "xyz"));
        assert!(!segment_matches("Foo", "Bar"));
    }

    /// ASCII case is ignored whichever side carries the upper-case letters.
    #[test]
    fn segment_match_ignores_ascii_case_both_ways() {
        assert!(segment_matches("abc*", "ABCDEF"));
        assert!(segment_matches("Run", "RUN"));
    }

    /// Empty patterns/names and patterns that force the matcher to backtrack.
    #[test]
    fn segment_match_handles_empty_and_backtracking() {
        assert!(segment_matches("", ""));
        assert!(!segment_matches("", "x"));
        assert!(segment_matches("*", ""));
        assert!(!segment_matches("a*", ""));
        assert!(segment_matches("a*b*c", "aXbXbXc"));
        assert!(!segment_matches("a*b*c", "aXbXbX"));
        assert!(segment_matches("*001", "ControlSet001"));
        assert!(!segment_matches("*001", "ControlSet002"));
    }

    /// Paths are `\`-joined, with no leading separator at the root.
    #[test]
    fn join_path_uses_backslash_and_skips_empty_prefix() {
        assert_eq!(join_path("", "SYSTEM"), "SYSTEM");
        assert_eq!(join_path("SYSTEM", "Select"), "SYSTEM\\Select");
    }

    /// `Binding::new` stores the kind and converts the value to a `String`.
    #[test]
    fn binding_new_constructs() {
        let b = Binding::new(Wildcard::ControlSet, "ControlSet002");
        assert_eq!(b.kind, Wildcard::ControlSet);
        assert_eq!(b.value, "ControlSet002");
    }
}
342}