Skip to main content

winreg_artifacts/
path_expansion.rs

//! Unified registry path-expansion engine.
//!
//! Glob (`*`/`**`), control-set (`CurrentControlSet`), and multi-user
//! (`HKU\%%sid%%`) resolution are the **same operation**: a catalog path with
//! one or more **variable segments**, each ranging over an **enumerable
//! domain**, expanded into concrete paths — each tagged with the [`Binding`]s
//! that record which domain element produced it. Only the domain differs:
//!
//! - [`Wildcard::Subkey`] (`*` / `**`) → the subkeys of a node (intra-hive).
//! - [`Wildcard::ControlSet`] (`CurrentControlSet`) → the `ControlSet00N` set
//!   selected by `Select\Current` (intra-SYSTEM-hive).
//! - [`Wildcard::User`] (`HKU\%%sid%%` / NtUser) → the per-user profile hives
//!   (cross-file; bound by the caller, [`crate::catalog_scan::scan_users`]).
//!
//! This module owns the intra-hive walk for the `Subkey` and `ControlSet`
//! domains; the `User` domain is bound one level up because it selects *which
//! hive file* to walk. The proven glob matching/caps live here unchanged — the
//! engine wraps them as the `Subkey` domain source rather than rewriting them.
19
20use winreg_core::key::Key;
21
22/// The domain a variable path segment ranges over.
23#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
24pub enum Wildcard {
25    /// `*` / `**` — ranges over the subkeys of a node (intra-hive).
26    Subkey,
27    /// `CurrentControlSet` — ranges over the active `ControlSet00N`
28    /// (intra-SYSTEM-hive), selected by `Select\Current`.
29    ControlSet,
30    /// `HKU\%%sid%%` / per-user NtUser — ranges over the profile hives
31    /// (cross-file). Bound by the multi-user scan, not by this engine.
32    User,
33}
34
35/// One variable resolution, carried on each hit for provenance.
36///
37/// For example `{Subkey, "{CLSID…}"}`, `{ControlSet, "ControlSet002"}`, or
38/// `{User, "S-1-5-21-…-1001"}`.
39#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)]
40pub struct Binding {
41    /// Which domain this binding came from.
42    pub kind: Wildcard,
43    /// The concrete domain element selected (child-key name, control-set name,
44    /// or user SID/profile).
45    pub value: String,
46}
47
48impl Binding {
49    /// Construct a binding for `kind` selecting `value`.
50    pub fn new(kind: Wildcard, value: impl Into<String>) -> Self {
51        Self {
52            kind,
53            value: value.into(),
54        }
55    }
56}
57
58/// One component of an expansion template.
59#[derive(Debug, Clone, PartialEq, Eq)]
60pub enum Segment {
61    /// An exact key name to descend into.
62    Literal(String),
63    /// A variable segment ranging over [`Wildcard`]'s domain. The string is the
64    /// match pattern: a glob (`*`, `*ControlSet*`) for `Subkey`/`ControlSet`,
65    /// otherwise contextual.
66    Variable(Wildcard, String),
67}
68
/// Upper bound on how deep into the key tree a `**` recursive descent may go.
///
/// Hives are untrusted input and can be built with pathological nesting;
/// capping the depth keeps a hostile image from forcing unbounded stack/heap
/// growth during the recursion.
pub(crate) const MAX_GLOB_DEPTH: usize = 64;
74
/// Upper bound on the number of concrete keys one template may expand to.
///
/// This limits breadth: a hive holding millions of siblings beneath a `*`
/// cannot force a single template to yield an unbounded result set (defence
/// against allocation bombs).
pub(crate) const MAX_GLOB_MATCHES: usize = 4096;
80
/// The `ControlSet00N` names that a `CurrentControlSet` segment stands for, as
/// resolved from `SYSTEM\Select\Current`.
///
/// Usually this is just the one active set. When `Select\Current` is missing
/// or cannot be read, resolution degrades to `ControlSet001` (see
/// [`resolve_control_sets`]).
#[derive(Debug, Clone)]
pub struct ControlSetResolver {
    /// Concrete `ControlSet00N` names the alias maps to, listed in the order
    /// they are expanded. Expected to hold at least one entry.
    pub sets: Vec<String>,
}
92
93/// Read `SYSTEM\Select\Current` (a `REG_DWORD`, value `N`) and resolve the
94/// `CurrentControlSet` alias to `ControlSet00N`.
95///
96/// Uses `Current` (the control set that was *running*), never `Default`. If
97/// `Select\Current` is absent, unreadable, or zero, falls back to
98/// `ControlSet001` — degrade, never panic. Reads are bounds-checked against the
99/// untrusted hive via winreg-core's value API.
100#[must_use]
101pub fn resolve_control_sets(root: &Key<'_>) -> ControlSetResolver {
102    let n = current_control_set_number(root).unwrap_or(1);
103    ControlSetResolver {
104        sets: vec![format!("ControlSet{n:03}")],
105    }
106}
107
108/// Read the active control-set number from `Select\Current`, or `None` when the
109/// key/value is absent, unreadable, or zero.
110fn current_control_set_number(root: &Key<'_>) -> Option<u32> {
111    let select = root.subkey("Select").ok()??;
112    let current = select.value("Current").ok()??;
113    // `as_u32` is bounds-checked and infallible on short data (returns 0).
114    let n = current.as_u32().ok()?;
115    if n == 0 {
116        None
117    } else {
118        Some(n)
119    }
120}
121
122/// Expand `segments` against the key tree rooted at `root`, invoking `emit` with
123/// `(bindings, concrete_path, &matched_key)` for every concrete key that matches
124/// the whole template.
125///
126/// `controlset` supplies the `ControlSet00N` names a [`Wildcard::ControlSet`]
127/// segment ranges over; it may be `None` when the template contains no
128/// `ControlSet` segment. `User` bindings are not produced here — the multi-user
129/// scan binds them when it selects the hive.
130pub fn expand(
131    root: &Key<'_>,
132    segments: &[Segment],
133    controlset: Option<&ControlSetResolver>,
134    emit: &mut dyn FnMut(&[Binding], &str, &Key<'_>),
135) {
136    let mut bindings: Vec<Binding> = Vec::new();
137    let mut matched = 0usize;
138    walk(
139        root,
140        segments,
141        controlset,
142        "",
143        0,
144        &mut matched,
145        &mut bindings,
146        emit,
147    );
148}
149
150/// Recursive template walk shared by every domain source.
151#[allow(clippy::too_many_arguments)]
152fn walk(
153    key: &Key<'_>,
154    segments: &[Segment],
155    controlset: Option<&ControlSetResolver>,
156    prefix: &str,
157    depth: usize,
158    matched: &mut usize,
159    bindings: &mut Vec<Binding>,
160    emit: &mut dyn FnMut(&[Binding], &str, &Key<'_>),
161) {
162    if *matched >= MAX_GLOB_MATCHES || depth > MAX_GLOB_DEPTH {
163        return;
164    }
165    let Some((head, rest)) = segments.split_first() else {
166        // All segments consumed — `key` is itself the concrete match.
167        *matched += 1;
168        emit(bindings, prefix, key);
169        return;
170    };
171
172    match head {
173        Segment::Literal(name) => {
174            let Ok(children) = key.subkeys() else { return };
175            for child in children {
176                if child.name().eq_ignore_ascii_case(name) {
177                    let child_prefix = join_path(prefix, &child.name());
178                    walk(
179                        &child,
180                        rest,
181                        controlset,
182                        &child_prefix,
183                        depth + 1,
184                        matched,
185                        bindings,
186                        emit,
187                    );
188                    break;
189                }
190            }
191        }
192        Segment::Variable(Wildcard::ControlSet, _) => {
193            // Domain = the active control set(s) from Select\Current. Default to
194            // ControlSet001 when no resolver was supplied (degrade, never panic).
195            let fallback = ControlSetResolver {
196                sets: vec!["ControlSet001".to_string()],
197            };
198            let resolver = controlset.unwrap_or(&fallback);
199            let Ok(children) = key.subkeys() else { return };
200            for set_name in &resolver.sets {
201                if *matched >= MAX_GLOB_MATCHES {
202                    return;
203                }
204                for child in &children {
205                    if child.name().eq_ignore_ascii_case(set_name) {
206                        let child_prefix = join_path(prefix, &child.name());
207                        bindings.push(Binding::new(Wildcard::ControlSet, child.name()));
208                        walk(
209                            child,
210                            rest,
211                            controlset,
212                            &child_prefix,
213                            depth + 1,
214                            matched,
215                            bindings,
216                            emit,
217                        );
218                        bindings.pop();
219                        break;
220                    }
221                }
222            }
223        }
224        Segment::Variable(Wildcard::Subkey, pattern) => {
225            if pattern.contains("**") {
226                // `**` matches zero levels: try the remaining pattern here…
227                walk(
228                    key, rest, controlset, prefix, depth, matched, bindings, emit,
229                );
230                // …and any number of levels: descend into every child, keeping `**`.
231                let Ok(children) = key.subkeys() else { return };
232                for child in children {
233                    if *matched >= MAX_GLOB_MATCHES {
234                        return;
235                    }
236                    let child_prefix = join_path(prefix, &child.name());
237                    bindings.push(Binding::new(Wildcard::Subkey, child.name()));
238                    walk(
239                        &child,
240                        segments,
241                        controlset,
242                        &child_prefix,
243                        depth + 1,
244                        matched,
245                        bindings,
246                        emit,
247                    );
248                    bindings.pop();
249                }
250            } else {
251                let Ok(children) = key.subkeys() else { return };
252                for child in children {
253                    if *matched >= MAX_GLOB_MATCHES {
254                        return;
255                    }
256                    if segment_matches(pattern, &child.name()) {
257                        let child_prefix = join_path(prefix, &child.name());
258                        bindings.push(Binding::new(Wildcard::Subkey, child.name()));
259                        walk(
260                            &child,
261                            rest,
262                            controlset,
263                            &child_prefix,
264                            depth + 1,
265                            matched,
266                            bindings,
267                            emit,
268                        );
269                        bindings.pop();
270                    }
271                }
272            }
273        }
274        // `User` is bound by the multi-user scan, never reached intra-hive.
275        Segment::Variable(Wildcard::User, _) => {} // cov:unreachable: User segments are stripped to a User binding by scan_users before expand() is called.
276    }
277}
278
/// Append `name` to the hive-relative `prefix`, separated by a single `\`.
///
/// An empty `prefix` means "at the root", so `name` is returned on its own
/// with no leading separator.
fn join_path(prefix: &str, name: &str) -> String {
    match prefix {
        "" => name.to_owned(),
        parent => [parent, name].join("\\"),
    }
}
287
288/// Match a single path component against a glob `pattern` that may contain `*`
289/// wildcards anywhere (case-insensitive). `*` matches any run of characters.
290fn segment_matches(pattern: &str, name: &str) -> bool {
291    let pat: Vec<char> = pattern.to_ascii_lowercase().chars().collect();
292    let txt: Vec<char> = name.to_ascii_lowercase().chars().collect();
293    glob_match(&pat, &txt)
294}
295
/// Iterative glob matcher over char slices where `*` is the only wildcard.
///
/// Returns `true` when all of `txt` is matched by `pat`, with each `*`
/// standing for any run of characters (possibly empty). Comparison is exact;
/// callers do any case folding beforehand. Only the most recent `*` is ever
/// revisited, so there is no recursive backtracking.
fn glob_match(pat: &[char], txt: &[char]) -> bool {
    let mut pi = 0usize;
    let mut ti = 0usize;
    // For the most recent `*`: (pattern index just after it, text index where
    // the star's current attempt started).
    let mut resume: Option<(usize, usize)> = None;

    while let Some(&ch) = txt.get(ti) {
        match pat.get(pi) {
            Some('*') => {
                // Start by letting the star match nothing.
                resume = Some((pi + 1, ti));
                pi += 1;
            }
            Some(&want) if want == ch => {
                pi += 1;
                ti += 1;
            }
            _ => {
                // Mismatch (or pattern exhausted): let the last star swallow
                // one more character and retry; with no star, it is a miss.
                let Some((after_star, started)) = resume else {
                    return false;
                };
                pi = after_star;
                ti = started + 1;
                resume = Some((after_star, ti));
            }
        }
    }

    // Text consumed: whatever pattern remains must be stars only.
    pat[pi..].iter().all(|&c| c == '*')
}
321
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    /// `*` may sit at the start, middle or end of a segment pattern, and
    /// matching ignores ASCII case.
    #[test]
    fn segment_match_handles_midsegment_wildcard() {
        let cases = [
            ("*ControlSet*", "ControlSet001", true),
            ("*", "anything", true),
            ("ABC*", "abcdef", true),
            ("ABC*", "xyz", false),
            ("Foo", "Bar", false),
        ];
        for (pattern, name, expected) in cases {
            assert_eq!(
                segment_matches(pattern, name),
                expected,
                "pattern {pattern:?} against name {name:?}"
            );
        }
    }

    /// `Binding::new` stores the kind and converts the value to a `String`.
    #[test]
    fn binding_new_constructs() {
        let Binding { kind, value } = Binding::new(Wildcard::ControlSet, "ControlSet002");
        assert_eq!(kind, Wildcard::ControlSet);
        assert_eq!(value, "ControlSet002");
    }
}