Skip to main content

winreg_artifacts/
shimcache.rs

1//! ShimCache (AppCompatCache) registry artifact extractor.
2//!
3//! ShimCache is stored in the SYSTEM hive and records application execution
4//! metadata for compatibility checking. It is evidence of program execution.
5//!
6//! Key path: `SYSTEM\CurrentControlSet\Control\Session Manager\AppCompatCache`
7//! Value name: `AppCompatCache` (REG_BINARY)
8
9use std::io::Cursor;
10
11use winreg_core::hive::Hive;
12use winreg_core::key::filetime_to_datetime;
13
14// AppCompatCache header signatures + entry-body field offsets are facts about
15// the format and live in the KNOWLEDGE leaf. See `forensicnomicon::appcompatcache`
16// for the per-build table and the full authoritative-source list (Mandiant
17// whitepaper, Eric Zimmerman's AppCompatCacheParser, libyal winreg-kb).
18use forensicnomicon::appcompatcache as fmt;
19
20// ---------------------------------------------------------------------------
21// Output type
22// ---------------------------------------------------------------------------
23
24/// A single entry decoded from the AppCompatCache (ShimCache) binary blob.
25#[derive(Debug, Clone, serde::Serialize)]
26pub struct ShimcacheEntry {
27    /// Executable path extracted from the cache entry. Empty if unparseable.
28    pub path: String,
29    /// Last modified time as ISO 8601, or `None` if unavailable.
30    pub last_modified: Option<String>,
31    /// Size of the raw `AppCompatCache` REG_BINARY blob.
32    pub raw_size: usize,
33    /// Position in the cache (0 = most recently executed).
34    pub entry_index: usize,
35}
36
37// ---------------------------------------------------------------------------
38// Key / value paths
39// ---------------------------------------------------------------------------
40
/// Portion of the key path that sits below the ControlSet root. The root is
/// `CurrentControlSet` on live hives and `ControlSet00N` on offline ones.
const APPCOMPAT_SUFFIX: &str = "Control\\Session Manager\\AppCompatCache";
/// Name of the `REG_BINARY` value, under that key, that holds the cache blob.
const APPCOMPAT_VALUE: &str = "AppCompatCache";
45
46// ---------------------------------------------------------------------------
47// Format signatures
48// ---------------------------------------------------------------------------
49
/// First header byte (`0x80`) of the legacy Windows 8 / Server 2012 layout,
/// per libyal winreg-kb. Not every 8.x hive carries it (Case-001 DC01 opens
/// with a `0x00000000` first dword), so the 8.x format is primarily detected
/// by the entry marker at
/// `forensicnomicon::appcompatcache::WIN8X_ENTRY_STREAM_OFFSET`; this byte is
/// only consulted as a fallback when that marker is absent.
const WIN8_HEADER_SIG: u8 = 0x80;
55
/// Selects how the body of a `"00ts"`/`"10ts"` cache entry is decoded.
///
/// The entry framing (`sig(4) | unknown(4) | ce_data_size(4)`) is the same for
/// both variants; what differs is where the FILETIME sits relative to the
/// path (see `forensicnomicon::appcompatcache`).
#[derive(Clone, Copy)]
enum EntryBodyLayout {
    /// Windows 10 bodies: the FILETIME comes straight after the path.
    Win10,
    /// Windows 8.0/8.1 and Server 2012/2012 R2 bodies: `package_len(2) |
    /// package | insertion_flags(4) | shim_flags(4)` sit between the path and
    /// the FILETIME.
    Win8x,
}
67
68// ---------------------------------------------------------------------------
69// Public parse function
70// ---------------------------------------------------------------------------
71
72/// Extract ShimCache entries from a SYSTEM hive.
73///
74/// Resolves the active ControlSet, then reads
75/// `<ControlSet>\Control\Session Manager\AppCompatCache`. Live hives expose a
76/// `CurrentControlSet` symlink; **offline** hives do not — they carry
77/// `ControlSet00N` selected by `Select\Current`, so we resolve that.
78///
79/// Returns an empty `Vec` if the key or value is absent.
80/// Returns a single sentinel entry (empty path) if the blob exists but the
81/// format is unrecognised.
82pub fn parse(hive: &Hive<Cursor<Vec<u8>>>) -> Vec<ShimcacheEntry> {
83    // `Select\Current` (REG_DWORD) names the active set on an offline hive;
84    // default to set 1 when the Select key is absent.
85    let current = hive
86        .open_key("Select")
87        .ok()
88        .flatten()
89        .and_then(|sel| sel.value("Current").ok().flatten())
90        .and_then(|v| v.raw_data().ok())
91        .filter(|d| d.len() >= 4)
92        .map_or(1u32, |d| u32::from_le_bytes([d[0], d[1], d[2], d[3]]));
93
94    // Try the live symlink, the Select-resolved set, then ControlSet001.
95    let candidates = [
96        format!("CurrentControlSet\\{APPCOMPAT_SUFFIX}"),
97        format!("ControlSet{current:03}\\{APPCOMPAT_SUFFIX}"),
98        format!("ControlSet001\\{APPCOMPAT_SUFFIX}"),
99    ];
100    let key = match candidates
101        .iter()
102        .find_map(|p| hive.open_key(p).ok().flatten())
103    {
104        Some(k) => k,
105        None => return Vec::new(),
106    };
107
108    // Read the REG_BINARY value.
109    let blob: Vec<u8> = match key.value(APPCOMPAT_VALUE) {
110        Ok(Some(v)) => match v.raw_data() {
111            Ok(d) => d,
112            Err(_) => return Vec::new(),
113        },
114        _ => return Vec::new(),
115    };
116
117    let raw_size = blob.len();
118
119    // Blobs shorter than 4 bytes cannot contain a valid signature.
120    if raw_size < 4 {
121        return Vec::new();
122    }
123
124    let sig = u32::from_le_bytes([blob[0], blob[1], blob[2], blob[3]]);
125
126    // Win10 (1507 = 0x30, 1607+ = 0x34): the first dword is the header length;
127    // the `"10ts"` entries follow it and carry the FILETIME right after the path.
128    if sig == fmt::WIN10_1507_HEADER_LEN || sig == fmt::WIN10_1607_HEADER_LEN {
129        return parse_win10_entries(&blob, sig as usize, raw_size, b"10ts", EntryBodyLayout::Win10);
130    }
131    // Header-less `"10ts"` stream (some synthetic/edge captures put entries at 0).
132    if sig == fmt::ENTRY_MARKER_WIN81_WIN10_U32 {
133        return parse_win10_entries(&blob, 0, raw_size, b"10ts", EntryBodyLayout::Win10);
134    }
135    // Win8.0/8.1 & Server 2012/2012 R2: a 128-byte header followed by entries
136    // tagged "00ts" (8.0/2012) or "10ts" (8.1/2012 R2). The header's first dword
137    // varies in the wild (0x80 per libyal; 0x00000000 on the Case-001 DC01 Server
138    // 2012 R2 hive), so classify by the marker at offset 128 exactly as Eric
139    // Zimmerman's AppCompatCacheParser does — independent of the first dword. The
140    // Win8.x body carries package_len + insertion/shim flags BEFORE the FILETIME,
141    // so it must be decoded with the Win8x layout (Win10 reads the wrong offset).
142    if blob.len() >= fmt::WIN8X_ENTRY_STREAM_OFFSET + 4 {
143        let marker = &blob[fmt::WIN8X_ENTRY_STREAM_OFFSET..fmt::WIN8X_ENTRY_STREAM_OFFSET + 4];
144        if marker == fmt::ENTRY_MARKER_WIN80 || marker == fmt::ENTRY_MARKER_WIN81_WIN10 {
145            return parse_win10_entries(
146                &blob,
147                fmt::WIN8X_ENTRY_STREAM_OFFSET,
148                raw_size,
149                marker,
150                EntryBodyLayout::Win8x,
151            );
152        }
153    }
154    // Win8 0x80 header without a marker at offset 128 (legacy fixed parser).
155    if blob[0] == WIN8_HEADER_SIG {
156        return parse_win10(&blob, raw_size);
157    }
158    // Last resort: locate the first "10ts" marker anywhere and parse from there
159    // with the Win10 body layout (headerless/synthetic captures).
160    if let Some(pos) = blob.windows(4).position(|w| w == fmt::ENTRY_MARKER_WIN81_WIN10) {
161        return parse_win10_entries(&blob, pos, raw_size, b"10ts", EntryBodyLayout::Win10);
162    }
163    // No "10ts" entries anywhere — genuinely unrecognised. Return a sentinel so
164    // the caller still records that a blob was present.
165    vec![ShimcacheEntry {
166        path: String::new(),
167        last_modified: None,
168        raw_size,
169        entry_index: 0,
170    }]
171}
172
173/// Parse a stream of Win10 `"10ts"` AppCompatCache entries beginning at `start`.
174///
175/// Each entry: `"10ts" | unknown(4) | ce_data_size(4) | body[ce_data_size]`,
176/// where the body is `path_size(2) | path(UTF-16LE) | FILETIME(8) | data_size(4)
177/// | data`.
178/// Parse a `"00ts"`/`"10ts"` entry stream beginning at `start`, tagged
179/// `entry_sig`, with bodies decoded per `layout`.
180///
181/// Each entry: `sig(4) | unknown(4) | ce_data_size(4) | body[ce_data_size]`
182/// (`forensicnomicon::appcompatcache::ENTRY_FRAMING_LEN`).
183fn parse_win10_entries(
184    blob: &[u8],
185    start: usize,
186    raw_size: usize,
187    entry_sig: &[u8],
188    layout: EntryBodyLayout,
189) -> Vec<ShimcacheEntry> {
190    let mut entries = Vec::new();
191    let mut offset = start;
192    let mut entry_index = 0;
193
194    while offset + fmt::ENTRY_FRAMING_LEN <= blob.len() {
195        if &blob[offset..offset + 4] != entry_sig {
196            break;
197        }
198        // offset+4: unknown (4 bytes), then the cache-entry data size.
199        let ce_data_size =
200            u32::from_le_bytes([blob[offset + 8], blob[offset + 9], blob[offset + 10], blob[offset + 11]])
201                as usize;
202        let body_start = offset + fmt::ENTRY_FRAMING_LEN;
203        let body_end = match body_start.checked_add(ce_data_size) {
204            Some(e) if e <= blob.len() => e,
205            _ => break,
206        };
207
208        let (path, last_modified) = decode_win10_entry_body(&blob[body_start..body_end], layout);
209        entries.push(ShimcacheEntry {
210            path,
211            last_modified,
212            raw_size,
213            entry_index,
214        });
215
216        offset = body_end;
217        entry_index += 1;
218    }
219
220    entries
221}
222
223/// Decode a `"00ts"`/`"10ts"` entry body.
224///
225/// `Win10`: `path_size(2) | path(UTF-16LE) | FILETIME(8) | data_size(4) | data`
226/// — FILETIME at `path_end` (`WIN10_PATH_TO_FILETIME` = 0).
227///
228/// `Win8x`: `path_size(2) | path | package_len(2) | package | insertion_flags(4)
229/// | shim_flags(4) | FILETIME(8) | data_size(4) | data` — FILETIME at
230/// `path_end + 2 + package_len + WIN8X_PATH_TO_FILETIME_FIXED`. Offsets and the
231/// authoritative sources live in `forensicnomicon::appcompatcache`.
232fn decode_win10_entry_body(body: &[u8], layout: EntryBodyLayout) -> (String, Option<String>) {
233    if body.len() < 2 {
234        return (String::new(), None);
235    }
236    let path_size = u16::from_le_bytes([body[0], body[1]]) as usize;
237    let path_end = 2 + path_size;
238    let path = if path_size > 0 && path_end <= body.len() {
239        decode_utf16le(&body[2..path_end])
240    } else {
241        String::new()
242    };
243    let ft_offset = match layout {
244        EntryBodyLayout::Win10 => path_end.checked_add(fmt::WIN10_PATH_TO_FILETIME),
245        EntryBodyLayout::Win8x => {
246            // Read package_len(u16) at path_end, then skip it + the package data
247            // + insertion/shim flags to reach the FILETIME.
248            if path_end + 2 <= body.len() {
249                let package_len = u16::from_le_bytes([body[path_end], body[path_end + 1]]) as usize;
250                path_end.checked_add(2 + package_len + fmt::WIN8X_PATH_TO_FILETIME_FIXED)
251            } else {
252                None
253            }
254        }
255    };
256    let last_modified = ft_offset
257        .filter(|&o| o.checked_add(8).is_some_and(|end| end <= body.len()))
258        .and_then(|o| {
259            let ft = winreg_core::bytes::le_u64(body, o);
260            filetime_to_datetime(ft).map(|dt| dt.format("%Y-%m-%dT%H:%M:%SZ").to_string())
261        });
262    (path, last_modified)
263}
264
265// ---------------------------------------------------------------------------
266// Win10 parser
267// ---------------------------------------------------------------------------
268
269/// Parse the Windows 10 AppCompatCache format.
270///
271/// Header (128 bytes):
272///   Bytes 0-3:   signature `0x73743031` ("10ts" LE)
273///   Bytes 4-7:   number of entries (u32 LE)
274///   Bytes 8-127: padding
275///
276/// Each entry starts with:
277///   Bytes 0-3:   entry signature `0x73743031`
278///   Bytes 4-7:   entry data length (u32 LE) — length of the body *after* these 8 bytes
279///   Then entry body (variable):
280///     Bytes 0-1:  path length in bytes (u16 LE)
281///     Bytes 2-7:  padding / reserved
282///     Bytes 8-15: LastModifiedTime (FILETIME, u64 LE)
283///     Bytes 16-17: path data offset within the entry body (u16 LE, often 0x20)
284///     ... path data (UTF-16LE) at body offset indicated by path_offset_in_body
285///
286/// In practice the layout is approximately:
287///   entry_data_len (from header) bytes of body, containing:
288///     [0..2]   path_len  (u16 LE) — byte count of UTF-16LE path
289///     [8..16]  last_modified (u64 LE FILETIME)
290///     [16..18] path_offset   (u16 LE) — offset within body to the path data
291///     [path_offset .. path_offset + path_len] path bytes (UTF-16LE)
292fn parse_win10(blob: &[u8], raw_size: usize) -> Vec<ShimcacheEntry> {
293    // The cache header is 128 bytes for Win10.
294    const HEADER_SIZE: usize = 128;
295
296    if blob.len() < HEADER_SIZE {
297        return Vec::new();
298    }
299
300    let entry_count = u32::from_le_bytes([blob[4], blob[5], blob[6], blob[7]]) as usize;
301    if entry_count == 0 {
302        return Vec::new();
303    }
304
305    let mut entries = Vec::with_capacity(entry_count);
306    let mut offset = HEADER_SIZE;
307    let mut entry_index = 0;
308
309    while offset + 8 <= blob.len() && entry_index < entry_count {
310        // Each entry starts with a 4-byte signature.
311        let entry_sig = u32::from_le_bytes([
312            blob[offset],
313            blob[offset + 1],
314            blob[offset + 2],
315            blob[offset + 3],
316        ]);
317
318        if entry_sig != fmt::ENTRY_MARKER_WIN81_WIN10_U32 {
319            break;
320        }
321
322        let entry_data_len = u32::from_le_bytes([
323            blob[offset + 4],
324            blob[offset + 5],
325            blob[offset + 6],
326            blob[offset + 7],
327        ]) as usize;
328
329        let body_start = offset + 8;
330        let body_end = body_start + entry_data_len;
331
332        if body_end > blob.len() {
333            break;
334        }
335
336        let body = &blob[body_start..body_end];
337
338        let (path, last_modified) = decode_entry_body(body);
339
340        entries.push(ShimcacheEntry {
341            path,
342            last_modified,
343            raw_size,
344            entry_index,
345        });
346
347        offset = body_end;
348        entry_index += 1;
349    }
350
351    entries
352}
353
354/// Decode a single Win10 entry body.
355///
356/// Layout (best-effort; fields may be absent for short bodies):
357///   [0..2]   path_len  (u16 LE) — byte count of the UTF-16LE path
358///   [8..16]  last_modified (u64 LE FILETIME)
359///   [16..18] path_data_offset (u16 LE) — offset within body to path bytes
360fn decode_entry_body(body: &[u8]) -> (String, Option<String>) {
361    if body.len() < 2 {
362        return (String::new(), None);
363    }
364
365    let path_len = u16::from_le_bytes([body[0], body[1]]) as usize;
366
367    let last_modified: Option<String> = if body.len() >= 16 {
368        let ft = winreg_core::bytes::le_u64(&body[..], 8);
369        filetime_to_datetime(ft).map(|dt| dt.format("%Y-%m-%dT%H:%M:%SZ").to_string())
370    } else {
371        None
372    };
373
374    let path: String = if path_len == 0 || body.len() < 18 {
375        String::new()
376    } else {
377        let path_offset = u16::from_le_bytes([body[16], body[17]]) as usize;
378        let path_end = path_offset + path_len;
379        if path_offset < body.len() && path_end <= body.len() {
380            decode_utf16le(&body[path_offset..path_end])
381        } else {
382            String::new()
383        }
384    };
385
386    (path, last_modified)
387}
388
/// Decode UTF-16LE bytes to a `String`, stopping at the first NUL code unit.
///
/// `data` is read two bytes at a time as little-endian code units; a trailing
/// odd byte is ignored. Decoding ends at the first `0x0000` unit (exclusive)
/// or at the end of the input. Unpaired surrogates are replaced with U+FFFD,
/// so the function never fails.
fn decode_utf16le(data: &[u8]) -> String {
    // Stream the code units straight into the decoder: no intermediate
    // `Vec<u16>` and no second copy of the resulting `String`.
    let units = data
        .chunks_exact(2)
        .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
        .take_while(|&unit| unit != 0);

    char::decode_utf16(units)
        .map(|decoded| decoded.unwrap_or(char::REPLACEMENT_CHARACTER))
        .collect()
}