Skip to main content

prefetch_core/
lib.rs

1//! Pure-Rust, read-only Windows **Prefetch** (`.pf`) reader.
2//!
//! Windows 10/11 store prefetch compressed with a `MAM` (Xpress-Huffman)
//! wrapper (Windows 8.1 and earlier write the `SCCA` structure uncompressed);
//! the decompressed payload is the classic `SCCA` structure. This crate
//! decodes both, cross-platform, with no Windows API dependency.
6//!
7//! - [`decompress`] — MAM wrapper → raw SCCA bytes (Xpress-Huffman via the
8//!   [`xpress_huffman`] crate).
9//! - [`parse`] — full SCCA v30/31 → [`PrefetchInfo`].
10
11#![forbid(unsafe_code)]
12
13/// Errors decoding a prefetch file.
14#[derive(Debug, PartialEq, Eq)]
15pub enum PrefetchError {
16    /// Input is shorter than the smallest valid header.
17    TooShort,
18    /// Not a recognized prefetch container (`MAM`/Xpress-Huffman or raw `SCCA`).
19    BadSignature,
20    /// The MAM/Xpress-Huffman payload failed to decompress.
21    Decompress(xpress_huffman::Error),
22    /// SCCA format version not supported by this parser (the `u32` is the version
23    /// found). Win10/11 (30/31) are supported; XP/Vista/7/8.1 (17/23/26) are not
24    /// yet — their `FileInformation` block has a different layout.
25    UnsupportedVersion(u32),
26    /// An offset/length field in the SCCA payload pointed past the buffer.
27    TruncatedRecord,
28}
29
/// First three bytes of a compressed prefetch file. [`decompress`] requires
/// these at offset 0, followed by the compression byte [`MAM_XPRESS_HUFFMAN`].
const MAM_SIGNATURE: &[u8; 3] = b"MAM";
/// MAM compression byte for Xpress-Huffman (`COMPRESSION_FORMAT_XPRESS_HUFF`).
const MAM_XPRESS_HUFFMAN: u8 = 0x04;
/// Decompressed SCCA payload signature. It sits at byte offset 4 — the SCCA
/// header is `[u32 version][b"SCCA"]…` (version values: 17 XP, 23 Vista/7,
/// 26 Win8.1, 30 Win10, 31 Win11).
pub const SCCA_SIGNATURE: &[u8; 4] = b"SCCA";
/// Byte offset of [`SCCA_SIGNATURE`] within the decompressed payload.
pub const SCCA_SIGNATURE_OFFSET: usize = 4;
39
40/// Decompress a (possibly MAM-wrapped) prefetch file into its raw `SCCA` bytes.
41///
42/// Recognizes the Win8.1+ `MAM\x04` Xpress-Huffman container (4-byte signature +
43/// 4-byte little-endian decompressed size, then the compressed stream) and passes
44/// an already-raw `SCCA` file through unchanged (Win7 and earlier).
45pub fn decompress(data: &[u8]) -> Result<Vec<u8>, PrefetchError> {
46    if data.len() < 8 {
47        return Err(PrefetchError::TooShort);
48    }
49    // A raw (uncompressed, Win7-era) prefetch IS the SCCA structure: a u32
50    // version at offset 0 and the SCCA signature at offset 4. Pass it through.
51    if &data[SCCA_SIGNATURE_OFFSET..SCCA_SIGNATURE_OFFSET + 4] == SCCA_SIGNATURE {
52        return Ok(data.to_vec());
53    }
54    if &data[0..3] != MAM_SIGNATURE || data[3] != MAM_XPRESS_HUFFMAN {
55        return Err(PrefetchError::BadSignature);
56    }
57    let decompressed_size = u32::from_le_bytes([data[4], data[5], data[6], data[7]]) as usize;
58    xpress_huffman::decompress(&data[8..], decompressed_size).map_err(PrefetchError::Decompress)
59}
60
61// --- SCCA structure parsing (v30/31 — Win10/11) ---------------------------
62
/// A volume referenced by a prefetch file's `VolumeInformation` block.
///
/// One value is produced for each 96-byte volume record whose fixed fields
/// could be read from the payload.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VolumeInfo {
    /// Device path, e.g. `\VOLUME{01d68d85e0da1e22-b0e0e8ff}`. Decoded lossily
    /// from UTF-16LE and cut at the first NUL; empty when the record's name
    /// offset/length points outside the payload.
    pub device_path: String,
    /// Volume serial number (the 32-bit value Windows formats as 8 hex digits).
    pub serial: u32,
    /// Volume creation time, as a raw Windows `FILETIME` (100 ns ticks since 1601).
    pub creation_time: i64,
}
73
/// The forensically-salient contents of a Windows prefetch file.
///
/// Produced by [`parse`] / [`parse_decompressed`]. Only the header fields are
/// mandatory; the variable-length sections degrade to empty collections when
/// their offsets point outside the payload.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PrefetchInfo {
    /// SCCA format version (30 = Win10, 31 = Win11).
    pub version: u32,
    /// The executable's base name (upper-cased by Windows), e.g. `COREUPDATER.EXE`.
    /// Read from the 60-byte UTF-16LE header field and cut at the first NUL.
    pub executable: String,
    /// Number of times the program has been run. `0` when the counter field
    /// lies beyond the end of the payload.
    pub run_count: u32,
    /// Up to eight most-recent run times, newest first, as raw `FILETIME` values.
    /// Collection stops at the first slot that is zero, negative or unreadable.
    pub last_run_times: Vec<i64>,
    /// Volumes the program touched (at most `MAX_VOLUMES` records are read).
    pub volumes: Vec<VolumeInfo>,
    /// Files (full volume-relative paths) loaded during the traced runs.
    /// Empty when the filename block does not fit inside the payload.
    pub filenames: Vec<String>,
}
90
/// Read a little-endian `u32` at byte offset `off`.
///
/// Returns `None` when the four bytes `off..off + 4` do not lie entirely inside
/// `d`. Offsets are derived from untrusted file fields, so the end of the range
/// is computed with checked arithmetic: an `off` close to `usize::MAX` yields
/// `None` instead of overflowing.
fn rd_u32(d: &[u8], off: usize) -> Option<u32> {
    let end = off.checked_add(4)?;
    let s = d.get(off..end)?;
    Some(u32::from_le_bytes([s[0], s[1], s[2], s[3]]))
}
96
/// Read a little-endian `i64` at byte offset `off`.
///
/// Returns `None` when the eight bytes `off..off + 8` do not lie entirely
/// inside `d`. The end of the range is computed with checked arithmetic, so an
/// `off` close to `usize::MAX` yields `None` instead of overflowing.
fn rd_i64(d: &[u8], off: usize) -> Option<i64> {
    let end = off.checked_add(8)?;
    let s = d.get(off..end)?;
    let mut raw = [0u8; 8];
    // `s` is exactly 8 bytes long here, so the copy cannot panic.
    raw.copy_from_slice(s);
    Some(i64::from_le_bytes(raw))
}
105
/// Decode a UTF-16LE string of `byte_len` bytes at `off`, truncated at the first
/// NUL code unit.
///
/// Invalid UTF-16 is replaced lossily (U+FFFD); a trailing odd byte is ignored.
/// Returns `None` when `off..off + byte_len` does not lie entirely inside `d`.
/// Both values may come from untrusted file fields, so the end of the range is
/// computed with checked arithmetic and an overflowing range yields `None`.
fn rd_utf16_z(d: &[u8], off: usize, byte_len: usize) -> Option<String> {
    let end = off.checked_add(byte_len)?;
    let s = d.get(off..end)?;
    let units: Vec<u16> = s
        .chunks_exact(2)
        .map(|c| u16::from_le_bytes([c[0], c[1]]))
        .take_while(|&u| u != 0)
        .collect();
    Some(String::from_utf16_lossy(&units))
}
117
118/// Parse a prefetch file (`MAM`-compressed or raw `SCCA`) into [`PrefetchInfo`].
119///
120/// Supports SCCA versions 30 (Win10) and 31 (Win11); other versions yield
121/// [`PrefetchError::UnsupportedVersion`].
122pub fn parse(file_bytes: &[u8]) -> Result<PrefetchInfo, PrefetchError> {
123    let scca = decompress(file_bytes)?;
124    parse_decompressed(&scca)
125}
126
/// SCCA `FileInformation` block starts right after the 84-byte header. Every
/// `fi + N` field offset used by [`parse_decompressed`] is relative to this.
const FILE_INFO_OFFSET: usize = 84;
/// Largest volume count we will trust from the header (allocation-bomb guard).
/// [`parse_decompressed`] clamps the header's count to this value before any
/// volume record is read.
const MAX_VOLUMES: u32 = 64;
131
132/// Parse an already-decompressed SCCA payload (version 30/31).
133pub fn parse_decompressed(scca: &[u8]) -> Result<PrefetchInfo, PrefetchError> {
134    if scca.len() < FILE_INFO_OFFSET {
135        return Err(PrefetchError::TooShort);
136    }
137    if scca.get(4..8) != Some(SCCA_SIGNATURE.as_slice()) {
138        return Err(PrefetchError::BadSignature);
139    }
140    let version = rd_u32(scca, 0).ok_or(PrefetchError::TooShort)?;
141    if version != 30 && version != 31 {
142        return Err(PrefetchError::UnsupportedVersion(version));
143    }
144
145    // Header: executable name is UTF-16, 60 bytes at offset 16.
146    let executable = rd_utf16_z(scca, 16, 60).ok_or(PrefetchError::TruncatedRecord)?;
147
148    // FileInformation fields are relative to FILE_INFO_OFFSET.
149    let fi = FILE_INFO_OFFSET;
150    let filename_off = rd_u32(scca, fi + 16).ok_or(PrefetchError::TruncatedRecord)? as usize;
151    let filename_sz = rd_u32(scca, fi + 20).ok_or(PrefetchError::TruncatedRecord)? as usize;
152    let volumes_off = rd_u32(scca, fi + 24).ok_or(PrefetchError::TruncatedRecord)? as usize;
153    let volume_count = rd_u32(scca, fi + 28).ok_or(PrefetchError::TruncatedRecord)?;
154
155    // Last run times: eight FILETIMEs at fi+44; keep the non-zero leading run.
156    let mut last_run_times = Vec::with_capacity(8);
157    for i in 0..8 {
158        match rd_i64(scca, fi + 44 + i * 8) {
159            Some(t) if t > 0 => last_run_times.push(t),
160            _ => break,
161        }
162    }
163
164    // Run count: newer Win10 builds shifted the counter back 8 bytes. The field
165    // at fi+120 is zero in the old layout; when non-zero, the count lives at
166    // fi+116 instead of fi+124.
167    let run_count = if rd_u32(scca, fi + 120).unwrap_or(0) == 0 {
168        rd_u32(scca, fi + 124).unwrap_or(0)
169    } else {
170        rd_u32(scca, fi + 116).unwrap_or(0)
171    };
172
173    let filenames = parse_filenames(scca, filename_off, filename_sz);
174    let volumes = parse_volumes(scca, volumes_off, volume_count.min(MAX_VOLUMES));
175
176    Ok(PrefetchInfo {
177        version,
178        executable,
179        run_count,
180        last_run_times,
181        volumes,
182        filenames,
183    })
184}
185
/// Extract the paths stored in the filename strings block.
///
/// The block is `size` bytes of UTF-16LE starting at `off`, with entries
/// separated by NUL code units. Empty entries are dropped, invalid UTF-16 is
/// replaced lossily, and a trailing odd byte is ignored. When the block does
/// not fit inside `scca` the result is an empty list.
fn parse_filenames(scca: &[u8], off: usize, size: usize) -> Vec<String> {
    let end = off.saturating_add(size);
    let block = match scca.get(off..end) {
        Some(bytes) => bytes,
        None => return Vec::new(),
    };

    let code_units: Vec<u16> = block
        .chunks_exact(2)
        .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
        .collect();

    // NUL can never be half of a surrogate pair, so cutting the code units at
    // NUL before decoding gives the same strings as decoding the whole block
    // and splitting afterwards.
    code_units
        .split(|&unit| unit == 0)
        .filter(|name| !name.is_empty())
        .map(String::from_utf16_lossy)
        .collect()
}
201
202/// Parse `count` 96-byte volume records starting at `vol_off`.
203fn parse_volumes(scca: &[u8], vol_off: usize, count: u32) -> Vec<VolumeInfo> {
204    let mut out = Vec::with_capacity(count as usize);
205    for j in 0..count as usize {
206        let rec = vol_off + j * 96;
207        let (Some(dev_off), Some(dev_nchar), Some(ct), Some(serial)) = (
208            rd_u32(scca, rec).map(|v| v as usize),
209            rd_u32(scca, rec + 4).map(|v| v as usize),
210            rd_i64(scca, rec + 8),
211            rd_u32(scca, rec + 16),
212        ) else {
213            break;
214        };
215        let device_path = rd_utf16_z(scca, vol_off + dev_off, dev_nchar * 2).unwrap_or_default();
216        out.push(VolumeInfo {
217            device_path,
218            serial,
219            creation_time: ct,
220        });
221    }
222    out
223}
224
#[cfg(test)]
// Tests unwrap freely: a failed unwrap is a test failure.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    // A real Win10 MAM-compressed prefetch file (Case 001 Desktop): the malware's
    // own prefetch. Header `MAM\x04` + decompressed size 0x5efc = 24316.
    const COREUPDATER: &[u8] = include_bytes!("../../tests/data/COREUPDATER.EXE-157C54BB.pf");
    // A second real prefetch fixture, used for the run-count / run-time checks.
    const AUDIODG: &[u8] = include_bytes!("../../tests/data/AUDIODG.EXE-AB22E9A6.pf");

    /// `decompress` rejects inputs that are neither raw SCCA nor `MAM\x04`, and
    /// inputs shorter than the 8-byte header.
    #[test]
    fn mam_header_rejects_non_prefetch() {
        // 8+ bytes, neither SCCA nor MAM\x04 → BadSignature.
        assert_eq!(
            decompress(b"NOPE\x00\x00\x00\x00").err(),
            Some(PrefetchError::BadSignature)
        );
        // wrong MAM compression byte → BadSignature.
        assert_eq!(
            decompress(b"MAM\x02\x00\x00\x00\x00").err(),
            Some(PrefetchError::BadSignature)
        );
        // shorter than the 8-byte MAM header → TooShort.
        assert_eq!(decompress(b"MA").err(), Some(PrefetchError::TooShort));
    }

    /// A payload that already carries `SCCA` at offset 4 is returned unchanged.
    #[test]
    fn raw_scca_passes_through() {
        // A raw (Win7-era) prefetch: u32 version at 0, SCCA at offset 4.
        let mut raw = 23u32.to_le_bytes().to_vec();
        raw.extend_from_slice(b"SCCA");
        raw.extend_from_slice(&[0u8; 20]);
        assert_eq!(decompress(&raw).unwrap(), raw);
    }

    /// The load-bearing oracle: decompressing the REAL malware prefetch must yield
    /// the exact declared size and a valid `SCCA` payload.
    #[test]
    fn decompresses_real_win10_prefetch_to_scca() {
        // header: MAM\x04 + LE u32 size
        assert_eq!(&COREUPDATER[0..3], b"MAM");
        assert_eq!(COREUPDATER[3], 0x04);
        let declared = u32::from_le_bytes([
            COREUPDATER[4],
            COREUPDATER[5],
            COREUPDATER[6],
            COREUPDATER[7],
        ]) as usize;

        // decompressed length must match the MAM header.
        let out = decompress(COREUPDATER).unwrap();
        assert_eq!(out.len(), declared);
        // SCCA header: [u32 version][b"SCCA"]. The malware ran on the Win10
        // Desktop → version 30.
        assert_eq!(
            &out[SCCA_SIGNATURE_OFFSET..SCCA_SIGNATURE_OFFSET + 4],
            SCCA_SIGNATURE
        );
        assert_eq!(u32::from_le_bytes([out[0], out[1], out[2], out[3]]), 30);
    }

    /// The second fixture also decompresses to a payload with a valid signature.
    #[test]
    fn decompresses_second_real_prefetch() {
        let out = decompress(AUDIODG).unwrap();
        assert_eq!(
            &out[SCCA_SIGNATURE_OFFSET..SCCA_SIGNATURE_OFFSET + 4],
            SCCA_SIGNATURE
        );
    }

    /// Ground truth from the real malware prefetch (probed from the decompressed
    /// SCCA v30 payload): the executable, a single run, its run time, the one
    /// volume's serial/path, and the 51 accessed files.
    #[test]
    fn parses_real_coreupdater_scca() {
        let info = parse(COREUPDATER).unwrap();
        assert_eq!(info.version, 30);
        assert_eq!(info.executable, "COREUPDATER.EXE");
        assert_eq!(info.run_count, 1);
        assert_eq!(info.last_run_times, vec![132_449_604_494_103_203]);
        assert_eq!(info.volumes.len(), 1);
        assert_eq!(info.volumes[0].serial, 0xB0E0_E8FF);
        assert_eq!(
            info.volumes[0].device_path,
            r"\VOLUME{01d68d85e0da1e22-b0e0e8ff}"
        );
        assert_eq!(info.filenames.len(), 51);
        assert!(info.filenames.iter().any(|f| f.ends_with("NTDLL.DLL")));
        assert!(info
            .filenames
            .iter()
            .any(|f| f.ends_with("COREUPDATER.EXE")));
    }

    /// AUDIODG ran 8 times: the Win10 run-counter shift must resolve to 8, with
    /// all 8 last-run timestamps recovered.
    #[test]
    fn parses_audiodg_run_count_and_times() {
        let info = parse(AUDIODG).unwrap();
        assert_eq!(info.run_count, 8);
        assert_eq!(info.last_run_times.len(), 8);
        assert_eq!(info.last_run_times[0], 132_449_663_254_875_727);
        assert_eq!(info.filenames.len(), 79);
    }

    /// A well-formed SCCA header with a version other than 30/31 is reported as
    /// `UnsupportedVersion` carrying the version found.
    #[test]
    fn parse_rejects_unsupported_version() {
        // A raw SCCA payload claiming version 23 (Vista/7) — unsupported layout.
        let mut p = vec![0u8; 256];
        p[0..4].copy_from_slice(&23u32.to_le_bytes());
        p[4..8].copy_from_slice(b"SCCA");
        assert_eq!(parse(&p).err(), Some(PrefetchError::UnsupportedVersion(23)));
    }

    /// Write `s` as UTF-16LE into `buf` starting at byte offset `off`. No
    /// terminator is written; `buf` must already be large enough.
    fn put16(buf: &mut [u8], off: usize, s: &str) {
        for (i, u) in s.encode_utf16().enumerate() {
            buf[off + i * 2..off + i * 2 + 2].copy_from_slice(&u.to_le_bytes());
        }
    }

    /// Build a minimal valid SCCA v30 payload: one volume, one filename.
    /// `old_run_count`: leave `FileInfo+120` zero so the count is read from `+124`
    /// (the pre-shift Win10 layout); otherwise use the shifted `+116`.
    fn build_scca(old_run_count: bool) -> Vec<u8> {
        // 84-byte header followed by 224 zeroed bytes for the FileInformation block.
        let mut p = vec![0u8; 84 + 224];
        p[0..4].copy_from_slice(&30u32.to_le_bytes());
        p[4..8].copy_from_slice(b"SCCA");
        put16(&mut p, 16, "X.EXE");

        // Filename strings block: a single NUL-terminated path appended after
        // the fixed area.
        let fname = r"\VOL\X.EXE";
        let mut fbytes = vec![0u8; (fname.encode_utf16().count() + 1) * 2];
        put16(&mut fbytes, 0, fname); // trailing NUL already zeroed
        let fname_off = p.len();
        p.extend_from_slice(&fbytes);

        // Volume section: one 96-byte record immediately followed by its
        // device name.
        let vol_off = p.len();
        let dev = r"\VOLUME{abcd}";
        let dev_nchar = dev.encode_utf16().count();
        let mut vol = vec![0u8; 96];
        vol[0..4].copy_from_slice(&96u32.to_le_bytes()); // device-name offset (rel)
        vol[4..8].copy_from_slice(&(dev_nchar as u32).to_le_bytes());
        vol[8..16].copy_from_slice(&123i64.to_le_bytes()); // creation time
        vol[16..20].copy_from_slice(&0xDEAD_BEEFu32.to_le_bytes()); // serial
        p.extend_from_slice(&vol);
        let mut dbytes = vec![0u8; dev_nchar * 2];
        put16(&mut dbytes, 0, dev);
        p.extend_from_slice(&dbytes);
        let vol_size = (p.len() - vol_off) as u32;

        // Point the FileInformation fields at the sections built above.
        let fi = FILE_INFO_OFFSET;
        p[fi + 16..fi + 20].copy_from_slice(&(fname_off as u32).to_le_bytes());
        p[fi + 20..fi + 24].copy_from_slice(&(fbytes.len() as u32).to_le_bytes());
        p[fi + 24..fi + 28].copy_from_slice(&(vol_off as u32).to_le_bytes());
        p[fi + 28..fi + 32].copy_from_slice(&1u32.to_le_bytes());
        p[fi + 32..fi + 36].copy_from_slice(&vol_size.to_le_bytes());
        p[fi + 44..fi + 52].copy_from_slice(&1000i64.to_le_bytes()); // one run time
        if old_run_count {
            p[fi + 124..fi + 128].copy_from_slice(&5u32.to_le_bytes());
        } else {
            p[fi + 120..fi + 124].copy_from_slice(&3u32.to_le_bytes());
            p[fi + 116..fi + 120].copy_from_slice(&7u32.to_le_bytes());
        }
        p
    }

    /// The synthetic payload parses fully, and both run-count layouts resolve
    /// to the value planted at the matching offset.
    #[test]
    fn parses_synthetic_scca_old_and_new_run_count() {
        let info = parse_decompressed(&build_scca(true)).unwrap();
        assert_eq!(info.executable, "X.EXE");
        assert_eq!(info.run_count, 5); // FileInfo+124 (old layout)
        assert_eq!(info.last_run_times, vec![1000]);
        assert_eq!(info.volumes.len(), 1);
        assert_eq!(info.volumes[0].serial, 0xDEAD_BEEF);
        assert_eq!(info.volumes[0].device_path, r"\VOLUME{abcd}");
        assert_eq!(info.filenames, vec![r"\VOL\X.EXE".to_string()]);

        let shifted = parse_decompressed(&build_scca(false)).unwrap();
        assert_eq!(shifted.run_count, 7); // FileInfo+116 (shifted layout)
    }

    /// Payloads below the 84-byte header, or without `SCCA` at offset 4, are
    /// rejected before any field is interpreted.
    #[test]
    fn parse_decompressed_rejects_short_and_unsigned() {
        assert_eq!(
            parse_decompressed(&[0u8; 50]).err(),
            Some(PrefetchError::TooShort)
        );
        // ≥84 bytes but no SCCA at offset 4.
        assert_eq!(
            parse_decompressed(&[0u8; 100]).err(),
            Some(PrefetchError::BadSignature)
        );
    }

    /// Section offsets that point past the end of the payload produce empty
    /// filename / volume lists instead of an error.
    #[test]
    fn truncated_filename_and_volume_offsets_degrade_gracefully() {
        let fi = FILE_INFO_OFFSET;
        let mut p = build_scca(true);
        let past = (p.len() as u32) + 1000;
        p[fi + 16..fi + 20].copy_from_slice(&past.to_le_bytes()); // filenames off past EOF
        assert!(parse_decompressed(&p).unwrap().filenames.is_empty());

        let mut q = build_scca(true);
        q[fi + 24..fi + 28].copy_from_slice(&past.to_le_bytes()); // volumes off past EOF
        assert!(parse_decompressed(&q).unwrap().volumes.is_empty());
    }
}