Skip to main content

ntfs_core/
data.rs

1//! Reconstructing an attribute's bytes — resident inline, or non-resident by
2//! following its runlist across the volume.
3//!
4//! Sparse runs yield zeroes without touching the disk; real runs are read at
5//! `lcn × cluster_size`. The output is bounded by the attribute's real size and
6//! by the bytes its runs actually allocate, and every size is checked — a
7//! crafted runlist cannot trigger an unbounded allocation or an out-of-range
8//! seek.
9
10use std::io::{Read, Seek, SeekFrom};
11
12use crate::attribute::{Attribute, AttributeBody};
13use crate::error::{NtfsError, Result};
14use crate::runlist::{self, Run};
15
/// Upper bound, in bytes, on any single reconstructed attribute value: 1 TiB.
///
/// No genuine artifact comes close to this; the limit exists so that a crafted
/// size field is rejected instead of driving a gigantic allocation.
const MAX_VALUE_BYTES: u64 = 1024 * 1024 * 1024 * 1024;
19
20/// Read `real_size` bytes of a file described by `runs`, from `reader`.
21///
22/// The result is `min(real_size, bytes the runs allocate)` bytes long; sparse
23/// runs contribute zeroes.
24///
25/// # Errors
26///
27/// [`NtfsError::BadRunlist`] on arithmetic overflow, [`NtfsError::TooLarge`]
28/// when the requested size is implausible, or [`NtfsError::Io`] on read failure.
29pub fn read_runs<R: Read + Seek>(
30    reader: &mut R,
31    runs: &[Run],
32    cluster_size: u64,
33    real_size: u64,
34) -> Result<Vec<u8>> {
35    // Bytes the runs allocate (checked); the value can't exceed this.
36    let mut allocated = 0u64;
37    for r in runs {
38        let run_bytes = r
39            .length
40            .checked_mul(cluster_size)
41            .ok_or(NtfsError::BadRunlist("run byte length overflow"))?;
42        allocated = allocated
43            .checked_add(run_bytes)
44            .ok_or(NtfsError::BadRunlist("allocation overflow"))?;
45    }
46
47    let want = real_size.min(allocated);
48    if want > MAX_VALUE_BYTES {
49        return Err(NtfsError::TooLarge { bytes: want });
50    }
51    let want_usize = usize::try_from(want).map_err(|_| NtfsError::TooLarge { bytes: want })?;
52
53    let mut out: Vec<u8> = Vec::new();
54    out.try_reserve_exact(want_usize)
55        .map_err(|_| NtfsError::TooLarge { bytes: want })?;
56
57    let mut remaining = want;
58    for r in runs {
59        if remaining == 0 {
60            break;
61        }
62        let run_bytes = r.length * cluster_size; // already checked above
63        let take = run_bytes.min(remaining);
64        let take_usize = take as usize; // ≤ want ≤ MAX_VALUE_BYTES, fits usize
65
66        match r.lcn {
67            None => out.resize(out.len() + take_usize, 0), // sparse hole → zeroes
68            Some(lcn) => {
69                let byte_off = lcn
70                    .checked_mul(cluster_size)
71                    .ok_or(NtfsError::BadRunlist("LCN byte offset overflow"))?;
72                reader.seek(SeekFrom::Start(byte_off))?;
73                let start = out.len();
74                out.resize(start + take_usize, 0);
75                reader.read_exact(&mut out[start..])?;
76            }
77        }
78        remaining -= take;
79    }
80
81    Ok(out)
82}
83
84/// Read an attribute's value, dispatching on resident vs non-resident.
85///
86/// `record` is the (fixed-up) MFT record the attribute lives in; `reader` is the
87/// volume; `cluster_size` is from the boot sector.
88///
89/// # Errors
90///
91/// As [`read_runs`], plus [`NtfsError::BadAttribute`] when a resident value or
92/// the runlist slice is out of bounds.
93pub fn read_attribute_value<R: Read + Seek>(
94    reader: &mut R,
95    record: &[u8],
96    attribute: &Attribute,
97    cluster_size: u64,
98) -> Result<Vec<u8>> {
99    match attribute.body {
100        AttributeBody::Resident { .. } => attribute
101            .resident_content(record)
102            .map(<[u8]>::to_vec)
103            .ok_or(NtfsError::BadAttribute {
104                offset: attribute.offset,
105                detail: "resident content out of bounds",
106            }),
107        AttributeBody::NonResident { real_size, .. } => {
108            let runs = attribute_runlist(record, attribute)?;
109            read_nonresident(reader, &runs, cluster_size, real_size, attribute)
110        }
111    }
112}
113
114/// Read a non-resident `$DATA` value from its (already-assembled) runlist,
115/// LZNT1-decompressing when `attribute` is compressed.
116///
117/// This is the **single** dispatch point for both the single-record path
118/// ([`read_attribute_value`]) and the split-runlist path
119/// (`NtfsFs::read_data_stream`, which concatenates a `$DATA` spread across
120/// `$ATTRIBUTE_LIST` extension records) — so neither can read a compressed file
121/// as raw bytes. (A compressed `$DATA` stores data in `2^compression_unit`
122/// clusters; `checked_shl` rejects an implausible crafted unit before overflow.)
123///
124/// # Errors
125///
126/// As [`read_runs`] / [`read_compressed_runs`], plus [`NtfsError::BadAttribute`]
127/// for an implausible compression unit.
128pub(crate) fn read_nonresident<R: Read + Seek>(
129    reader: &mut R,
130    runs: &[Run],
131    cluster_size: u64,
132    real_size: u64,
133    attribute: &Attribute,
134) -> Result<Vec<u8>> {
135    let cu = attribute.compression_unit();
136    if attribute.is_compressed() && cu != 0 {
137        let unit_clusters = 1u64
138            .checked_shl(u32::from(cu))
139            .ok_or(NtfsError::BadAttribute {
140                offset: attribute.offset,
141                detail: "implausible compression unit",
142            })?;
143        read_compressed_runs(reader, runs, cluster_size, real_size, unit_clusters)
144    } else {
145        read_runs(reader, runs, cluster_size, real_size)
146    }
147}
148
149/// Read a COMPRESSED non-resident attribute, LZNT1-decompressing each
150/// compression unit. NTFS stores a compressed file in `unit_clusters`-cluster
151/// units; within a unit the data is either: fully allocated (stored
152/// uncompressed → copied verbatim), partially allocated then sparse-padded (the
153/// allocated clusters hold the LZNT1 stream → decompressed), or fully sparse (a
154/// unit of zeroes). The result is truncated to `real_size`.
155///
156/// # Errors
157///
158/// [`NtfsError::TooLarge`] for an implausible size, [`NtfsError::BadRunlist`] on
159/// arithmetic overflow, [`NtfsError::BadCompression`] on a malformed LZNT1
160/// stream, or [`NtfsError::Io`] on read failure.
161fn read_compressed_runs<R: Read + Seek>(
162    reader: &mut R,
163    runs: &[Run],
164    cluster_size: u64,
165    real_size: u64,
166    unit_clusters: u64,
167) -> Result<Vec<u8>> {
168    if real_size > MAX_VALUE_BYTES {
169        return Err(NtfsError::TooLarge { bytes: real_size });
170    }
171    let unit_bytes = unit_clusters
172        .checked_mul(cluster_size)
173        .ok_or(NtfsError::BadRunlist("compression unit byte size overflow"))?;
174
175    // Run cursor: (lcn, remaining_clusters), mutated as clusters are consumed.
176    let mut queue: std::collections::VecDeque<(Option<u64>, u64)> =
177        runs.iter().map(|r| (r.lcn, r.length)).collect();
178    let real_size_usize =
179        usize::try_from(real_size).map_err(|_| NtfsError::TooLarge { bytes: real_size })?;
180    let mut out: Vec<u8> = Vec::new();
181
182    while (out.len() as u64) < real_size {
183        // Gather exactly one compression unit (`unit_clusters` of VCN) from the
184        // runlist; the leading allocated clusters (if any) hold the unit's bytes.
185        let mut real_bytes: Vec<u8> = Vec::new();
186        let mut real_clusters = 0u64;
187        let mut got = 0u64;
188        while got < unit_clusters {
189            let Some((lcn, avail)) = queue.front_mut() else {
190                break;
191            };
192            let take = (unit_clusters - got).min(*avail);
193            if let Some(l) = *lcn {
194                let byte_off = l
195                    .checked_mul(cluster_size)
196                    .ok_or(NtfsError::BadRunlist("LCN byte offset overflow"))?;
197                let nbytes =
198                    usize::try_from(take * cluster_size).map_err(|_| NtfsError::TooLarge {
199                        bytes: take * cluster_size,
200                    })?;
201                reader.seek(SeekFrom::Start(byte_off))?;
202                let start = real_bytes.len();
203                real_bytes.resize(start + nbytes, 0);
204                reader.read_exact(&mut real_bytes[start..])?;
205                real_clusters += take;
206                *lcn = Some(l + take); // advance the LCN within this run
207            }
208            *avail -= take;
209            if *avail == 0 {
210                queue.pop_front();
211            }
212            got += take;
213        }
214        if got == 0 {
215            break; // runlist exhausted
216        }
217
218        if real_clusters == 0 {
219            // Fully sparse unit → a unit of zeroes (bounded by the file tail).
220            let want = unit_bytes.min(real_size - out.len() as u64);
221            let want = usize::try_from(want).map_err(|_| NtfsError::TooLarge { bytes: want })?;
222            out.resize(out.len() + want, 0);
223        } else if real_clusters == unit_clusters {
224            // Fully allocated → stored uncompressed; append verbatim.
225            out.extend_from_slice(&real_bytes);
226        } else {
227            // Partially allocated → the allocated clusters hold the LZNT1 stream.
228            // A unit decodes to at most unit_bytes; cap it so any trailing cluster
229            // slack the decoder ran into can't misalign later units (the final
230            // unit is bounded by the truncate to real_size below).
231            let mut decompressed = Vec::new();
232            lznt1::decompress(&real_bytes, &mut decompressed)
233                .map_err(|_| NtfsError::BadCompression("LZNT1 decode failed"))?;
234            decompressed.truncate(unit_bytes as usize);
235            out.extend_from_slice(&decompressed);
236        }
237    }
238
239    out.truncate(real_size_usize);
240    Ok(out)
241}
242
243/// Decode the data-run list of a non-resident attribute from its (fixed-up)
244/// record bytes.
245///
246/// Reused to assemble a split `$DATA` whose runlist spans several `$DATA`
247/// attributes in different MFT records (via `$ATTRIBUTE_LIST`).
248///
249/// # Errors
250///
251/// [`NtfsError::BadAttribute`] for a resident attribute or an out-of-bounds
252/// runlist; [`NtfsError::BadRunlist`] for a malformed runlist.
253pub fn attribute_runlist(record: &[u8], attribute: &Attribute) -> Result<Vec<Run>> {
254    let AttributeBody::NonResident { runs_offset, .. } = attribute.body else {
255        return Err(NtfsError::BadAttribute {
256            offset: attribute.offset,
257            detail: "attribute is resident (no runlist)",
258        });
259    };
260    let attr_end = attribute
261        .offset
262        .checked_add(attribute.length as usize)
263        .ok_or(NtfsError::BadAttribute {
264            offset: attribute.offset,
265            detail: "attribute length overflow",
266        })?;
267    let runs_start =
268        attribute
269            .offset
270            .checked_add(runs_offset as usize)
271            .ok_or(NtfsError::BadAttribute {
272                offset: attribute.offset,
273                detail: "runs offset overflow",
274            })?;
275    let runs_bytes = record
276        .get(runs_start..attr_end)
277        .ok_or(NtfsError::BadAttribute {
278            offset: attribute.offset,
279            detail: "runlist out of bounds",
280        })?;
281    runlist::decode(runs_bytes)
282}
283
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// A volume where cluster `c` is filled with byte value `c as u8`, so the
    /// content of a read identifies which cluster it came from.
    fn volume(clusters: usize, cluster_size: usize) -> Cursor<Vec<u8>> {
        let mut v = vec![0u8; clusters * cluster_size];
        for c in 0..clusters {
            let b = c as u8;
            for x in &mut v[c * cluster_size..(c + 1) * cluster_size] {
                *x = b;
            }
        }
        Cursor::new(v)
    }

    // ── read_runs ─────────────────────────────────────────────────────────────

    #[test]
    fn reads_single_run() {
        let mut vol = volume(4, 512);
        // One run: 2 clusters starting at LCN 1.
        let runs = [Run {
            length: 2,
            lcn: Some(1),
        }];
        let out = read_runs(&mut vol, &runs, 512, 1024).unwrap();
        assert_eq!(out.len(), 1024);
        assert!(out[..512].iter().all(|&b| b == 1));
        assert!(out[512..].iter().all(|&b| b == 2));
    }

    #[test]
    fn sparse_run_yields_zeroes_without_reading() {
        let mut vol = volume(1, 512); // too small to read 2 clusters — proves no read
        let runs = [Run {
            length: 2,
            lcn: None,
        }];
        let out = read_runs(&mut vol, &runs, 512, 1024).unwrap();
        assert_eq!(out.len(), 1024);
        assert!(out.iter().all(|&b| b == 0));
    }

    #[test]
    fn truncates_to_real_size() {
        let mut vol = volume(4, 512);
        let runs = [Run {
            length: 2,
            lcn: Some(0),
        }]; // 1024 allocated
        let out = read_runs(&mut vol, &runs, 512, 600).unwrap();
        assert_eq!(out.len(), 600);
        // The cut falls inside cluster 1: all of cluster 0, then 88 bytes of 1s.
        assert!(out[..512].iter().all(|&b| b == 0));
        assert!(out[512..].iter().all(|&b| b == 1));
    }

    #[test]
    fn mixed_data_and_sparse() {
        let mut vol = volume(4, 512);
        let runs = [
            Run {
                length: 1,
                lcn: Some(3),
            }, // cluster 3 → all 3s
            Run {
                length: 1,
                lcn: None,
            }, // sparse → zeros
        ];
        let out = read_runs(&mut vol, &runs, 512, 1024).unwrap();
        assert!(out[..512].iter().all(|&b| b == 3));
        assert!(out[512..].iter().all(|&b| b == 0));
    }

    #[test]
    fn refuses_implausible_size() {
        // A crafted runlist that *allocates* far more than the ceiling — a
        // single sparse run of 2^40 clusters. (A huge real_size alone is
        // harmless: it is clamped to what the runs actually allocate.)
        let mut vol = volume(1, 512);
        let runs = [Run {
            length: 1 << 40,
            lcn: None,
        }];
        assert!(matches!(
            read_runs(&mut vol, &runs, 512, u64::MAX),
            Err(NtfsError::TooLarge { .. })
        ));
    }

    #[test]
    fn rejects_cluster_size_overflow() {
        // length × cluster_size does not fit in u64.
        let mut vol = volume(1, 512);
        let runs = [Run {
            length: u64::MAX,
            lcn: Some(0),
        }];
        assert!(matches!(
            read_runs(&mut vol, &runs, 512, 1024),
            Err(NtfsError::BadRunlist(_))
        ));
    }

    // ── read_attribute_value dispatch ─────────────────────────────────────────

    #[test]
    fn reads_resident_value() {
        use forensicnomicon::ntfs::attr_types;
        // Build a one-attribute record with resident $DATA content "hello".
        let content = b"hello";
        // Minimal resident attribute laid out by hand at record offset 0x10.
        let attr_off = 0x10usize;
        let mut record = vec![0u8; attr_off];
        // header: type, length, resident, name_len 0, name_off, flags, id
        let name_offset = 0x18u16;
        let content_offset = 0x18u16;
        // Attribute length is rounded up to a multiple of 8.
        let length = (content_offset as usize + content.len() + 7) & !7;
        let mut a = vec![0u8; length];
        a[0x00..0x04].copy_from_slice(&attr_types::DATA.to_le_bytes());
        a[0x04..0x08].copy_from_slice(&(length as u32).to_le_bytes());
        a[0x0A..0x0C].copy_from_slice(&name_offset.to_le_bytes());
        a[0x10..0x14].copy_from_slice(&(content.len() as u32).to_le_bytes());
        a[0x14..0x16].copy_from_slice(&content_offset.to_le_bytes());
        a[content_offset as usize..content_offset as usize + content.len()]
            .copy_from_slice(content);
        record.extend_from_slice(&a);
        record.extend_from_slice(&attr_types::END.to_le_bytes());

        let attrs = crate::attribute::parse_attributes(&record, attr_off).unwrap();
        let mut vol = volume(1, 512);
        let out = read_attribute_value(&mut vol, &record, &attrs[0], 512).unwrap();
        assert_eq!(out, b"hello");
    }

    #[test]
    fn reads_nonresident_value_via_runlist() {
        use forensicnomicon::ntfs::attr_types;
        // Non-resident $DATA: runlist of 1 cluster @ LCN 2, real size 512.
        let runs_bytes = [0x11u8, 0x01, 0x02, 0x00]; // len 1, lcn delta +2
        let attr_off = 0x10usize;
        let mut record = vec![0u8; attr_off];
        let runs_offset = 0x40u16;
        // Attribute length is rounded up to a multiple of 8.
        let length = ((runs_offset as usize + runs_bytes.len()) + 7) & !7;
        let mut a = vec![0u8; length];
        a[0x00..0x04].copy_from_slice(&attr_types::DATA.to_le_bytes());
        a[0x04..0x08].copy_from_slice(&(length as u32).to_le_bytes());
        a[0x08] = 1; // non-resident
        a[0x0A..0x0C].copy_from_slice(&runs_offset.to_le_bytes()); // name offset (no name)
        a[0x20..0x22].copy_from_slice(&runs_offset.to_le_bytes()); // runs offset
        a[0x28..0x30].copy_from_slice(&512u64.to_le_bytes()); // allocated
        a[0x30..0x38].copy_from_slice(&512u64.to_le_bytes()); // real size
        a[runs_offset as usize..runs_offset as usize + runs_bytes.len()]
            .copy_from_slice(&runs_bytes);
        record.extend_from_slice(&a);
        record.extend_from_slice(&attr_types::END.to_le_bytes());

        let attrs = crate::attribute::parse_attributes(&record, attr_off).unwrap();
        let mut vol = volume(4, 512); // cluster 2 → all 2s
        let out = read_attribute_value(&mut vol, &record, &attrs[0], 512).unwrap();
        assert_eq!(out.len(), 512);
        assert!(out.iter().all(|&b| b == 2));
    }

    #[test]
    fn reads_compressed_nonresident_value() {
        use forensicnomicon::ntfs::attr_types;
        // A COMPRESSED $DATA: one 16-cluster compression unit made of 1 real
        // cluster (the LZNT1 stream) + 15 sparse clusters. real_size = 100 bytes.
        // The stream is a single *uncompressed* LZNT1 chunk (header bit15=0,
        // low-12 = size-1), which is valid LZNT1 the decompressor copies verbatim
        // — lets us build a real compressed-unit fixture without a compressor.
        let content = vec![0xABu8; 100];
        let mut stream = Vec::new();
        stream.extend_from_slice(&(content.len() as u16 - 1).to_le_bytes()); // 0x0063
        stream.extend_from_slice(&content);

        // runlist: 0x11 len=1 lcn+2 | 0x01 len=15 sparse | 0x00 end
        let runs_bytes = [0x11u8, 0x01, 0x02, 0x01, 0x0F, 0x00];
        let attr_off = 0x10usize;
        let mut record = vec![0u8; attr_off];
        let runs_offset = 0x40u16;
        // Attribute length is rounded up to a multiple of 8.
        let length = ((runs_offset as usize + runs_bytes.len()) + 7) & !7;
        let mut a = vec![0u8; length];
        a[0x00..0x04].copy_from_slice(&attr_types::DATA.to_le_bytes());
        a[0x04..0x08].copy_from_slice(&(length as u32).to_le_bytes());
        a[0x08] = 1; // non-resident
        a[0x0A..0x0C].copy_from_slice(&runs_offset.to_le_bytes()); // name offset (no name)
        a[0x0C..0x0E].copy_from_slice(&0x0001u16.to_le_bytes()); // flags: COMPRESSED
        a[0x20..0x22].copy_from_slice(&runs_offset.to_le_bytes()); // runs offset
        a[0x22..0x24].copy_from_slice(&4u16.to_le_bytes()); // compression_unit = 4 → 16 clusters
        a[0x28..0x30].copy_from_slice(&(16u64 * 512).to_le_bytes()); // allocated 8192
        a[0x30..0x38].copy_from_slice(&(content.len() as u64).to_le_bytes()); // real size 100
        a[runs_offset as usize..runs_offset as usize + runs_bytes.len()]
            .copy_from_slice(&runs_bytes);
        record.extend_from_slice(&a);
        record.extend_from_slice(&attr_types::END.to_le_bytes());

        // Place the LZNT1 stream at the start of cluster 2, where the run points.
        let cluster_size = 512usize;
        let mut disk = vec![0u8; 16 * cluster_size];
        disk[2 * cluster_size..2 * cluster_size + stream.len()].copy_from_slice(&stream);
        let mut vol = std::io::Cursor::new(disk);

        let attrs = crate::attribute::parse_attributes(&record, attr_off).unwrap();
        let out = read_attribute_value(&mut vol, &record, &attrs[0], 512).unwrap();
        assert_eq!(
            out, content,
            "compressed $DATA must be LZNT1-decompressed, not returned raw"
        );
    }

    #[test]
    fn stops_reading_once_real_size_is_met() {
        // real_size covers only the first run; the second run must not be read.
        // The second run points far beyond the 4-cluster volume, so any attempt
        // to read it would fail with an I/O error — success proves it was skipped.
        let mut vol = volume(4, 512);
        let runs = [
            Run {
                length: 1,
                lcn: Some(3),
            }, // cluster 3 → all 3s
            Run {
                length: 1,
                lcn: Some(1_000),
            }, // out of range: unreadable
        ];
        let out = read_runs(&mut vol, &runs, 512, 512).unwrap();
        assert_eq!(out.len(), 512); // only the first run's worth
        assert!(out.iter().all(|&b| b == 3));
    }

    // ── attribute_runlist bounds handling ─────────────────────────────────────

    #[test]
    fn rejects_runlist_region_out_of_bounds() {
        use crate::attribute::{Attribute, AttributeBody};
        // runs_offset points past the attribute, so the runlist slice is invalid.
        let attr = Attribute {
            type_code: forensicnomicon::ntfs::attr_types::DATA,
            length: 0x48,
            non_resident: true,
            name: None,
            flags: 0,
            attribute_id: 0,
            offset: 0,
            body: AttributeBody::NonResident {
                start_vcn: 0,
                last_vcn: 0,
                runs_offset: 0xFFFF,
                compression_unit: 0,
                allocated_size: 512,
                real_size: 512,
                initialized_size: 512,
            },
        };
        let record = vec![0u8; 0x48];
        let mut vol = volume(1, 512);
        assert!(matches!(
            read_attribute_value(&mut vol, &record, &attr, 512),
            Err(NtfsError::BadAttribute { detail, .. }) if detail == "runlist out of bounds"
        ));
    }

    #[test]
    fn rejects_runs_offset_overflow() {
        use crate::attribute::{Attribute, AttributeBody};
        // offset + length stays in range, but offset + runs_offset overflows.
        let attr = Attribute {
            type_code: forensicnomicon::ntfs::attr_types::DATA,
            length: 0x48,
            non_resident: true,
            name: None,
            flags: 0,
            attribute_id: 0,
            offset: usize::MAX - 0x48,
            body: AttributeBody::NonResident {
                start_vcn: 0,
                last_vcn: 0,
                runs_offset: 0x49,
                compression_unit: 0,
                allocated_size: 512,
                real_size: 512,
                initialized_size: 512,
            },
        };
        let record = vec![0u8; 1];
        let mut vol = volume(1, 512);
        assert!(matches!(
            read_attribute_value(&mut vol, &record, &attr, 512),
            Err(NtfsError::BadAttribute { detail, .. }) if detail == "runs offset overflow"
        ));
    }

    #[test]
    fn attribute_runlist_rejects_resident_attribute() {
        // A resident attribute has no runlist to decode.
        let attr = Attribute {
            type_code: forensicnomicon::ntfs::attr_types::DATA,
            length: 0x20,
            non_resident: false,
            name: None,
            flags: 0,
            attribute_id: 0,
            offset: 0,
            body: AttributeBody::Resident {
                content_offset: 0x18,
                content_length: 4,
            },
        };
        assert!(matches!(
            attribute_runlist(&[0u8; 0x20], &attr),
            Err(NtfsError::BadAttribute { detail, .. }) if detail.contains("resident")
        ));
    }
}
590}