Skip to main content

fstool/fs/ntfs/
mod.rs

1//! NTFS — Microsoft's NT File System. Read implementation.
2//!
3//! ## Status
4//!
5//! Detection, MFT decode, attribute decode, directory walking via
6//! $INDEX_ROOT + $INDEX_ALLOCATION, and streaming reads of $DATA streams
7//! (resident, non-resident, sparse, LZNT1-compressed, and alternate data
8//! streams) are implemented. The driver follows `$ATTRIBUTE_LIST` spill
9//! across multiple MFT records, resolves shared security descriptors via
10//! `$Secure:$SDS` (looked up through `$Secure:$SII`), and case-folds
11//! directory lookups through the `$UpCase` table. Write support is out
12//! of scope.
13//!
14//! ## Reference
15//!
16//! - Microsoft "[MS-FSCC] File System Control Codes":
17//!   <https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/>
18//! - Microsoft "[MS-XCA]" §2.5 ("LZNT1 Algorithm Details"):
19//!   <https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-xca/>
20//! - Linux kernel "NTFS3" docs:
21//!   <https://docs.kernel.org/filesystems/ntfs3.html>
22//! - "NTFS Documentation" by Richard Russon and Yuval Fledel.
23//!
24//! ## Attribute model — non-Unix metadata
25//!
26//! NTFS metadata doesn't map cleanly onto POSIX. The shape we adopt for
27//! cross-FS conversion is:
28//!
29//! | NTFS concept                              | xattr key                                | Notes                                                    |
30//! |-------------------------------------------|------------------------------------------|----------------------------------------------------------|
31//! | `$STANDARD_INFORMATION.file_attributes`   | `user.ntfs.dos_attrs`                    | 32-bit LE: READONLY/HIDDEN/SYSTEM/ARCHIVE/COMPRESSED/etc |
32//! | Object ID GUID (`$OBJECT_ID`)             | `user.ntfs.object_id`                    | 16 bytes raw GUID                                        |
33//! | Reparse point tag + data (`$REPARSE_POINT`)| `user.ntfs.reparse`                     | Tag (LE u32) prepended to raw reparse data               |
34//! | Alternate Data Streams (named `$DATA`)    | `user.ntfs.ads.<name>`                   | Per-stream xattr; binary stream contents                 |
35//! | `$SECURITY_DESCRIPTOR` (raw NT SD blob)   | `system.ntfs_security`                   | Resident attribute, OR resolved from `$Secure:$SDS`      |
36//! |                                           |                                          | via `$STANDARD_INFORMATION.security_id`                  |
37//! | Short (8.3) filename                      | `user.ntfs.short_name`                   | UTF-16LE per `$FILE_NAME` with namespace=DOS             |
38//! | Last-write / creation / change / access   | inode timestamps + `user.ntfs.times.raw` | The latter holds all four NT-FILETIME (100 ns) values    |
39//!
40//! ### NTFS → NTFS round-trip guarantee
41//!
42//! The cross-FS xattr mapping above is lossy at the sub-100ns level. For
43//! NTFS-to-NTFS transfers, the writer copies raw attribute byte streams
44//! verbatim rather than going through this mapping.
45//!
46//! ### Reparse points
47//!
48//! `$REPARSE_POINT` data is surfaced via `user.ntfs.reparse` (tag + raw
49//! data). This driver does NOT follow junctions, symlinks or any other
50//! reparse-point type — the target's bytes are intentionally exposed as-is
51//! so the caller can decide whether to interpret them. A symlink read as
52//! a "file" via `open_file_reader` returns the reparse point's
53//! `$DATA` (typically empty) rather than dereferencing the link.
54
55use std::collections::HashMap;
56use std::io::Read;
57
58use crate::Result;
59use crate::block::BlockDevice;
60
61pub mod attribute;
62pub mod attribute_list;
63pub mod boot;
64pub mod compression;
65pub mod format;
66pub mod index;
67pub mod logfile;
68pub mod mft;
69pub mod run_list;
70pub mod rw;
71pub mod secure;
72pub mod upcase_gen;
73pub mod writer;
74
75use attribute::{
76    ATTR_FLAG_COMPRESSED, ATTR_FLAG_ENCRYPTED, AttributeIter, AttributeKind, FileName,
77    StandardInformation, TYPE_ATTRIBUTE_LIST, TYPE_DATA, TYPE_FILE_NAME, TYPE_INDEX_ALLOCATION,
78    TYPE_INDEX_ROOT, TYPE_OBJECT_ID, TYPE_REPARSE_POINT, TYPE_SECURITY_DESCRIPTOR,
79    TYPE_STANDARD_INFORMATION,
80};
81use boot::BootSector;
82use index::IndexEntry;
83use run_list::Extent;
84use secure::UpcaseTable;
85
/// Hard-coded MFT record numbers reserved by NTFS.
///
/// Record 0 is `$MFT` itself; `read_mft_record` bootstraps from it.
pub const MFT_RECORD_MFT: u64 = 0;
/// Record of the root directory; `lookup_path` starts every walk here.
pub const MFT_RECORD_ROOT: u64 = 5;
/// Record of the `$Secure` metafile (shared security descriptors).
pub const MFT_RECORD_SECURE: u64 = 9;
/// Record of the `$UpCase` metafile (case-folding table).
pub const MFT_RECORD_UPCASE: u64 = 10;
91
/// Upper bound, in bytes, on one security descriptor.
///
/// Cap on the size of a single security descriptor we'll pull out of
/// `$Secure:$SDS` and surface as `system.ntfs_security`. SDs are normally
/// well under 1 KiB; the 64 KiB cap exists purely as a sanity check
/// against malformed images.
const MAX_SECURITY_DESCRIPTOR_BYTES: u64 = 64 * 1024;
97
98/// Absolute ceiling on any single heap allocation sized from an untrusted
99/// on-disk field, independent of device size. Caps pathological values on
100/// huge backing devices (e.g. a 4 GiB `real_size` on a 1 TiB image).
101const MAX_UNTRUSTED_ALLOC: u64 = 256 * 1024 * 1024;
102
103/// Validate that an allocation size derived from an untrusted on-disk field
104/// is plausible before we hand it to `Vec::with_capacity` / `vec![0; n]`.
105/// `want` is rejected if it exceeds either the device size or an absolute
106/// ceiling. Returns `want` as a `usize` on success.
107fn checked_alloc_len(want: u64, total: u64, what: &str) -> Result<usize> {
108    if want > total || want > MAX_UNTRUSTED_ALLOC {
109        return Err(crate::Error::InvalidImage(format!(
110            "ntfs: {what} size {want} exceeds device/allocation bounds"
111        )));
112    }
113    usize::try_from(want)
114        .map_err(|_| crate::Error::InvalidImage(format!("ntfs: {what} size {want} too large")))
115}
116
117/// Validate an attribute's `compression_unit` exponent and return the number
118/// of clusters per compression unit (`1 << compression_unit`). Real NTFS uses
119/// 4 (16-cluster units); we accept up to 16 and reject anything larger so the
120/// `1 << compression_unit` shift cannot overflow.
121fn validate_compression_unit(compression_unit: u8) -> Result<u64> {
122    if compression_unit > 16 {
123        return Err(crate::Error::InvalidImage(format!(
124            "ntfs: implausible compression_unit {compression_unit}"
125        )));
126    }
127    1u64.checked_shl(u32::from(compression_unit))
128        .ok_or_else(|| crate::Error::InvalidImage("ntfs: compression_unit shift overflow".into()))
129}
130
131pub fn probe(dev: &mut dyn BlockDevice) -> Result<bool> {
132    if dev.total_size() < 11 {
133        return Ok(false);
134    }
135    let mut head = [0u8; 11];
136    dev.read_at(0, &mut head)?;
137    Ok(&head[3..11] == boot::NTFS_OEM)
138}
139
140/// Convenience re-export of the boot-sector decoder for tests/inspect.
141pub use boot::BootSector as ExportedBootSector;
142
/// Handle on an opened NTFS volume.
///
/// Holds the decoded boot sector plus lazily populated caches. The block
/// device itself is not stored: every method that touches the disk takes
/// it as a `dev` argument.
pub struct Ntfs {
    /// Boot sector decoded by `Ntfs::open`; source of all volume geometry.
    boot: BootSector,
    /// Cached MFT run list: where to read MFT record N from. Empty before
    /// `load_mft_runs` has been called.
    // NOTE(review): in the code visible here the list is filled by the
    // bootstrap branch of `read_mft_record`; `load_mft_runs` is not visible.
    mft_runs: Vec<Extent>,
    /// Cached `$UpCase` table for case-insensitive directory lookups.
    /// `None` means "haven't tried yet"; `Some(identity)` means we tried
    /// and the image didn't expose one — names are compared exactly.
    upcase: Option<UpcaseTable>,
    /// Cache of decoded `$Secure:$SII` entries (security_id -> SDS slice).
    /// `None` means "haven't tried yet". An empty `Some(_)` means we tried
    /// and the image had no usable `$Secure`.
    sii_cache: Option<HashMap<u32, (u64, u32)>>,
    /// Writer state — populated only after `Ntfs::format` (or
    /// `Ntfs::open_for_write`). Read-only opens leave this `None`.
    writer: Option<writer::WriterState>,
}
160
161impl Ntfs {
162    pub fn open(dev: &mut dyn BlockDevice) -> Result<Self> {
163        if dev.total_size() < 512 {
164            return Err(crate::Error::InvalidImage(
165                "ntfs: device too small to hold a boot sector".into(),
166            ));
167        }
168        let mut buf = [0u8; 512];
169        dev.read_at(0, &mut buf)?;
170        let boot = BootSector::decode(&buf).ok_or_else(|| {
171            crate::Error::InvalidImage(
172                "ntfs: boot sector OEM ID is not 'NTFS    ' or geometry is invalid".into(),
173            )
174        })?;
175        // Geometry passed `decode`'s self-consistency checks; now bound the
176        // derived sizes against the actual device so a valid-but-hostile BPB
177        // cannot force allocations larger than the image can possibly hold.
178        let total = dev.total_size();
179        let cluster_size = u64::from(boot.cluster_size());
180        if cluster_size > total {
181            return Err(crate::Error::InvalidImage(
182                "ntfs: cluster size exceeds device size".into(),
183            ));
184        }
185        // The MFT's first record must lie within the device.
186        let mft_byte = boot
187            .mft_lcn
188            .checked_mul(cluster_size)
189            .ok_or_else(|| crate::Error::InvalidImage("ntfs: $MFT LCN offset overflow".into()))?;
190        if mft_byte >= total {
191            return Err(crate::Error::InvalidImage(
192                "ntfs: $MFT LCN points past end of device".into(),
193            ));
194        }
195        Ok(Self {
196            boot,
197            mft_runs: Vec::new(),
198            upcase: None,
199            sii_cache: None,
200            writer: None,
201        })
202    }
203
204    pub fn total_bytes(&self) -> u64 {
205        self.boot.total_sectors * u64::from(self.boot.bytes_per_sector)
206    }
207
    /// Cluster size in bytes, as computed by the decoded boot sector.
    pub fn cluster_size(&self) -> u32 {
        self.boot.cluster_size()
    }
211
    /// The boot sector's `bytes_per_sector` field.
    pub fn bytes_per_sector(&self) -> u16 {
        self.boot.bytes_per_sector
    }
215
    /// The boot sector's `sectors_per_cluster` field, returned as stored.
    pub fn sectors_per_cluster(&self) -> u8 {
        self.boot.sectors_per_cluster
    }
219
    /// Size in bytes of one MFT record, as computed by the decoded boot
    /// sector. Scratch buffers passed to `read_mft_record` must be at least
    /// this large.
    pub fn mft_record_size(&self) -> u32 {
        self.boot.mft_record_size()
    }
223
    /// The boot sector's `volume_serial` field.
    pub fn volume_serial(&self) -> u64 {
        self.boot.volume_serial
    }
227
    /// Borrow the decoded boot sector this volume was opened with.
    pub fn boot_sector(&self) -> &BootSector {
        &self.boot
    }
231
232    /// Read MFT record N from disk into `out`. `out` must be at least
233    /// `mft_record_size` bytes; the function applies USA fixup before
234    /// returning. The first call lazily loads $MFT's own run list by
235    /// reading record 0 directly from `mft_lcn` (bootstrap).
236    pub fn read_mft_record(
237        &mut self,
238        dev: &mut dyn BlockDevice,
239        rec: u64,
240        out: &mut [u8],
241    ) -> Result<()> {
242        let rec_size = self.boot.mft_record_size() as usize;
243        if out.len() < rec_size {
244            return Err(crate::Error::InvalidArgument(
245                "ntfs: MFT scratch buffer too small".into(),
246            ));
247        }
248        let out = &mut out[..rec_size];
249
250        // Bootstrap: read record 0 from the BPB-anchored MFT LCN. From
251        // record 0 we extract $MFT's $DATA run list and cache it.
252        if self.mft_runs.is_empty() {
253            let base = self
254                .boot
255                .mft_lcn
256                .checked_mul(u64::from(self.boot.cluster_size()))
257                .ok_or_else(|| {
258                    crate::Error::InvalidImage("ntfs: $MFT LCN offset overflow".into())
259                })?;
260            // Record 0 is at the very start of the MFT — its index times
261            // record_size is zero, so the read offset is just `base`.
262            dev.read_at(base, out)?;
263            mft::apply_fixup(out, self.boot.bytes_per_sector as usize)?;
264            // Now decode record 0's attributes to find $DATA's run list.
265            let header = mft::RecordHeader::parse(out)?;
266            for attr_res in AttributeIter::new(out, header.first_attribute_offset as usize) {
267                let attr = attr_res?;
268                if attr.type_code == TYPE_DATA && attr.name.is_empty() {
269                    match attr.kind {
270                        AttributeKind::NonResident { runs, .. } => {
271                            self.mft_runs = runs;
272                        }
273                        AttributeKind::Resident { .. } => {
274                            return Err(crate::Error::InvalidImage(
275                                "ntfs: $MFT $DATA is resident — impossible".into(),
276                            ));
277                        }
278                    }
279                    break;
280                }
281            }
282            if self.mft_runs.is_empty() {
283                return Err(crate::Error::InvalidImage(
284                    "ntfs: could not locate $MFT $DATA run list in record 0".into(),
285                ));
286            }
287            if rec == 0 {
288                return Ok(()); // already loaded
289            }
290        }
291
292        // For all other records, map record `rec` through the MFT $DATA
293        // run list. `mft_runs` is in clusters; the record offset within
294        // the MFT (in bytes) is `rec * rec_size`.
295        let mft_byte_offset = rec
296            .checked_mul(rec_size as u64)
297            .ok_or_else(|| crate::Error::InvalidImage("ntfs: MFT offset overflow".into()))?;
298        let cluster_size = u64::from(self.boot.cluster_size());
299        let mut vcn_bytes: u64 = 0;
300        let mut found = false;
301        for ext in &self.mft_runs {
302            let ext_bytes = ext.length.checked_mul(cluster_size).ok_or_else(|| {
303                crate::Error::InvalidImage("ntfs: MFT extent span overflow".into())
304            })?;
305            let vcn_end = vcn_bytes.checked_add(ext_bytes).ok_or_else(|| {
306                crate::Error::InvalidImage("ntfs: MFT run-list offset overflow".into())
307            })?;
308            if mft_byte_offset < vcn_end {
309                let local = mft_byte_offset - vcn_bytes;
310                match ext.lcn {
311                    Some(lcn) => {
312                        let phys = lcn
313                            .checked_mul(cluster_size)
314                            .and_then(|b| b.checked_add(local))
315                            .ok_or_else(|| {
316                                crate::Error::InvalidImage(
317                                    "ntfs: MFT record byte offset overflow".into(),
318                                )
319                            })?;
320                        dev.read_at(phys, out)?;
321                    }
322                    None => {
323                        return Err(crate::Error::InvalidImage(
324                            "ntfs: requested MFT record sits in a sparse run".into(),
325                        ));
326                    }
327                }
328                found = true;
329                break;
330            }
331            vcn_bytes = vcn_end;
332        }
333        if !found {
334            return Err(crate::Error::InvalidImage(format!(
335                "ntfs: MFT record {rec} is past the end of $MFT"
336            )));
337        }
338        mft::apply_fixup(out, self.boot.bytes_per_sector as usize)?;
339        Ok(())
340    }
341
342    /// Read the base record `rec_no` plus, if it has an `$ATTRIBUTE_LIST`,
343    /// every extension record named in that list. Returns a vector of
344    /// `(record_number, record_bytes)` pairs ordered base-first.
345    fn load_record_set(
346        &mut self,
347        dev: &mut dyn BlockDevice,
348        rec_no: u64,
349    ) -> Result<Vec<(u64, Vec<u8>)>> {
350        let rec_size = self.boot.mft_record_size() as usize;
351        let mut base = vec![0u8; rec_size];
352        self.read_mft_record(dev, rec_no, &mut base)?;
353        let mut records: Vec<(u64, Vec<u8>)> = vec![(rec_no, base)];
354
355        // Look for $ATTRIBUTE_LIST in the base record.
356        let base_bytes = records[0].1.clone();
357        let hdr = mft::RecordHeader::parse(&base_bytes)?;
358        let mut alist_bytes: Option<Vec<u8>> = None;
359        for attr_res in AttributeIter::new(&base_bytes, hdr.first_attribute_offset as usize) {
360            let attr = attr_res?;
361            if attr.type_code != TYPE_ATTRIBUTE_LIST {
362                continue;
363            }
364            match attr.kind {
365                AttributeKind::Resident { value, .. } => {
366                    alist_bytes = Some(value.to_vec());
367                }
368                AttributeKind::NonResident {
369                    real_size, runs, ..
370                } => {
371                    // Non-resident $ATTRIBUTE_LIST: stream it cluster by
372                    // cluster through a dedicated reader. This is uncommon
373                    // (the list rarely overflows a record) but legal.
374                    let cap = checked_alloc_len(real_size, dev.total_size(), "$ATTRIBUTE_LIST")?;
375                    let mut reader = NonResidentReader {
376                        dev: &mut *dev,
377                        cluster_size: self.boot.cluster_size() as u64,
378                        runs,
379                        real_size,
380                        initialized_size: real_size,
381                        pos: 0,
382                        cluster_buf: vec![0u8; self.boot.cluster_size() as usize],
383                        cached_vcn: u64::MAX,
384                        cached_cluster_filled: false,
385                    };
386                    let mut buf = Vec::with_capacity(cap);
387                    reader.read_to_end(&mut buf).map_err(crate::Error::from)?;
388                    alist_bytes = Some(buf);
389                }
390            }
391            break;
392        }
393
394        let Some(alist_bytes) = alist_bytes else {
395            return Ok(records);
396        };
397        let entries = attribute_list::decode(&alist_bytes)?;
398        let mut seen = std::collections::HashSet::new();
399        seen.insert(rec_no);
400        for entry in entries {
401            let extension_rec = entry.record_number();
402            if extension_rec == rec_no {
403                // The list also names attributes that live in the base
404                // record — skip those.
405                continue;
406            }
407            if !seen.insert(extension_rec) {
408                continue;
409            }
410            let mut buf = vec![0u8; rec_size];
411            self.read_mft_record(dev, extension_rec, &mut buf)?;
412            records.push((extension_rec, buf));
413        }
414        Ok(records)
415    }
416
417    /// Walk a directory's index, returning the (file_ref, FileName) of each
418    /// entry. Skips DOS-namespace duplicates (those are covered by the
419    /// Win32+DOS combined entry that has both names).
420    pub fn read_directory(
421        &mut self,
422        dev: &mut dyn BlockDevice,
423        dir_rec: u64,
424    ) -> Result<Vec<IndexEntry>> {
425        // Flush any entries staged for this directory by the writer's
426        // batch cache so the on-disk `$I30` we are about to read reflects
427        // every child created so far (transparency for list / path
428        // lookups / remove).
429        if let Some(w) = self.writer.as_mut()
430            && let Some(entries) = w.dir_batch.take(&dir_rec)
431        {
432            self.serialize_dir(dev, dir_rec, &entries)?;
433        }
434        let records = self.load_record_set(dev, dir_rec)?;
435        let hdr = mft::RecordHeader::parse(&records[0].1)?;
436        if !hdr.is_in_use() {
437            return Err(crate::Error::InvalidImage(format!(
438                "ntfs: directory record {dir_rec} is not in use"
439            )));
440        }
441
442        // Locate $INDEX_ROOT (must be named "$I30") and optional
443        // $INDEX_ALLOCATION (same name) across the merged record set.
444        let mut root_value: Option<Vec<u8>> = None;
445        let mut alloc_runs: Option<Vec<Extent>> = None;
446        for (_rec, rec_buf) in &records {
447            let h = mft::RecordHeader::parse(rec_buf)?;
448            for attr_res in AttributeIter::new(rec_buf, h.first_attribute_offset as usize) {
449                let attr = attr_res?;
450                if attr.name != "$I30" {
451                    continue;
452                }
453                match (attr.type_code, attr.kind) {
454                    (TYPE_INDEX_ROOT, AttributeKind::Resident { value, .. }) => {
455                        root_value = Some(value.to_vec());
456                    }
457                    (TYPE_INDEX_ALLOCATION, AttributeKind::NonResident { runs, .. }) => {
458                        // Multiple $INDEX_ALLOCATION segments are appended
459                        // in starting_vcn order via load_record_set, so
460                        // just chain runs as we encounter them. NTFS only
461                        // splits these for very large directories.
462                        match alloc_runs.as_mut() {
463                            Some(existing) => existing.extend(runs),
464                            None => alloc_runs = Some(runs),
465                        }
466                    }
467                    _ => {}
468                }
469            }
470        }
471        let root_value = root_value.ok_or_else(|| {
472            crate::Error::InvalidImage(format!("ntfs: record {dir_rec} has no $INDEX_ROOT $I30"))
473        })?;
474        let root_hdr = index::IndexRootHeader::parse(&root_value)?;
475        let entries_start = root_hdr.header_offset + root_hdr.first_entry_offset as usize;
476        let entries_len = (root_hdr.bytes_in_use as usize).saturating_sub(16);
477
478        let mut out = Vec::new();
479        let mut visited_blocks = std::collections::HashSet::<u64>::new();
480        let root_children = index::walk_index_node(&root_value, entries_start, entries_len, |e| {
481            out.push(e.clone());
482        })?;
483
484        if let Some(runs) = alloc_runs {
485            let block_size = root_hdr.index_block_size as usize;
486            for vcn in root_children {
487                self.descend_index(dev, &runs, block_size, vcn, &mut out, &mut visited_blocks)?;
488            }
489        }
490
491        // Dedup DOS-namespace duplicates: NTFS stores a separate index
492        // entry for the DOS short name when the file has both Win32 and
493        // DOS names. The same `file_ref` shows up twice in that case;
494        // we drop the DOS-namespace one and keep the Win32 long name.
495        let mut seen_refs = std::collections::HashMap::<u64, usize>::new();
496        let mut filtered: Vec<IndexEntry> = Vec::new();
497        for entry in out.into_iter() {
498            let key = entry.file_ref;
499            let is_dos = entry
500                .file_name
501                .as_ref()
502                .map(|fn_| fn_.namespace == FileName::NAMESPACE_DOS)
503                .unwrap_or(false);
504            if is_dos && seen_refs.contains_key(&key) {
505                continue;
506            }
507            if let Some(&idx) = seen_refs.get(&key) {
508                // If we already kept a DOS entry but now get a Win32 one,
509                // replace it.
510                let prior_is_dos = filtered[idx]
511                    .file_name
512                    .as_ref()
513                    .map(|fn_| fn_.namespace == FileName::NAMESPACE_DOS)
514                    .unwrap_or(false);
515                if prior_is_dos && !is_dos {
516                    filtered[idx] = entry;
517                }
518                continue;
519            }
520            seen_refs.insert(key, filtered.len());
521            filtered.push(entry);
522        }
523        Ok(filtered)
524    }
525
526    fn descend_index(
527        &mut self,
528        dev: &mut dyn BlockDevice,
529        alloc_runs: &[Extent],
530        block_size: usize,
531        vcn: u64,
532        out: &mut Vec<IndexEntry>,
533        visited: &mut std::collections::HashSet<u64>,
534    ) -> Result<()> {
535        if !visited.insert(vcn) {
536            return Err(crate::Error::InvalidImage(
537                "ntfs: cycle in $INDEX_ALLOCATION tree".into(),
538            ));
539        }
540        let cluster_size = u64::from(self.boot.cluster_size());
541        let block_len = checked_alloc_len(block_size as u64, dev.total_size(), "index block")?;
542        let target_bytes = vcn.checked_mul(cluster_size).ok_or_else(|| {
543            crate::Error::InvalidImage("ntfs: index VCN byte offset overflow".into())
544        })?;
545        let mut walked: u64 = 0;
546        let mut block_buf = vec![0u8; block_len];
547        let mut found_offset: Option<u64> = None;
548        for ext in alloc_runs {
549            let span = ext.length.checked_mul(cluster_size).ok_or_else(|| {
550                crate::Error::InvalidImage("ntfs: index extent span overflow".into())
551            })?;
552            let walked_end = walked.checked_add(span).ok_or_else(|| {
553                crate::Error::InvalidImage("ntfs: index run-list offset overflow".into())
554            })?;
555            if target_bytes < walked_end {
556                let local = target_bytes - walked;
557                match ext.lcn {
558                    Some(lcn) => {
559                        found_offset = Some(
560                            lcn.checked_mul(cluster_size)
561                                .and_then(|b| b.checked_add(local))
562                                .ok_or_else(|| {
563                                    crate::Error::InvalidImage(
564                                        "ntfs: index LCN byte offset overflow".into(),
565                                    )
566                                })?,
567                        );
568                    }
569                    None => {
570                        return Err(crate::Error::InvalidImage(
571                            "ntfs: $INDEX_ALLOCATION points to a sparse VCN".into(),
572                        ));
573                    }
574                }
575                break;
576            }
577            walked = walked_end;
578        }
579        let phys = found_offset.ok_or_else(|| {
580            crate::Error::InvalidImage(format!("ntfs: index VCN {vcn} not in run list"))
581        })?;
582        dev.read_at(phys, &mut block_buf)?;
583        mft::apply_fixup(&mut block_buf, self.boot.bytes_per_sector as usize)?;
584        let blk_hdr = index::IndexBlockHeader::parse(&block_buf)?;
585        let entries_start = blk_hdr.entries_start();
586        let entries_len = blk_hdr.entries_byte_len();
587        let children = index::walk_index_node(&block_buf, entries_start, entries_len, |e| {
588            out.push(e.clone());
589        })?;
590        for child in children {
591            self.descend_index(dev, alloc_runs, block_size, child, out, visited)?;
592        }
593        Ok(())
594    }
595
596    /// Resolve a path to its MFT record number. Path components are matched
597    /// case-insensitively through the `$UpCase` table (when available). The
598    /// root path "/" maps to record 5.
599    pub fn lookup_path(&mut self, dev: &mut dyn BlockDevice, path: &str) -> Result<u64> {
600        if !path.starts_with('/') {
601            return Err(crate::Error::InvalidArgument(format!(
602                "ntfs: path must be absolute, got {path:?}"
603            )));
604        }
605        self.ensure_upcase(dev)?;
606        let mut current = MFT_RECORD_ROOT;
607        for component in path.split('/').filter(|s| !s.is_empty()) {
608            let entries = self.read_directory(dev, current)?;
609            let mut next: Option<u64> = None;
610            for entry in entries {
611                if let Some(fname) = entry.file_name {
612                    if fname.namespace == FileName::NAMESPACE_DOS {
613                        // DOS-namespace entries are covered by the matching
614                        // Win32 entry; skip to avoid double matching.
615                        continue;
616                    }
617                    let matches = match self.upcase.as_ref() {
618                        Some(t) => t.equals_ignore_case(&fname.name, component),
619                        None => fname.name == component,
620                    };
621                    if matches {
622                        next = Some(entry.file_ref & 0x0000_FFFF_FFFF_FFFF);
623                        break;
624                    }
625                }
626            }
627            current = next.ok_or_else(|| {
628                crate::Error::InvalidImage(format!("ntfs: path component {component:?} not found"))
629            })?;
630        }
631        Ok(current)
632    }
633
634    /// Public list-path API: walks `path`, returns directory entries.
635    pub fn list_path(
636        &mut self,
637        dev: &mut dyn BlockDevice,
638        path: &str,
639    ) -> Result<Vec<crate::fs::DirEntry>> {
640        let rec = self.lookup_path(dev, path)?;
641        let entries = self.read_directory(dev, rec)?;
642        // At the root, `$I30` indexes the canonical system files
643        // (`$MFT`, `$Volume`, `$Bitmap`, …, `$Extend`, the reserved
644        // slots 12..15). Hide them from the cross-FS view so the
645        // generic walker only sees user-visible entries — they're
646        // still present on disk for `ntfs-3g` / chkdsk to find.
647        let is_root = rec == MFT_RECORD_ROOT;
648        let mut out = Vec::with_capacity(entries.len());
649        for entry in entries {
650            if let Some(fname) = entry.file_name {
651                if is_root && fname.name.starts_with('$') {
652                    continue;
653                }
654                let kind = if fname.is_directory() {
655                    crate::fs::EntryKind::Dir
656                } else {
657                    crate::fs::EntryKind::Regular
658                };
659                // The MFT reference's upper 16 bits are the sequence
660                // number; the public inode field is a u32 so we truncate
661                // to the low 32 bits of the record number. Callers
662                // doing a real cross-FS map should use `lookup_path`.
663                let rec_no = (entry.file_ref & 0x0000_FFFF_FFFF_FFFF) as u32;
664                let size = if fname.is_directory() {
665                    0
666                } else {
667                    fname.real_size
668                };
669                out.push(crate::fs::DirEntry {
670                    name: fname.name,
671                    inode: rec_no,
672                    kind,
673                    size,
674                });
675            }
676        }
677        Ok(out)
678    }
679
680    /// Open the default unnamed $DATA stream of `path` as a streaming
681    /// reader. The reader pulls one cluster at a time through an
682    /// internal scratch buffer.
683    pub fn open_file_reader<'a>(
684        &'a mut self,
685        dev: &'a mut dyn BlockDevice,
686        path: &str,
687    ) -> Result<Box<dyn Read + 'a>> {
688        let rec_no = self.lookup_path(dev, path)?;
689        self.open_stream_by_record(dev, rec_no, "")
690    }
691
692    /// Open a named stream by MFT record + name. `""` means the default
693    /// unnamed $DATA. Used by both `open_file_reader` and ADS extraction.
694    ///
695    /// Honours `$ATTRIBUTE_LIST` spill: if the stream's runs are split
696    /// across multiple MFT records, all segments are gathered and chained
697    /// by `starting_vcn` before the reader is constructed.
698    ///
699    /// Compressed `$DATA` (LZNT1) is decoded on the fly, one 16-cluster
700    /// "compression unit" at a time. Encrypted `$DATA` (EFS) is refused
701    /// with [`crate::Error::Unsupported`].
702    pub fn open_stream_by_record<'a>(
703        &'a mut self,
704        dev: &'a mut dyn BlockDevice,
705        rec_no: u64,
706        stream_name: &str,
707    ) -> Result<Box<dyn Read + 'a>> {
708        let records = self.load_record_set(dev, rec_no)?;
709        let hdr = mft::RecordHeader::parse(&records[0].1)?;
710        if !hdr.is_in_use() {
711            return Err(crate::Error::InvalidImage(format!(
712                "ntfs: record {rec_no} is not in use"
713            )));
714        }
715
716        // Gather every $DATA segment matching the requested name across
717        // all records in the set. For non-resident attributes we'll merge
718        // their run lists; for resident ones we expect exactly one match.
719        let mut resident_bytes: Option<Vec<u8>> = None;
720        // (starting_vcn, last_vcn, allocated, real, initialized, comp_unit, runs)
721        type Segment = (u64, u64, u64, u64, u64, u8, Vec<Extent>);
722        let mut segments: Vec<Segment> = Vec::new();
723        let mut is_encrypted = false;
724        let mut is_compressed = false;
725        for (_rec, rec_buf) in &records {
726            let h = mft::RecordHeader::parse(rec_buf)?;
727            for attr_res in AttributeIter::new(rec_buf, h.first_attribute_offset as usize) {
728                let attr = attr_res?;
729                if attr.type_code != TYPE_DATA {
730                    continue;
731                }
732                if attr.name != stream_name {
733                    continue;
734                }
735                if attr.flags & ATTR_FLAG_ENCRYPTED != 0 {
736                    is_encrypted = true;
737                }
738                if attr.flags & ATTR_FLAG_COMPRESSED != 0 {
739                    is_compressed = true;
740                }
741                match attr.kind {
742                    AttributeKind::Resident { value, .. } => {
743                        resident_bytes = Some(value.to_vec());
744                    }
745                    AttributeKind::NonResident {
746                        starting_vcn,
747                        last_vcn,
748                        allocated_size,
749                        real_size,
750                        initialized_size,
751                        compression_unit,
752                        runs,
753                    } => {
754                        segments.push((
755                            starting_vcn,
756                            last_vcn,
757                            allocated_size,
758                            real_size,
759                            initialized_size,
760                            compression_unit,
761                            runs,
762                        ));
763                    }
764                }
765            }
766        }
767
768        if is_encrypted {
769            return Err(crate::Error::Unsupported(
770                "ntfs: encrypted $DATA (EFS) is not supported".into(),
771            ));
772        }
773
774        if let Some(bytes) = resident_bytes {
775            return Ok(Box::new(ResidentReader { bytes, pos: 0 }));
776        }
777
778        if segments.is_empty() {
779            return Err(crate::Error::InvalidImage(format!(
780                "ntfs: stream {stream_name:?} not found on record {rec_no}"
781            )));
782        }
783
784        // Sort and merge segments by starting_vcn. The first segment's
785        // header carries the canonical real_size / initialized_size /
786        // compression_unit; later segments only contribute runs.
787        segments.sort_by_key(|s| s.0);
788        let real_size = segments[0].3;
789        let initialized_size = segments[0].4;
790        let compression_unit = segments[0].5;
791        let mut runs: Vec<Extent> = Vec::new();
792        for seg in &segments {
793            runs.extend(seg.6.iter().copied());
794        }
795
796        let cluster_size = self.boot.cluster_size() as u64;
797        if is_compressed && compression_unit > 0 {
798            let cu_clusters = validate_compression_unit(compression_unit)?;
799            // Bound the decode buffers (2 * cu_size) against the device.
800            let cu_size = cluster_size.checked_mul(cu_clusters).ok_or_else(|| {
801                crate::Error::InvalidImage("ntfs: compression-unit size overflow".into())
802            })?;
803            checked_alloc_len(cu_size, dev.total_size(), "compression unit")?;
804            return Ok(Box::new(CompressedReader::new(
805                dev,
806                cluster_size,
807                cu_clusters,
808                runs,
809                real_size,
810                initialized_size,
811            )));
812        }
813
814        Ok(Box::new(NonResidentReader {
815            dev,
816            cluster_size,
817            runs,
818            real_size,
819            initialized_size,
820            pos: 0,
821            cluster_buf: vec![0u8; cluster_size as usize],
822            cached_vcn: u64::MAX,
823            cached_cluster_filled: false,
824        }))
825    }
826
827    /// Open the default unnamed $DATA stream of `path` as a seekable
828    /// reader. Backs [`crate::fs::Filesystem::open_file_ro`]. The
829    /// returned reader is one of resident / non-resident / compressed
830    /// depending on how the stream is stored; all three implement
831    /// `Read + Seek + FileReadHandle`.
832    pub fn open_file_seekable<'a>(
833        &'a mut self,
834        dev: &'a mut dyn BlockDevice,
835        path: &str,
836    ) -> Result<NtfsSeekableReader<'a>> {
837        let rec_no = self.lookup_path(dev, path)?;
838        let records = self.load_record_set(dev, rec_no)?;
839        let hdr = mft::RecordHeader::parse(&records[0].1)?;
840        if !hdr.is_in_use() {
841            return Err(crate::Error::InvalidImage(format!(
842                "ntfs: record {rec_no} is not in use"
843            )));
844        }
845        if hdr.is_directory() {
846            return Err(crate::Error::InvalidArgument(format!(
847                "ntfs: {path:?} is a directory"
848            )));
849        }
850
851        let stream_name = "";
852        let mut resident_bytes: Option<Vec<u8>> = None;
853        type Segment = (u64, u64, u64, u64, u64, u8, Vec<Extent>);
854        let mut segments: Vec<Segment> = Vec::new();
855        let mut is_encrypted = false;
856        let mut is_compressed = false;
857        for (_rec, rec_buf) in &records {
858            let h = mft::RecordHeader::parse(rec_buf)?;
859            for attr_res in AttributeIter::new(rec_buf, h.first_attribute_offset as usize) {
860                let attr = attr_res?;
861                if attr.type_code != TYPE_DATA {
862                    continue;
863                }
864                if attr.name != stream_name {
865                    continue;
866                }
867                if attr.flags & ATTR_FLAG_ENCRYPTED != 0 {
868                    is_encrypted = true;
869                }
870                if attr.flags & ATTR_FLAG_COMPRESSED != 0 {
871                    is_compressed = true;
872                }
873                match attr.kind {
874                    AttributeKind::Resident { value, .. } => {
875                        resident_bytes = Some(value.to_vec());
876                    }
877                    AttributeKind::NonResident {
878                        starting_vcn,
879                        last_vcn,
880                        allocated_size,
881                        real_size,
882                        initialized_size,
883                        compression_unit,
884                        runs,
885                    } => {
886                        segments.push((
887                            starting_vcn,
888                            last_vcn,
889                            allocated_size,
890                            real_size,
891                            initialized_size,
892                            compression_unit,
893                            runs,
894                        ));
895                    }
896                }
897            }
898        }
899        if is_encrypted {
900            return Err(crate::Error::Unsupported(
901                "ntfs: encrypted $DATA (EFS) is not supported".into(),
902            ));
903        }
904        if let Some(bytes) = resident_bytes {
905            return Ok(NtfsSeekableReader::Resident(ResidentReader {
906                bytes,
907                pos: 0,
908            }));
909        }
910        if segments.is_empty() {
911            return Err(crate::Error::InvalidImage(format!(
912                "ntfs: stream {stream_name:?} not found on record {rec_no}"
913            )));
914        }
915        segments.sort_by_key(|s| s.0);
916        let real_size = segments[0].3;
917        let initialized_size = segments[0].4;
918        let compression_unit = segments[0].5;
919        let mut runs: Vec<Extent> = Vec::new();
920        for seg in &segments {
921            runs.extend(seg.6.iter().copied());
922        }
923        let cluster_size = self.boot.cluster_size() as u64;
924        if is_compressed && compression_unit > 0 {
925            let cu_clusters = validate_compression_unit(compression_unit)?;
926            let cu_size = cluster_size.checked_mul(cu_clusters).ok_or_else(|| {
927                crate::Error::InvalidImage("ntfs: compression-unit size overflow".into())
928            })?;
929            checked_alloc_len(cu_size, dev.total_size(), "compression unit")?;
930            return Ok(NtfsSeekableReader::Compressed(CompressedReader::new(
931                dev,
932                cluster_size,
933                cu_clusters,
934                runs,
935                real_size,
936                initialized_size,
937            )));
938        }
939        Ok(NtfsSeekableReader::NonResident(NonResidentReader {
940            dev,
941            cluster_size,
942            runs,
943            real_size,
944            initialized_size,
945            pos: 0,
946            cluster_buf: vec![0u8; cluster_size as usize],
947            cached_vcn: u64::MAX,
948            cached_cluster_filled: false,
949        }))
950    }
951
952    /// Collect cross-FS xattr metadata for `path` using the
953    /// `xattr_keys` mapping. Note: streams (ADS) bigger than memory
954    /// would be a problem; we cap them at 1 MiB and surface
955    /// `Unsupported` if exceeded.
956    pub fn read_xattrs(
957        &mut self,
958        dev: &mut dyn BlockDevice,
959        path: &str,
960    ) -> Result<HashMap<String, Vec<u8>>> {
961        let rec_no = self.lookup_path(dev, path)?;
962        let records = self.load_record_set(dev, rec_no)?;
963
964        let mut out: HashMap<String, Vec<u8>> = HashMap::new();
965        let mut ads_names: Vec<String> = Vec::new();
966        let mut win32_short_name: Option<Vec<u8>> = None;
967        let mut security_id: Option<u32> = None;
968        let mut have_inline_security = false;
969        for (_rec, rec_buf) in &records {
970            let h = mft::RecordHeader::parse(rec_buf)?;
971            for attr_res in AttributeIter::new(rec_buf, h.first_attribute_offset as usize) {
972                let attr = attr_res?;
973                match attr.type_code {
974                    TYPE_STANDARD_INFORMATION => {
975                        if let AttributeKind::Resident { value, .. } = attr.kind {
976                            let si = StandardInformation::parse(value)?;
977                            out.insert(
978                                xattr_keys::DOS_ATTRS.into(),
979                                si.file_attributes.to_le_bytes().to_vec(),
980                            );
981                            out.insert(xattr_keys::TIMES_RAW.into(), si.times_raw().to_vec());
982                            // NTFS >= 3.0 extended $STANDARD_INFORMATION
983                            // adds owner_id / security_id / quota / USN
984                            // starting at offset 0x30. security_id is at
985                            // 0x34 — a non-zero value points at $Secure:$SII
986                            // for the shared SD.
987                            if value.len() >= 0x38 {
988                                let id = u32::from_le_bytes(value[0x34..0x38].try_into().unwrap());
989                                if id != 0 {
990                                    security_id = Some(id);
991                                }
992                            }
993                        }
994                    }
995                    TYPE_FILE_NAME => {
996                        if let AttributeKind::Resident { value, .. } = attr.kind {
997                            let fname = FileName::parse(value)?;
998                            if fname.namespace == FileName::NAMESPACE_DOS
999                                || fname.namespace == FileName::NAMESPACE_WIN32_DOS
1000                            {
1001                                let raw_utf16: Vec<u8> = fname
1002                                    .name
1003                                    .encode_utf16()
1004                                    .flat_map(|u| u.to_le_bytes())
1005                                    .collect();
1006                                win32_short_name = Some(raw_utf16);
1007                            }
1008                        }
1009                    }
1010                    TYPE_OBJECT_ID => {
1011                        if let AttributeKind::Resident { value, .. } = attr.kind {
1012                            // First 16 bytes are the GUID.
1013                            let take = value.len().min(16);
1014                            out.insert(xattr_keys::OBJECT_ID.into(), value[..take].to_vec());
1015                        }
1016                    }
1017                    TYPE_SECURITY_DESCRIPTOR => {
1018                        if let AttributeKind::Resident { value, .. } = attr.kind {
1019                            out.insert(xattr_keys::SECURITY.into(), value.to_vec());
1020                            have_inline_security = true;
1021                        }
1022                        // Non-resident inline SDs are unusual; $Secure
1023                        // handles the common shared-SD case below.
1024                    }
1025                    TYPE_REPARSE_POINT => {
1026                        if let AttributeKind::Resident { value, .. } = attr.kind {
1027                            out.insert(xattr_keys::REPARSE.into(), value.to_vec());
1028                        }
1029                    }
1030                    TYPE_DATA if !attr.name.is_empty() && !ads_names.contains(&attr.name) => {
1031                        ads_names.push(attr.name.clone());
1032                    }
1033                    _ => {}
1034                }
1035            }
1036        }
1037        if let Some(name) = win32_short_name {
1038            out.insert(xattr_keys::SHORT_NAME.into(), name);
1039        }
1040
1041        // Resolve shared security descriptor via $Secure if applicable.
1042        if !have_inline_security
1043            && let Some(id) = security_id
1044            && let Some(sd) = self.resolve_security_descriptor(dev, id)?
1045        {
1046            out.insert(xattr_keys::SECURITY.into(), sd);
1047        }
1048
1049        // Pull each ADS payload through the streaming reader, with a
1050        // 1 MiB safety cap.
1051        for name in ads_names {
1052            let key = format!("{}{}", xattr_keys::ADS_PREFIX, name);
1053            let mut reader = self.open_stream_by_record(dev, rec_no, &name)?;
1054            let mut buf = Vec::new();
1055            let mut chunk = [0u8; 8192];
1056            loop {
1057                let n = reader.read(&mut chunk).map_err(crate::Error::from)?;
1058                if n == 0 {
1059                    break;
1060                }
1061                if buf.len() + n > 1024 * 1024 {
1062                    return Err(crate::Error::Unsupported(format!(
1063                        "ntfs: ADS {name:?} exceeds 1 MiB cap for xattr passthrough"
1064                    )));
1065                }
1066                buf.extend_from_slice(&chunk[..n]);
1067            }
1068            out.insert(key, buf);
1069        }
1070
1071        Ok(out)
1072    }
1073
1074    /// Lazily load `$UpCase` (record 10) into the `Ntfs::upcase` cache.
1075    /// Failure to read it falls back to an identity table — synthetic test
1076    /// images may not carry `$UpCase`, and case-sensitive comparison is
1077    /// the safe degraded behaviour. Any error encountered is silently
1078    /// swallowed in favour of the identity table.
1079    fn ensure_upcase(&mut self, dev: &mut dyn BlockDevice) -> Result<()> {
1080        if self.upcase.is_some() {
1081            return Ok(());
1082        }
1083        // Attempt to open record 10's default $DATA stream.
1084        match self.read_metadata_stream(dev, MFT_RECORD_UPCASE, "", 256 * 1024) {
1085            Ok(bytes) => {
1086                self.upcase = Some(UpcaseTable::from_bytes(&bytes));
1087            }
1088            Err(_) => {
1089                self.upcase = Some(UpcaseTable::identity());
1090            }
1091        }
1092        Ok(())
1093    }
1094
1095    /// Read up to `cap` bytes of `(rec_no, stream_name)` into a `Vec<u8>`.
1096    /// Used for `$UpCase` and `$Secure:$SDS`. Bypasses path lookup (so it
1097    /// doesn't recurse into upcase / the security file itself).
1098    fn read_metadata_stream(
1099        &mut self,
1100        dev: &mut dyn BlockDevice,
1101        rec_no: u64,
1102        stream_name: &str,
1103        cap: usize,
1104    ) -> Result<Vec<u8>> {
1105        let mut reader = self.open_stream_by_record(dev, rec_no, stream_name)?;
1106        let mut out = Vec::new();
1107        let mut tmp = [0u8; 8192];
1108        loop {
1109            let n = reader.read(&mut tmp).map_err(crate::Error::from)?;
1110            if n == 0 {
1111                break;
1112            }
1113            if out.len() + n > cap {
1114                let take = cap - out.len();
1115                out.extend_from_slice(&tmp[..take]);
1116                break;
1117            }
1118            out.extend_from_slice(&tmp[..n]);
1119        }
1120        Ok(out)
1121    }
1122
1123    /// Resolve a `security_id` (from `$STANDARD_INFORMATION`) to its raw
1124    /// self-relative `SECURITY_DESCRIPTOR` bytes. The id is keyed through
1125    /// `$Secure:$SII`, which points at an offset + size within
1126    /// `$Secure:$SDS`. The SDS entry is prefixed with a 20-byte header
1127    /// (hash, id, offset, size) followed by the SD payload.
1128    ///
1129    /// Returns `Ok(None)` if `$Secure` is missing / unreadable, or if the
1130    /// id doesn't match any SII entry. Returns `Ok(Some(_))` with the SD
1131    /// bytes only, capped at `MAX_SECURITY_DESCRIPTOR_BYTES`.
1132    fn resolve_security_descriptor(
1133        &mut self,
1134        dev: &mut dyn BlockDevice,
1135        security_id: u32,
1136    ) -> Result<Option<Vec<u8>>> {
1137        // Build / reuse the $SII cache.
1138        if self.sii_cache.is_none() {
1139            let cache = self.build_sii_cache(dev).unwrap_or_default();
1140            self.sii_cache = Some(cache);
1141        }
1142        let cache = self.sii_cache.as_ref().expect("sii_cache populated");
1143        let Some(&(offset, size)) = cache.get(&security_id) else {
1144            return Ok(None);
1145        };
1146        if size as u64 > MAX_SECURITY_DESCRIPTOR_BYTES {
1147            return Ok(None);
1148        }
1149
1150        // Read `size` bytes from $Secure:$SDS starting at `offset`.
1151        let mut reader = self.open_stream_by_record(dev, MFT_RECORD_SECURE, "$SDS")?;
1152        // Skip to `offset`.
1153        let mut skipped: u64 = 0;
1154        let mut sink = [0u8; 8192];
1155        while skipped < offset {
1156            let want = (offset - skipped).min(sink.len() as u64) as usize;
1157            let n = reader.read(&mut sink[..want]).map_err(crate::Error::from)?;
1158            if n == 0 {
1159                return Ok(None);
1160            }
1161            skipped += n as u64;
1162        }
1163        let mut blob = vec![0u8; size as usize];
1164        let mut filled = 0;
1165        while filled < blob.len() {
1166            let n = reader
1167                .read(&mut blob[filled..])
1168                .map_err(crate::Error::from)?;
1169            if n == 0 {
1170                blob.truncate(filled);
1171                break;
1172            }
1173            filled += n;
1174        }
1175        if blob.len() < 0x14 {
1176            return Ok(None);
1177        }
1178        // First 20 bytes are the SDS-entry header; the SD payload follows.
1179        let entry_size = u32::from_le_bytes(blob[16..20].try_into().unwrap()) as usize;
1180        if entry_size <= 0x14 || entry_size > blob.len() {
1181            return Ok(None);
1182        }
1183        let sd = blob[0x14..entry_size].to_vec();
1184        Ok(Some(sd))
1185    }
1186
1187    /// Walk `$Secure:$SII` (an `$INDEX_ROOT` + optional `$INDEX_ALLOCATION`
1188    /// keyed by security_id) and return a `security_id -> (offset, size)`
1189    /// map into `$SDS`.
1190    fn build_sii_cache(&mut self, dev: &mut dyn BlockDevice) -> Result<HashMap<u32, (u64, u32)>> {
1191        let records = self.load_record_set(dev, MFT_RECORD_SECURE)?;
1192        let mut root_value: Option<Vec<u8>> = None;
1193        let mut alloc_runs: Option<Vec<Extent>> = None;
1194        let mut index_block_size: u32 = 0;
1195        for (_rec, rec_buf) in &records {
1196            let h = mft::RecordHeader::parse(rec_buf)?;
1197            for attr_res in AttributeIter::new(rec_buf, h.first_attribute_offset as usize) {
1198                let attr = attr_res?;
1199                if attr.name != "$SII" {
1200                    continue;
1201                }
1202                match (attr.type_code, attr.kind) {
1203                    (TYPE_INDEX_ROOT, AttributeKind::Resident { value, .. }) => {
1204                        let hdr = index::IndexRootHeader::parse(value)?;
1205                        index_block_size = hdr.index_block_size;
1206                        root_value = Some(value.to_vec());
1207                    }
1208                    (TYPE_INDEX_ALLOCATION, AttributeKind::NonResident { runs, .. }) => {
1209                        match alloc_runs.as_mut() {
1210                            Some(existing) => existing.extend(runs),
1211                            None => alloc_runs = Some(runs),
1212                        }
1213                    }
1214                    _ => {}
1215                }
1216            }
1217        }
1218        let Some(root_value) = root_value else {
1219            return Ok(HashMap::new());
1220        };
1221        let root_hdr = index::IndexRootHeader::parse(&root_value)?;
1222        let entries_start = root_hdr.header_offset + root_hdr.first_entry_offset as usize;
1223        let entries_len = (root_hdr.bytes_in_use as usize).saturating_sub(16);
1224        let mut cache = HashMap::new();
1225        // Guard the slice: a malicious $SII root can set first_entry_offset
1226        // past the end of the resident value. Mirror index::walk_index_node.
1227        if entries_start > root_value.len() {
1228            return Err(crate::Error::InvalidImage(
1229                "ntfs: $SII root entries start past end of value".into(),
1230            ));
1231        }
1232        let avail = root_value.len() - entries_start;
1233        let entry_buf = &root_value[entries_start..entries_start + entries_len.min(avail)];
1234        for e in secure::walk_sii_node(entry_buf)? {
1235            cache.insert(e.security_id, (e.sds_offset, e.sds_size));
1236        }
1237
1238        // Walk allocation blocks if any. Each INDX block carries the same
1239        // entry stream layout we just decoded for the root.
1240        if let Some(runs) = alloc_runs
1241            && index_block_size > 0
1242        {
1243            let cluster_size = u64::from(self.boot.cluster_size());
1244            // Bound the per-block allocation against the device. This walk is
1245            // best-effort (caller falls back to an empty cache), so an
1246            // oversized block size aborts the walk rather than erroring.
1247            let block_size =
1248                checked_alloc_len(u64::from(index_block_size), dev.total_size(), "$SII block")?;
1249            let mut visited = std::collections::HashSet::<u64>::new();
1250            // Iterate every block in the run list rather than tree-
1251            // descending — $SII isn't deep in practice and a flat
1252            // scan keeps the cache builder simple.
1253            let mut walked: u64 = 0;
1254            for ext in &runs {
1255                let Some(span) = ext.length.checked_mul(cluster_size) else {
1256                    break;
1257                };
1258                if let Some(lcn) = ext.lcn {
1259                    let mut local: u64 = 0;
1260                    while local < span {
1261                        let Some(phys) = lcn
1262                            .checked_mul(cluster_size)
1263                            .and_then(|b| b.checked_add(local))
1264                        else {
1265                            break;
1266                        };
1267                        if visited.insert(phys) {
1268                            let mut blk = vec![0u8; block_size];
1269                            if dev.read_at(phys, &mut blk).is_ok()
1270                                && mft::apply_fixup(&mut blk, self.boot.bytes_per_sector as usize)
1271                                    .is_ok()
1272                                && let Ok(blk_hdr) = index::IndexBlockHeader::parse(&blk)
1273                            {
1274                                let s = blk_hdr.entries_start();
1275                                let l = blk_hdr.entries_byte_len();
1276                                if s + l <= blk.len() {
1277                                    let entries = &blk[s..s + l];
1278                                    if let Ok(rows) = secure::walk_sii_node(entries) {
1279                                        for r in rows {
1280                                            cache.insert(r.security_id, (r.sds_offset, r.sds_size));
1281                                        }
1282                                    }
1283                                }
1284                            }
1285                        }
1286                        local += block_size as u64;
1287                    }
1288                }
1289                walked = walked.saturating_add(span);
1290            }
1291            let _ = walked;
1292        }
1293        Ok(cache)
1294    }
1295}
1296
/// Streaming reader over a resident $DATA value (whole payload already in
/// the MFT record).
///
/// The payload is held as an owned `Vec<u8>`, so the reader carries no
/// borrow of the device or of the record buffer it was copied from.
pub struct ResidentReader {
    /// Complete contents of the stream.
    bytes: Vec<u8>,
    /// Offset into `bytes` at which the next read starts. The `Seek`
    /// impl does not clamp it, so it may lie past `bytes.len()`.
    pos: usize,
}
1303
1304impl Read for ResidentReader {
1305    fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
1306        let n = (self.bytes.len() - self.pos).min(out.len());
1307        out[..n].copy_from_slice(&self.bytes[self.pos..self.pos + n]);
1308        self.pos += n;
1309        Ok(n)
1310    }
1311}
1312
1313impl std::io::Seek for ResidentReader {
1314    fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
1315        let total = self.bytes.len() as i128;
1316        let new = match pos {
1317            std::io::SeekFrom::Start(n) => n as i128,
1318            std::io::SeekFrom::Current(d) => self.pos as i128 + d as i128,
1319            std::io::SeekFrom::End(d) => total + d as i128,
1320        };
1321        if new < 0 {
1322            return Err(std::io::Error::new(
1323                std::io::ErrorKind::InvalidInput,
1324                "ntfs: seek to negative offset",
1325            ));
1326        }
1327        self.pos = new as usize;
1328        Ok(self.pos as u64)
1329    }
1330}
1331
/// Streaming reader over a non-resident $DATA stream. Reads at most one
/// cluster from disk at a time into `cluster_buf`. Bytes past
/// `initialized_size` (but before `real_size`) read as zero — that's the
/// NTFS "valid data length" semantics.
pub struct NonResidentReader<'a> {
    /// Device the clusters are read from.
    dev: &'a mut dyn BlockDevice,
    /// Size of one cluster in bytes.
    cluster_size: u64,
    /// Run list of the stream. Extents are walked in order, each covering
    /// `length` consecutive VCNs; an extent whose `lcn` is `None` is
    /// sparse.
    runs: Vec<Extent>,
    /// Logical length of the stream in bytes; reads return EOF at or
    /// beyond this offset.
    real_size: u64,
    /// Offset from which stream bytes are reported as zero instead of
    /// being taken from the cluster buffer.
    initialized_size: u64,
    /// Current read offset within the stream.
    pos: u64,
    /// Scratch buffer holding the contents of one cluster.
    cluster_buf: Vec<u8>,
    /// VCN whose contents `cluster_buf` holds; only meaningful while
    /// `cached_cluster_filled` is `true`.
    cached_vcn: u64,
    /// Whether `cluster_buf` currently holds the cluster `cached_vcn`.
    cached_cluster_filled: bool,
}
1347
1348impl<'a> NonResidentReader<'a> {
1349    /// Find the physical byte offset of VCN `vcn`. Returns `None` for
1350    /// sparse extents.
1351    fn map_vcn(&self, vcn: u64) -> std::io::Result<Option<u64>> {
1352        let mut walked: u64 = 0;
1353        for ext in &self.runs {
1354            let walked_end = walked
1355                .checked_add(ext.length)
1356                .ok_or_else(|| std::io::Error::other("ntfs: run-list VCN length overflow"))?;
1357            if vcn < walked_end {
1358                let local = vcn - walked;
1359                return match ext.lcn {
1360                    Some(lcn) => lcn
1361                        .checked_add(local)
1362                        .and_then(|c| c.checked_mul(self.cluster_size))
1363                        .map(Some)
1364                        .ok_or_else(|| std::io::Error::other("ntfs: VCN byte offset overflow")),
1365                    None => Ok(None),
1366                };
1367            }
1368            walked = walked_end;
1369        }
1370        Err(std::io::Error::new(
1371            std::io::ErrorKind::UnexpectedEof,
1372            format!("ntfs: VCN {vcn} past end of run list"),
1373        ))
1374    }
1375}
1376
1377impl<'a> std::io::Seek for NonResidentReader<'a> {
1378    fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
1379        let total = self.real_size as i128;
1380        let new = match pos {
1381            std::io::SeekFrom::Start(n) => n as i128,
1382            std::io::SeekFrom::Current(d) => self.pos as i128 + d as i128,
1383            std::io::SeekFrom::End(d) => total + d as i128,
1384        };
1385        if new < 0 {
1386            return Err(std::io::Error::new(
1387                std::io::ErrorKind::InvalidInput,
1388                "ntfs: seek to negative offset",
1389            ));
1390        }
1391        self.pos = new as u64;
1392        Ok(self.pos)
1393    }
1394}
1395
1396impl<'a> Read for NonResidentReader<'a> {
1397    fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
1398        if self.pos >= self.real_size {
1399            return Ok(0);
1400        }
1401        let cs = self.cluster_size;
1402        let vcn = self.pos / cs;
1403        let off = (self.pos % cs) as usize;
1404        if !self.cached_cluster_filled || self.cached_vcn != vcn {
1405            let phys = self.map_vcn(vcn)?;
1406            match phys {
1407                Some(p) => {
1408                    self.dev
1409                        .read_at(p, &mut self.cluster_buf)
1410                        .map_err(std::io::Error::other)?;
1411                }
1412                None => {
1413                    // Sparse: read as zero.
1414                    self.cluster_buf.fill(0);
1415                }
1416            }
1417            self.cached_vcn = vcn;
1418            self.cached_cluster_filled = true;
1419        }
1420        let remaining_file = self.real_size - self.pos;
1421        let n = ((cs - off as u64) as usize)
1422            .min(out.len())
1423            .min(remaining_file as usize);
1424        // Zero-fill the tail past initialized_size.
1425        if self.pos + n as u64 <= self.initialized_size {
1426            out[..n].copy_from_slice(&self.cluster_buf[off..off + n]);
1427        } else if self.pos >= self.initialized_size {
1428            out[..n].fill(0);
1429        } else {
1430            let copy_n = (self.initialized_size - self.pos) as usize;
1431            out[..copy_n].copy_from_slice(&self.cluster_buf[off..off + copy_n]);
1432            out[copy_n..n].fill(0);
1433        }
1434        self.pos += n as u64;
1435        Ok(n)
1436    }
1437}
1438
1439/// Streaming reader over an LZNT1-compressed non-resident `$DATA` stream.
1440///
1441/// NTFS groups clusters into "compression units" of `1 << compression_unit`
1442/// clusters each (16 at the canonical 4 KiB-cluster / `compression_unit=4`
1443/// case). One unit on disk is one of:
1444///
1445/// * **All-zero** — the unit's run list slice is wholly sparse. We yield
1446///   `cu_size` bytes of zero.
1447/// * **Stored** — exactly `cu_clusters` clusters of real data with no
1448///   sparse tail; the unit is held verbatim. We pass those bytes through.
1449/// * **Compressed** — fewer than `cu_clusters` clusters of data followed
1450///   by sparse tail (NTFS deallocates the saved tail). We LZNT1-decode
1451///   the real prefix into a `cu_size`-byte buffer.
1452///
1453/// The reader keeps one decoded compression unit cached so reads inside
1454/// the same unit are zero-cost after the initial fetch.
1455pub struct CompressedReader<'a> {
1456    dev: &'a mut dyn BlockDevice,
1457    cluster_size: u64,
1458    cu_clusters: u64,
1459    cu_size: u64,
1460    runs: Vec<Extent>,
1461    real_size: u64,
1462    initialized_size: u64,
1463    pos: u64,
1464    /// Scratch buffer for one CU's compressed-on-disk bytes (up to
1465    /// `cu_size`).
1466    src_buf: Vec<u8>,
1467    /// Decoded CU contents. Always exactly `cu_size` bytes long.
1468    out_buf: Vec<u8>,
1469    /// Which compression unit (counted in CUs from the start of the
1470    /// attribute) is currently materialized in `out_buf`, or `u64::MAX`
1471    /// if none.
1472    cached_cu_index: u64,
1473}
1474
1475impl<'a> CompressedReader<'a> {
1476    fn new(
1477        dev: &'a mut dyn BlockDevice,
1478        cluster_size: u64,
1479        cu_clusters: u64,
1480        runs: Vec<Extent>,
1481        real_size: u64,
1482        initialized_size: u64,
1483    ) -> Self {
1484        let cu_size = cluster_size * cu_clusters;
1485        Self {
1486            dev,
1487            cluster_size,
1488            cu_clusters,
1489            cu_size,
1490            runs,
1491            real_size,
1492            initialized_size,
1493            pos: 0,
1494            src_buf: vec![0u8; cu_size as usize],
1495            out_buf: vec![0u8; cu_size as usize],
1496            cached_cu_index: u64::MAX,
1497        }
1498    }
1499
1500    /// Resolve the `i`th run-list cluster (counted as VCN) to its on-disk
1501    /// (lcn, length-remaining-in-run) tuple, or `None` for sparse.
1502    fn map_vcn(&self, vcn: u64) -> std::io::Result<Option<u64>> {
1503        let mut walked: u64 = 0;
1504        for ext in &self.runs {
1505            let walked_end = walked
1506                .checked_add(ext.length)
1507                .ok_or_else(|| std::io::Error::other("ntfs: run-list VCN length overflow"))?;
1508            if vcn < walked_end {
1509                let local = vcn - walked;
1510                return match ext.lcn {
1511                    Some(lcn) => lcn
1512                        .checked_add(local)
1513                        .and_then(|c| c.checked_mul(self.cluster_size))
1514                        .map(Some)
1515                        .ok_or_else(|| std::io::Error::other("ntfs: VCN byte offset overflow")),
1516                    None => Ok(None),
1517                };
1518            }
1519            walked = walked_end;
1520        }
1521        Err(std::io::Error::new(
1522            std::io::ErrorKind::UnexpectedEof,
1523            format!("ntfs: VCN {vcn} past end of run list"),
1524        ))
1525    }
1526
1527    /// Walk `cu_clusters` consecutive VCNs and decide how many of them have
1528    /// a real LCN. The first `real_clusters` VCNs of the CU carry data;
1529    /// the remaining `cu_clusters - real_clusters` are sparse (i.e. the
1530    /// compression saved those clusters). All-zero CUs report 0.
1531    fn count_real_clusters_in_cu(&self, base_vcn: u64) -> std::io::Result<u64> {
1532        let mut count = 0u64;
1533        for k in 0..self.cu_clusters {
1534            let phys = self.map_vcn(base_vcn + k)?;
1535            if phys.is_some() {
1536                count += 1;
1537            }
1538        }
1539        Ok(count)
1540    }
1541
1542    /// Materialize CU number `cu_index` into `self.out_buf`.
1543    fn load_cu(&mut self, cu_index: u64) -> std::io::Result<()> {
1544        if self.cached_cu_index == cu_index {
1545            return Ok(());
1546        }
1547        let base_vcn = cu_index * self.cu_clusters;
1548        let real_clusters = self.count_real_clusters_in_cu(base_vcn)?;
1549        if real_clusters == 0 {
1550            // All-sparse CU → all zero.
1551            for b in &mut self.out_buf {
1552                *b = 0;
1553            }
1554        } else if real_clusters == self.cu_clusters {
1555            // Stored verbatim — concatenate every cluster's bytes.
1556            for k in 0..self.cu_clusters {
1557                let phys = self
1558                    .map_vcn(base_vcn + k)?
1559                    .ok_or_else(|| std::io::Error::other("ntfs: stored-CU sparse cluster"))?;
1560                let lo = (k * self.cluster_size) as usize;
1561                let hi = lo + self.cluster_size as usize;
1562                self.dev
1563                    .read_at(phys, &mut self.out_buf[lo..hi])
1564                    .map_err(std::io::Error::other)?;
1565            }
1566        } else {
1567            // Compressed: first `real_clusters` clusters carry an LZNT1
1568            // stream; the rest of the CU is sparse padding.
1569            let src_len = (real_clusters * self.cluster_size) as usize;
1570            self.src_buf.resize(src_len, 0);
1571            for k in 0..real_clusters {
1572                let phys = self.map_vcn(base_vcn + k)?.ok_or_else(|| {
1573                    std::io::Error::other("ntfs: compressed-CU sparse mid-cluster")
1574                })?;
1575                let lo = (k * self.cluster_size) as usize;
1576                let hi = lo + self.cluster_size as usize;
1577                self.dev
1578                    .read_at(phys, &mut self.src_buf[lo..hi])
1579                    .map_err(std::io::Error::other)?;
1580            }
1581            compression::decompress_unit(&self.src_buf, &mut self.out_buf)
1582                .map_err(std::io::Error::other)?;
1583        }
1584        self.cached_cu_index = cu_index;
1585        Ok(())
1586    }
1587}
1588
1589impl<'a> std::io::Seek for CompressedReader<'a> {
1590    fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
1591        let total = self.real_size as i128;
1592        let new = match pos {
1593            std::io::SeekFrom::Start(n) => n as i128,
1594            std::io::SeekFrom::Current(d) => self.pos as i128 + d as i128,
1595            std::io::SeekFrom::End(d) => total + d as i128,
1596        };
1597        if new < 0 {
1598            return Err(std::io::Error::new(
1599                std::io::ErrorKind::InvalidInput,
1600                "ntfs: seek to negative offset",
1601            ));
1602        }
1603        self.pos = new as u64;
1604        Ok(self.pos)
1605    }
1606}
1607
1608impl<'a> Read for CompressedReader<'a> {
1609    fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
1610        if self.pos >= self.real_size {
1611            return Ok(0);
1612        }
1613        let cu_index = self.pos / self.cu_size;
1614        let off = (self.pos % self.cu_size) as usize;
1615        self.load_cu(cu_index)?;
1616        let remaining_file = self.real_size - self.pos;
1617        let n = ((self.cu_size - off as u64) as usize)
1618            .min(out.len())
1619            .min(remaining_file as usize);
1620        if self.pos + n as u64 <= self.initialized_size {
1621            out[..n].copy_from_slice(&self.out_buf[off..off + n]);
1622        } else if self.pos >= self.initialized_size {
1623            out[..n].fill(0);
1624        } else {
1625            let copy_n = (self.initialized_size - self.pos) as usize;
1626            out[..copy_n].copy_from_slice(&self.out_buf[off..off + copy_n]);
1627            out[copy_n..n].fill(0);
1628        }
1629        self.pos += n as u64;
1630        Ok(n)
1631    }
1632}
1633
1634/// Seekable wrapper over NTFS's three flavours of $DATA reader.
1635/// Returned by [`Ntfs::open_file_seekable`] and used to back
1636/// [`crate::fs::Filesystem::open_file_ro`]. Implements
1637/// `Read + Seek + FileReadHandle`, dispatching to the variant.
1638pub enum NtfsSeekableReader<'a> {
1639    Resident(ResidentReader),
1640    NonResident(NonResidentReader<'a>),
1641    Compressed(CompressedReader<'a>),
1642}
1643
1644impl<'a> Read for NtfsSeekableReader<'a> {
1645    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
1646        match self {
1647            Self::Resident(r) => r.read(buf),
1648            Self::NonResident(r) => r.read(buf),
1649            Self::Compressed(r) => r.read(buf),
1650        }
1651    }
1652}
1653
1654impl<'a> std::io::Seek for NtfsSeekableReader<'a> {
1655    fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
1656        match self {
1657            Self::Resident(r) => r.seek(pos),
1658            Self::NonResident(r) => r.seek(pos),
1659            Self::Compressed(r) => r.seek(pos),
1660        }
1661    }
1662}
1663
1664impl<'a> crate::fs::FileReadHandle for NtfsSeekableReader<'a> {
1665    fn len(&self) -> u64 {
1666        match self {
1667            Self::Resident(r) => r.bytes.len() as u64,
1668            Self::NonResident(r) => r.real_size,
1669            Self::Compressed(r) => r.real_size,
1670        }
1671    }
1672}
1673
/// Extended-attribute names under which this driver exposes NTFS-native
/// metadata, so that it survives a round trip through other filesystems.
pub mod xattr_keys {
    /// `$STANDARD_INFORMATION.file_attributes`, stored as a 32-bit
    /// little-endian value.
    pub const DOS_ATTRS: &str = "user.ntfs.dos_attrs";

    /// The `$OBJECT_ID` GUID as 16 raw bytes.
    pub const OBJECT_ID: &str = "user.ntfs.object_id";

    /// Reparse-point tag (little-endian `u32`) immediately followed by the
    /// raw reparse data. The value is exposed verbatim: the driver never
    /// follows junctions, symlinks, or any other reparse-point type while
    /// resolving paths or reading.
    pub const REPARSE: &str = "user.ntfs.reparse";

    /// Prefix for Alternate Data Streams; the complete key is
    /// `user.ntfs.ads.<name>`.
    pub const ADS_PREFIX: &str = "user.ntfs.ads.";

    /// Self-relative NT `SECURITY_DESCRIPTOR` blob. It comes either from a
    /// resident `$SECURITY_DESCRIPTOR` attribute or, for files that use a
    /// shared descriptor, from `$STANDARD_INFORMATION.security_id` looked
    /// up through `$Secure:$SII` → `$Secure:$SDS`.
    pub const SECURITY: &str = "system.ntfs_security";

    /// The short 8.3 name (UTF-16LE, taken from a `$FILE_NAME` whose
    /// namespace is DOS).
    pub const SHORT_NAME: &str = "user.ntfs.short_name";

    /// Four raw NT FILETIMEs — create, modify, change, access — at 100 ns
    /// resolution: 4 × 8 = 32 bytes, little-endian.
    pub const TIMES_RAW: &str = "user.ntfs.times.raw";
}
1698
// ----------------------------------------------------------------------
// `crate::fs::Filesystem` trait impl — bridges Ntfs into the generic
// walker. A handle from `open()` starts read-only, but the first trait
// mutator lazily rebuilds the writer state from disk (see
// `mutation_capability`), so mutators work on reopened images as well
// as freshly formatted ones.
// ----------------------------------------------------------------------
1704
1705impl crate::fs::FilesystemFactory for Ntfs {
1706    type FormatOpts = format::FormatOpts;
1707
1708    fn format(dev: &mut dyn BlockDevice, opts: &Self::FormatOpts) -> Result<Self> {
1709        Self::format(dev, opts)
1710    }
1711
1712    fn open(dev: &mut dyn BlockDevice) -> Result<Self> {
1713        Self::open(dev)
1714    }
1715}
1716
1717impl crate::fs::Filesystem for Ntfs {
1718    // Consumes the FileSource during create_file, so let the
1719    // streaming repack buffer small files in memory instead of
1720    // spilling each to a temp file (see create_file_streaming).
1721    fn streams_immediately(&self) -> bool {
1722        true
1723    }
1724
1725    /// NTFS supports full in-place edits. A handle from [`Ntfs::open`]
1726    /// starts read-only (`writer: None`), but the first mutation lazily
1727    /// reconstructs the writer state from disk (see
1728    /// `writer::ensure_writer`), so a reopened image — e.g. one inside a
1729    /// qcow2 used as a read/write store — accepts add / `open_file_rw`
1730    /// just like a freshly-formatted one.
1731    fn mutation_capability(&self) -> crate::fs::MutationCapability {
1732        crate::fs::MutationCapability::Mutable
1733    }
1734
1735    fn create_file(
1736        &mut self,
1737        dev: &mut dyn BlockDevice,
1738        path: &std::path::Path,
1739        src: crate::fs::FileSource,
1740        meta: crate::fs::FileMeta,
1741    ) -> Result<()> {
1742        let s = path
1743            .to_str()
1744            .ok_or_else(|| crate::Error::InvalidArgument("ntfs: non-UTF-8 path".into()))?;
1745        self.create_file(dev, s, src, meta)
1746    }
1747
1748    fn create_dir(
1749        &mut self,
1750        dev: &mut dyn BlockDevice,
1751        path: &std::path::Path,
1752        meta: crate::fs::FileMeta,
1753    ) -> Result<()> {
1754        let s = path
1755            .to_str()
1756            .ok_or_else(|| crate::Error::InvalidArgument("ntfs: non-UTF-8 path".into()))?;
1757        self.create_dir(dev, s, meta)
1758    }
1759
1760    fn create_symlink(
1761        &mut self,
1762        dev: &mut dyn BlockDevice,
1763        path: &std::path::Path,
1764        target: &std::path::Path,
1765        meta: crate::fs::FileMeta,
1766    ) -> Result<()> {
1767        let s = path
1768            .to_str()
1769            .ok_or_else(|| crate::Error::InvalidArgument("ntfs: non-UTF-8 path".into()))?;
1770        let t = target.to_str().ok_or_else(|| {
1771            crate::Error::InvalidArgument("ntfs: non-UTF-8 symlink target".into())
1772        })?;
1773        self.create_symlink(dev, s, t, meta)
1774    }
1775
1776    fn create_device(
1777        &mut self,
1778        dev: &mut dyn BlockDevice,
1779        path: &std::path::Path,
1780        kind: crate::fs::DeviceKind,
1781        major: u32,
1782        minor: u32,
1783        meta: crate::fs::FileMeta,
1784    ) -> Result<()> {
1785        let s = path
1786            .to_str()
1787            .ok_or_else(|| crate::Error::InvalidArgument("ntfs: non-UTF-8 path".into()))?;
1788        self.create_device(dev, s, kind, major, minor, meta)
1789    }
1790
1791    fn remove(&mut self, dev: &mut dyn BlockDevice, path: &std::path::Path) -> Result<()> {
1792        let s = path
1793            .to_str()
1794            .ok_or_else(|| crate::Error::InvalidArgument("ntfs: non-UTF-8 path".into()))?;
1795        self.remove(dev, s)
1796    }
1797
1798    fn list(
1799        &mut self,
1800        dev: &mut dyn BlockDevice,
1801        path: &std::path::Path,
1802    ) -> Result<Vec<crate::fs::DirEntry>> {
1803        let s = path
1804            .to_str()
1805            .ok_or_else(|| crate::Error::InvalidArgument("ntfs: non-UTF-8 path".into()))?;
1806        self.list_path(dev, s)
1807    }
1808
1809    fn read_file<'a>(
1810        &'a mut self,
1811        dev: &'a mut dyn BlockDevice,
1812        path: &std::path::Path,
1813    ) -> Result<Box<dyn std::io::Read + 'a>> {
1814        let s = path
1815            .to_str()
1816            .ok_or_else(|| crate::Error::InvalidArgument("ntfs: non-UTF-8 path".into()))?;
1817        let r = self.open_file_reader(dev, s)?;
1818        Ok(Box::new(r))
1819    }
1820
1821    /// NTFS has no POSIX ownership/mode; we surface what maps cleanly:
1822    /// the four timestamps (NT-FILETIME → Unix) and a mode synthesised
1823    /// from the DOS attribute bits (directory + read-only). uid/gid stay
1824    /// 0. The kind/size come from the directory index (authoritative, and
1825    /// the size is what the repack walker streams). Native NTFS metadata
1826    /// (DOS attrs, ADS, security, …) round-trips via the trait
1827    /// [`crate::fs::Filesystem::list_xattrs`] impl below.
1828    fn getattr(
1829        &mut self,
1830        dev: &mut dyn BlockDevice,
1831        path: &std::path::Path,
1832    ) -> Result<crate::fs::FileAttrs> {
1833        let s = path
1834            .to_str()
1835            .ok_or_else(|| crate::Error::InvalidArgument("ntfs: non-UTF-8 path".into()))?;
1836        let norm = s.trim_end_matches('/');
1837        if norm.is_empty() {
1838            return Ok(crate::fs::FileAttrs {
1839                kind: crate::fs::EntryKind::Dir,
1840                mode: 0o755,
1841                uid: 0,
1842                gid: 0,
1843                size: 0,
1844                blocks: 0,
1845                nlink: 2,
1846                atime: 0,
1847                mtime: 0,
1848                ctime: 0,
1849                rdev: 0,
1850                inode: MFT_RECORD_ROOT as u32,
1851            });
1852        }
1853        // kind / size / inode from the parent's index — the same source
1854        // the trait default uses, and the only authoritative file size.
1855        let (parent, name) = norm.rsplit_once('/').unwrap_or(("", norm));
1856        let parent = if parent.is_empty() { "/" } else { parent };
1857        let de = self
1858            .list_path(dev, parent)?
1859            .into_iter()
1860            .find(|e| e.name == name)
1861            .ok_or_else(|| crate::Error::InvalidArgument(format!("ntfs: no entry at {s:?}")))?;
1862
1863        // mode + times from $STANDARD_INFORMATION (surfaced as xattrs).
1864        let xa = self.read_xattrs(dev, s).unwrap_or_default();
1865        let dos = xa
1866            .get(xattr_keys::DOS_ATTRS)
1867            .filter(|v| v.len() >= 4)
1868            .map(|v| u32::from_le_bytes(v[0..4].try_into().unwrap()))
1869            .unwrap_or(0);
1870        let read_only = dos & 0x1 != 0; // FILE_ATTRIBUTE_READONLY
1871        let mode = match de.kind {
1872            crate::fs::EntryKind::Dir => 0o755,
1873            _ if read_only => 0o444,
1874            _ => 0o644,
1875        };
1876        // TIMES_RAW = [creation, modified, mft_changed, accessed] FILETIMEs.
1877        let filetime_to_unix = |ft: u64| (ft / 10_000_000).saturating_sub(11_644_473_600) as u32;
1878        let pick = |off: usize| -> u32 {
1879            xa.get(xattr_keys::TIMES_RAW)
1880                .filter(|v| v.len() >= off + 8)
1881                .map(|v| filetime_to_unix(u64::from_le_bytes(v[off..off + 8].try_into().unwrap())))
1882                .unwrap_or(0)
1883        };
1884        Ok(crate::fs::FileAttrs {
1885            kind: de.kind,
1886            mode,
1887            uid: 0,
1888            gid: 0,
1889            size: de.size,
1890            blocks: de.size.div_ceil(512),
1891            nlink: 1,
1892            atime: pick(24),
1893            mtime: pick(8),
1894            ctime: pick(16),
1895            rdev: 0,
1896            inode: de.inode,
1897        })
1898    }
1899
1900    /// Surface NTFS's native metadata (DOS attributes, object id, reparse
1901    /// data, alternate data streams, security descriptor, short name, raw
1902    /// timestamps) as `user.ntfs.*` / `system.ntfs_security` xattrs so it
1903    /// round-trips through repack.
1904    fn list_xattrs(
1905        &mut self,
1906        dev: &mut dyn BlockDevice,
1907        path: &std::path::Path,
1908    ) -> Result<Vec<crate::fs::XattrPair>> {
1909        let s = path
1910            .to_str()
1911            .ok_or_else(|| crate::Error::InvalidArgument("ntfs: non-UTF-8 path".into()))?;
1912        let mut pairs: Vec<crate::fs::XattrPair> = self
1913            .read_xattrs(dev, s)?
1914            .into_iter()
1915            .map(|(name, value)| crate::fs::XattrPair { name, value })
1916            .collect();
1917        pairs.sort_by(|a, b| a.name.cmp(&b.name));
1918        Ok(pairs)
1919    }
1920
1921    fn open_file_ro<'a>(
1922        &'a mut self,
1923        dev: &'a mut dyn BlockDevice,
1924        path: &std::path::Path,
1925    ) -> Result<Box<dyn crate::fs::FileReadHandle + 'a>> {
1926        let s = path
1927            .to_str()
1928            .ok_or_else(|| crate::Error::InvalidArgument("ntfs: non-UTF-8 path".into()))?;
1929        let r = self.open_file_seekable(dev, s)?;
1930        Ok(Box::new(r))
1931    }
1932
1933    fn open_file_rw<'a>(
1934        &'a mut self,
1935        dev: &'a mut dyn BlockDevice,
1936        path: &std::path::Path,
1937        flags: crate::fs::OpenFlags,
1938        meta: Option<crate::fs::FileMeta>,
1939    ) -> Result<Box<dyn crate::fs::FileHandle + 'a>> {
1940        let s = path
1941            .to_str()
1942            .ok_or_else(|| crate::Error::InvalidArgument("ntfs: non-UTF-8 path".into()))?;
1943        self.open_rw(dev, s, flags, meta)
1944    }
1945
1946    fn flush(&mut self, dev: &mut dyn BlockDevice) -> Result<()> {
1947        Self::flush(self, dev)
1948    }
1949}
1950
1951#[cfg(test)]
1952mod tests;