Skip to main content

ntfs_core/
refs.rs

1//! ReFS (Resilient File System) aware handling of USN V3 records.
2//!
//! ReFS uses full 128-bit file reference numbers instead of NTFS's 64-bit
//! references (a 48-bit entry number plus a 16-bit sequence number). This
//! module provides types and analysis for ReFS volumes where USN_RECORD_V3
//! records contain these wider references.
6//!
7//! Key differences from NTFS:
8//! - File references are opaque 128-bit IDs, not split into entry+sequence
9//! - No traditional $MFT, so path reconstruction relies solely on journal rewind
10//! - V3 records have `major_version: 3`
11
12use std::collections::HashMap;
13use std::fmt;
14
15use crate::usn::UsnRecord;
16
17// ---- Types ----
18
/// Opaque 128-bit identifier of a file on a ReFS volume.
///
/// NTFS packs a 48-bit MFT entry number and a 16-bit sequence number into one
/// 64-bit reference. ReFS instead uses a single 128-bit value; its upper and
/// lower 64-bit halves carry no entry/sequence meaning of their own.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct RefsFileId(pub u128);

impl RefsFileId {
    /// Wrap a raw `u128` as a `RefsFileId`.
    pub fn from_u128(value: u128) -> Self {
        Self(value)
    }

    /// Return the most significant 64 bits of the identifier.
    pub fn high(&self) -> u64 {
        let Self(raw) = *self;
        // After shifting, only the upper half remains, so the cast loses nothing.
        (raw >> 64) as u64
    }

    /// Return the least significant 64 bits of the identifier.
    pub fn low(&self) -> u64 {
        let Self(raw) = *self;
        // Mask off the upper half explicitly before narrowing.
        (raw & u128::from(u64::MAX)) as u64
    }
}

impl fmt::Display for RefsFileId {
    /// Render as `0x<high>:0x<low>`, each half zero-padded to 16 lowercase
    /// hexadecimal digits.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (hi, lo) = (self.high(), self.low());
        write!(f, "0x{hi:016x}:0x{lo:016x}")
    }
}
49
50/// A USN V3 record enriched with full 128-bit ReFS file references.
51///
52/// The standard `UsnRecord` truncates the 128-bit references to fit into
53/// `mft_entry: u64` + `mft_sequence: u16`. This wrapper preserves the full
54/// 128-bit file and parent references as they appeared in the raw V3 record.
55#[derive(Debug, Clone)]
56pub struct RefsRecord {
57    /// The underlying parsed USN record.
58    pub record: UsnRecord,
59    /// Full 128-bit file reference.
60    pub file_id: RefsFileId,
61    /// Full 128-bit parent reference.
62    pub parent_id: RefsFileId,
63}
64
65impl RefsRecord {
66    /// Wrap a `UsnRecord` with explicit 128-bit file references.
67    pub fn new(record: UsnRecord, file_id: RefsFileId, parent_id: RefsFileId) -> Self {
68        Self {
69            record,
70            file_id,
71            parent_id,
72        }
73    }
74}
75
76/// Analyzer for ReFS USN journal data.
77///
78/// Provides grouping by full 128-bit file ID, ReFS vs NTFS volume detection,
79/// and journal-rewind-only path reconstruction (since ReFS has no traditional MFT).
80pub struct RefsAnalyzer {
81    records: Vec<RefsRecord>,
82}
83
84impl RefsAnalyzer {
85    /// Create a new analyzer from a set of `RefsRecord`s.
86    pub fn new(records: Vec<RefsRecord>) -> Self {
87        Self { records }
88    }
89
90    /// Detect whether the records likely originate from a ReFS volume.
91    ///
92    /// Heuristic: if all records have `major_version == 3` and any record has
93    /// a `file_id` whose upper 64 bits are non-zero, it is likely ReFS.
94    /// Pure NTFS V3 records would have upper bits all zero.
95    pub fn is_likely_refs(&self) -> bool {
96        if self.records.is_empty() {
97            return false;
98        }
99
100        let all_v3 = self.records.iter().all(|r| r.record.major_version == 3);
101        if !all_v3 {
102            return false;
103        }
104
105        // If any file or parent reference has non-zero upper 64 bits,
106        // this is likely a ReFS volume (NTFS V3 refs fit in lower 64 bits).
107        self.records
108            .iter()
109            .any(|r| r.file_id.high() != 0 || r.parent_id.high() != 0)
110    }
111
112    /// Group records by their full 128-bit file ID.
113    ///
114    /// Returns a map from `RefsFileId` to all records referencing that file.
115    pub fn group_by_file_id(&self) -> HashMap<RefsFileId, Vec<&RefsRecord>> {
116        let mut groups: HashMap<RefsFileId, Vec<&RefsRecord>> = HashMap::new();
117        for rec in &self.records {
118            groups.entry(rec.file_id).or_default().push(rec);
119        }
120        groups
121    }
122
123    /// Reconstruct file paths using journal rewind only (no MFT seeding).
124    ///
125    /// ReFS has no traditional $MFT, so path reconstruction must rely entirely
126    /// on walking the USN journal backwards to build the directory tree from
127    /// rename and create events.
128    ///
129    /// Returns a map from `RefsFileId` to reconstructed path (if resolvable).
130    pub fn reconstruct_paths(&self) -> HashMap<RefsFileId, String> {
131        // Build a lookup: file_id -> (filename, parent_id)
132        // Use the most recent (last seen) name for each file ID.
133        let mut lookup: HashMap<RefsFileId, (String, RefsFileId)> = HashMap::new();
134
135        for rec in &self.records {
136            lookup.insert(rec.file_id, (rec.record.filename.clone(), rec.parent_id));
137        }
138
139        // Determine root IDs: any parent_id that has no entry in the lookup
140        // is considered a root anchor.
141        let root_ids: std::collections::HashSet<RefsFileId> = self
142            .records
143            .iter()
144            .map(|r| r.parent_id)
145            .filter(|pid| !lookup.contains_key(pid))
146            .collect();
147
148        // Resolve paths by walking parent chains up to a root.
149        let mut paths: HashMap<RefsFileId, String> = HashMap::new();
150
151        for &file_id in lookup.keys() {
152            if root_ids.contains(&file_id) {
153                continue; // cov:unreachable: root_ids only holds parent_ids absent from lookup (filtered by !lookup.contains_key), but file_id iterates lookup.keys() ⇒ it is in lookup, so never a root_id
154            }
155
156            let mut components = Vec::new();
157            let mut current = file_id;
158            let mut visited = std::collections::HashSet::new();
159
160            loop {
161                if !visited.insert(current) {
162                    // Cycle detected, stop
163                    break;
164                }
165
166                if let Some((name, parent)) = lookup.get(&current) {
167                    components.push(name.clone());
168                    if root_ids.contains(parent) || !lookup.contains_key(parent) {
169                        break;
170                    }
171                    current = *parent;
172                } else {
173                    break; // cov:unreachable: current starts at a lookup key and only advances to *parent when lookup.contains_key(parent) holds (else the prior break fires), so lookup.get(&current) is always Some
174                }
175            }
176
177            components.reverse();
178            if !components.is_empty() {
179                paths.insert(file_id, components.join("\\"));
180            }
181        }
182
183        paths
184    }
185}
186
187// ---- Tests ----
188
#[cfg(test)]
mod tests {
    use super::*;
    use crate::usn::{FileAttributes, UsnReason, UsnRecord};
    use chrono::DateTime;

    /// Helper: build a `UsnRecord` with `major_version` 3 for testing.
    fn make_v3_record(
        mft_entry: u64,
        parent_mft_entry: u64,
        reason: UsnReason,
        filename: &str,
    ) -> UsnRecord {
        UsnRecord {
            mft_entry,
            mft_sequence: 0,
            parent_mft_entry,
            parent_mft_sequence: 0,
            usn: 1000,
            timestamp: DateTime::from_timestamp(1_700_000_000, 0).unwrap(),
            reason,
            filename: filename.to_string(),
            file_attributes: FileAttributes::from_bits_retain(0x20), // ARCHIVE
            source_info: 0,
            security_id: 0,
            major_version: 3,
        }
    }

    /// Helper: build a `FILE_CREATE` `RefsRecord` whose legacy 64-bit entry
    /// fields mirror the low halves of the given 128-bit IDs.
    fn make_refs_record(file_id: u128, parent_id: u128, filename: &str) -> RefsRecord {
        let file_id = RefsFileId::from_u128(file_id);
        let parent_id = RefsFileId::from_u128(parent_id);
        RefsRecord::new(
            make_v3_record(
                file_id.low(),
                parent_id.low(),
                UsnReason::FILE_CREATE,
                filename,
            ),
            file_id,
            parent_id,
        )
    }

    /// Helper: look up a reconstructed path as `&str` for concise assertions.
    fn path_of(
        paths: &std::collections::HashMap<RefsFileId, String>,
        id: RefsFileId,
    ) -> Option<&str> {
        paths.get(&id).map(String::as_str)
    }

    #[test]
    fn test_refs_file_id_from_u128() {
        // ReFS uses full 128-bit file IDs. Verify round-trip conversion.
        let value: u128 = 0x0000_0000_0000_0001_0000_0000_0000_0064;
        let id = RefsFileId::from_u128(value);
        assert_eq!(id.0, value);

        // Verify high/low extraction
        assert_eq!(id.high(), 0x0000_0000_0000_0001);
        assert_eq!(id.low(), 0x0000_0000_0000_0064);

        // Zero is a valid file ID
        let zero_id = RefsFileId::from_u128(0);
        assert_eq!(zero_id.0, 0);
        assert_eq!(zero_id.high(), 0);
        assert_eq!(zero_id.low(), 0);

        // Max value
        let max_id = RefsFileId::from_u128(u128::MAX);
        assert_eq!(max_id.high(), u64::MAX);
        assert_eq!(max_id.low(), u64::MAX);
    }

    #[test]
    fn test_refs_file_id_display() {
        // Display format should show high:low in hex
        let id = RefsFileId::from_u128(0x0000_0000_0000_0001_0000_0000_0000_0064);
        assert_eq!(format!("{id}"), "0x0000000000000001:0x0000000000000064");

        // Zero case
        let zero_id = RefsFileId::from_u128(0);
        assert_eq!(
            format!("{zero_id}"),
            "0x0000000000000000:0x0000000000000000"
        );

        // Large values
        let large_id = RefsFileId::from_u128(0xDEAD_BEEF_CAFE_BABE_1234_5678_9ABC_DEF0);
        assert_eq!(
            format!("{large_id}"),
            "0xdeadbeefcafebabe:0x123456789abcdef0"
        );
    }

    #[test]
    fn test_refs_volume_detection() {
        // Case 1: V3 records with upper bits set -> likely ReFS
        let wide = make_refs_record(
            0x0000_0000_0000_0001_0000_0000_0000_0064,
            0x0000_0000_0000_0001_0000_0000_0000_0005,
            "file.txt",
        );
        assert!(RefsAnalyzer::new(vec![wide]).is_likely_refs());

        // Case 2: V3 records with upper bits all zero -> likely NTFS using V3 format
        let narrow = make_refs_record(0xC8, 0x05, "ntfs_file.txt");
        assert!(!RefsAnalyzer::new(vec![narrow]).is_likely_refs());

        // Case 3: Empty records -> not ReFS
        assert!(!RefsAnalyzer::new(vec![]).is_likely_refs());
    }

    #[test]
    fn test_refs_record_grouping() {
        // Two records for file A (create + extend) and one for file B.
        let file_id_a = RefsFileId::from_u128(0x0000_0000_0000_0001_0000_0000_0000_000A);
        let file_id_b = RefsFileId::from_u128(0x0000_0000_0000_0001_0000_0000_0000_000B);
        let parent_id = RefsFileId::from_u128(0x0000_0000_0000_0001_0000_0000_0000_0005);

        let rec1 = RefsRecord::new(
            make_v3_record(10, 5, UsnReason::FILE_CREATE, "alpha.txt"),
            file_id_a,
            parent_id,
        );
        let rec2 = RefsRecord::new(
            make_v3_record(10, 5, UsnReason::DATA_EXTEND, "alpha.txt"),
            file_id_a,
            parent_id,
        );
        let rec3 = RefsRecord::new(
            make_v3_record(11, 5, UsnReason::FILE_CREATE, "beta.txt"),
            file_id_b,
            parent_id,
        );

        let analyzer = RefsAnalyzer::new(vec![rec1, rec2, rec3]);
        let groups = analyzer.group_by_file_id();

        assert_eq!(groups.len(), 2);
        assert_eq!(groups.get(&file_id_a).map(Vec::len), Some(2));
        assert_eq!(groups.get(&file_id_b).map(Vec::len), Some(1));

        // Verify the grouped records have the right filenames
        let a_records = groups.get(&file_id_a).unwrap();
        assert!(a_records.iter().all(|r| r.record.filename == "alpha.txt"));
    }

    #[test]
    fn test_refs_path_reconstruction_without_mft() {
        // Simulate a directory tree created purely through journal events:
        //   root (id=5) -> "Documents" (id=100) -> "report.docx" (id=200)
        //
        // ReFS has no MFT to seed from, so paths come only from journal entries.
        let root_id = RefsFileId::from_u128(5);
        let docs_id = RefsFileId::from_u128(100);
        let file_id = RefsFileId::from_u128(200);

        // Directory creation event: "Documents" created under root
        let mut dir_create = make_refs_record(100, 5, "Documents");
        dir_create.record.file_attributes = FileAttributes::from_bits_retain(0x10); // DIRECTORY

        // File creation event: "report.docx" created under "Documents"
        let file_create = make_refs_record(200, 100, "report.docx");

        let analyzer = RefsAnalyzer::new(vec![dir_create, file_create]);
        let paths = analyzer.reconstruct_paths();

        // The file should be resolvable to its full path
        assert_eq!(path_of(&paths, file_id), Some("Documents\\report.docx"));

        // The directory itself should be resolvable
        assert_eq!(path_of(&paths, docs_id), Some("Documents"));

        // Root has no record of its own, so it is only an anchor and gets no path.
        assert!(!paths.contains_key(&root_id));
    }

    #[test]
    fn test_refs_path_cycle_detection() {
        // Create a cycle: A's parent is B and B's parent is A.
        let id_a = RefsFileId::from_u128(10);
        let id_b = RefsFileId::from_u128(20);

        let analyzer = RefsAnalyzer::new(vec![
            make_refs_record(10, 20, "dir_a"),
            make_refs_record(20, 10, "dir_b"),
        ]);

        // Must terminate; each walk stops when it would revisit its start.
        let paths = analyzer.reconstruct_paths();
        assert_eq!(paths.len(), 2);
        assert_eq!(path_of(&paths, id_a), Some("dir_b\\dir_a"));
        assert_eq!(path_of(&paths, id_b), Some("dir_a\\dir_b"));
    }

    #[test]
    fn test_refs_empty_analyzer() {
        let analyzer = RefsAnalyzer::new(vec![]);
        assert!(analyzer.group_by_file_id().is_empty());
        assert!(analyzer.reconstruct_paths().is_empty());
    }

    #[test]
    fn test_refs_file_id_equality() {
        let id1 = RefsFileId::from_u128(42);
        let id2 = RefsFileId::from_u128(42);
        let id3 = RefsFileId::from_u128(43);
        assert_eq!(id1, id2);
        assert_ne!(id1, id3);
    }

    #[test]
    fn test_refs_reconstruct_paths_root_id_skipped() {
        // A record that describes a top-level directory whose own parent (999)
        // never appears as a file_id. Only IDs without a record act as
        // anchors, so the directory itself is NOT skipped: it resolves to its
        // own name, while the unknown parent gets no entry.
        let root_id = RefsFileId::from_u128(5);
        let unknown_parent = RefsFileId::from_u128(999);

        let analyzer = RefsAnalyzer::new(vec![make_refs_record(5, 999, "root_dir")]);
        let paths = analyzer.reconstruct_paths();

        assert_eq!(path_of(&paths, root_id), Some("root_dir"));
        assert!(!paths.contains_key(&unknown_parent));
    }

    #[test]
    fn test_refs_reconstruct_paths_single_orphan() {
        // A single record whose parent has no record: the walk stops right
        // after the first component because the parent cannot be looked up.
        let orphan_id = RefsFileId::from_u128(42);

        let analyzer = RefsAnalyzer::new(vec![make_refs_record(42, 999, "orphan.txt")]);
        let paths = analyzer.reconstruct_paths();

        // The orphan should still get a path (just its own name)
        assert_eq!(path_of(&paths, orphan_id), Some("orphan.txt"));
    }

    #[test]
    fn test_refs_reconstruct_deep_chain_with_missing_ancestor() {
        // Tests the path walk stopping when an ancestor has no record.
        // A -> B -> C -> (missing D)
        let id_a = RefsFileId::from_u128(10);
        let id_b = RefsFileId::from_u128(20);
        let id_c = RefsFileId::from_u128(30);

        let analyzer = RefsAnalyzer::new(vec![
            make_refs_record(10, 20, "file.txt"),
            make_refs_record(20, 30, "subdir"),
            make_refs_record(30, 40, "topdir"), // parent 40 is in no record
        ]);
        let paths = analyzer.reconstruct_paths();

        assert_eq!(path_of(&paths, id_a), Some("topdir\\subdir\\file.txt"));
        assert_eq!(path_of(&paths, id_b), Some("topdir\\subdir"));
        assert_eq!(path_of(&paths, id_c), Some("topdir"));
    }

    #[test]
    fn test_refs_mixed_v2_and_v3_not_refs() {
        // If any record is not V3, the set is not treated as ReFS, even when
        // the 128-bit IDs have non-zero upper halves.
        let file_id = RefsFileId::from_u128(0x0000_0000_0000_0001_0000_0000_0000_0064);
        let parent_id = RefsFileId::from_u128(0x0000_0000_0000_0001_0000_0000_0000_0005);

        let v2_record = UsnRecord {
            major_version: 2, // V2!
            ..make_v3_record(100, 5, UsnReason::FILE_CREATE, "v2file.txt")
        };
        let v2_rec = RefsRecord::new(v2_record, file_id, parent_id);
        let v3_rec = RefsRecord::new(
            make_v3_record(100, 5, UsnReason::FILE_CREATE, "v3file.txt"),
            file_id,
            parent_id,
        );

        // A V2 record on its own is rejected...
        assert!(!RefsAnalyzer::new(vec![v2_rec.clone()]).is_likely_refs());
        // ...and so is a genuinely mixed V3 + V2 set.
        assert!(!RefsAnalyzer::new(vec![v3_rec, v2_rec]).is_likely_refs());
    }

    #[test]
    fn test_refs_reconstruct_paths_parent_not_in_lookup() {
        // Two records A -> B where B's parent (C) has no record of its own.
        // Walking from A reaches B, then stops because C cannot be looked up,
        // so C acts as the anchor and contributes no path component.
        let id_a = RefsFileId::from_u128(10);
        let id_b = RefsFileId::from_u128(20);
        let id_c = RefsFileId::from_u128(30); // appears only as a parent

        let analyzer = RefsAnalyzer::new(vec![
            make_refs_record(10, 20, "file_a"),
            make_refs_record(20, 30, "dir_b"),
        ]);
        let paths = analyzer.reconstruct_paths();

        // A should resolve to dir_b\file_a
        assert_eq!(path_of(&paths, id_a), Some("dir_b\\file_a"));
        // B should resolve to dir_b
        assert_eq!(path_of(&paths, id_b), Some("dir_b"));
        // C has no record, so it gets no path.
        assert!(!paths.contains_key(&id_c));
    }
}