gravityfile_scan/inode.rs
1//! Inode tracking for hardlink deduplication.
2//!
3//! Uses a countdown DashMap: on first sight we record (nlink - 2) remaining
4//! links; each subsequent encounter decrements the counter and removes the
5//! entry when exhausted. This bounds memory to the number of *outstanding*
6//! hard-link duplicates rather than the full inode set.
7
8use std::collections::HashMap;
9
10use gravityfile_core::InodeInfo;
11
12/// Tracks seen inodes to prevent double-counting hardlinks.
13///
14/// When a file has multiple hardlinks, we only want to count its size once.
15/// This tracker uses a countdown map to track (inode, device) pairs
16/// and automatically evicts entries once all expected hard links have been seen.
17///
18/// Uses a plain `HashMap` since all access occurs from a single thread
19/// (the sequential `for entry_result in walker` loop in `collect_entries`).
20#[derive(Debug, Default)]
21pub struct InodeTracker {
22 seen: HashMap<InodeInfo, u32>,
23}
24
25impl InodeTracker {
26 /// Create a new inode tracker.
27 pub fn new() -> Self {
28 Self {
29 seen: HashMap::new(),
30 }
31 }
32
33 /// Track an inode. Returns `true` if this is the first time seeing it.
34 ///
35 /// `nlink` is the hard-link count from file metadata. When `nlink <= 1`
36 /// the file cannot have any additional hard links so we return `true`
37 /// immediately without touching the map (no memory allocated).
38 ///
39 /// On the first encounter the entry is inserted with a countdown equal to
40 /// `nlink - 1` — the number of duplicate encounters we still need to
41 /// suppress. Each subsequent encounter decrements the counter; when it
42 /// reaches zero the entry is removed, bounding memory to the number of
43 /// in-flight duplicate hard links.
44 pub fn track(&mut self, info: InodeInfo, nlink: u64) -> bool {
45 use std::collections::hash_map::Entry;
46
47 if nlink <= 1 {
48 return true;
49 }
50
51 match self.seen.entry(info) {
52 Entry::Vacant(e) => {
53 // First time we see this inode. Record how many duplicate
54 // encounters we still expect to suppress: (nlink - 1) remaining
55 // hard links after this first one. We saturate at u32::MAX for
56 // pathological cases.
57 let remaining = (nlink - 1).min(u32::MAX as u64) as u32;
58 e.insert(remaining);
59 true
60 }
61 Entry::Occupied(mut e) => {
62 let count = e.get_mut();
63 if *count <= 1 {
64 e.remove();
65 } else {
66 *count -= 1;
67 }
68 false
69 }
70 }
71 }
72
73 /// Returns the number of inodes currently being tracked (in-flight duplicates).
74 ///
75 /// This is the number of entries in the countdown map — i.e. inodes whose
76 /// full set of hard links has not yet been encountered. It reaches zero
77 /// once every hard-linked inode has been seen the expected number of times.
78 ///
79 /// Not part of the stable public API; exposed for integration tests.
80 #[doc(hidden)]
81 pub fn pending_count(&self) -> usize {
82 self.seen.len()
83 }
84}
85
86#[cfg(test)]
87mod tests {
88 use super::*;
89
90 #[test]
91 fn test_track_new_inode_nlink_1() {
92 let mut tracker = InodeTracker::new();
93 let info = InodeInfo::new(12345, 1);
94 // nlink=1 means no hard links; always fresh
95 assert!(tracker.track(info, 1));
96 assert!(tracker.track(info, 1));
97 }
98
99 #[test]
100 fn test_track_hardlink_nlink_2() {
101 let mut tracker = InodeTracker::new();
102 let info = InodeInfo::new(12345, 1);
103 // Two hard links: first returns true, second returns false
104 assert!(tracker.track(info, 2));
105 assert!(!tracker.track(info, 2));
106 // Entry should be evicted; next call treats it as fresh again
107 assert_eq!(tracker.pending_count(), 0);
108 }
109
110 #[test]
111 fn test_track_hardlink_nlink_3() {
112 let mut tracker = InodeTracker::new();
113 let info = InodeInfo::new(99, 2);
114 // Three hard links: first is counted, next two are duplicates
115 assert!(tracker.track(info, 3));
116 assert!(!tracker.track(info, 3));
117 assert!(!tracker.track(info, 3));
118 assert_eq!(tracker.pending_count(), 0);
119 }
120
121 #[test]
122 fn test_different_devices() {
123 let mut tracker = InodeTracker::new();
124 let info1 = InodeInfo::new(12345, 1);
125 let info2 = InodeInfo::new(12345, 2); // Same inode, different device
126
127 assert!(tracker.track(info1, 2));
128 assert!(tracker.track(info2, 2)); // Different device, so it's new
129 }
130}