Skip to main content

gravityfile_scan/
inode.rs

1//! Inode tracking for hardlink deduplication.
2//!
3//! Uses a countdown DashMap: on first sight we record (nlink - 2) remaining
4//! links; each subsequent encounter decrements the counter and removes the
5//! entry when exhausted. This bounds memory to the number of *outstanding*
6//! hard-link duplicates rather than the full inode set.
7
8use std::collections::HashMap;
9
10use gravityfile_core::InodeInfo;
11
12/// Tracks seen inodes to prevent double-counting hardlinks.
13///
14/// When a file has multiple hardlinks, we only want to count its size once.
15/// This tracker uses a countdown map to track (inode, device) pairs
16/// and automatically evicts entries once all expected hard links have been seen.
17///
18/// Uses a plain `HashMap` since all access occurs from a single thread
19/// (the sequential `for entry_result in walker` loop in `collect_entries`).
20#[derive(Debug, Default)]
21pub struct InodeTracker {
22    seen: HashMap<InodeInfo, u32>,
23}
24
25impl InodeTracker {
26    /// Create a new inode tracker.
27    pub fn new() -> Self {
28        Self {
29            seen: HashMap::new(),
30        }
31    }
32
33    /// Track an inode. Returns `true` if this is the first time seeing it.
34    ///
35    /// `nlink` is the hard-link count from file metadata. When `nlink <= 1`
36    /// the file cannot have any additional hard links so we return `true`
37    /// immediately without touching the map (no memory allocated).
38    ///
39    /// On the first encounter the entry is inserted with a countdown equal to
40    /// `nlink - 1` — the number of duplicate encounters we still need to
41    /// suppress. Each subsequent encounter decrements the counter; when it
42    /// reaches zero the entry is removed, bounding memory to the number of
43    /// in-flight duplicate hard links.
44    pub fn track(&mut self, info: InodeInfo, nlink: u64) -> bool {
45        use std::collections::hash_map::Entry;
46
47        if nlink <= 1 {
48            return true;
49        }
50
51        match self.seen.entry(info) {
52            Entry::Vacant(e) => {
53                // First time we see this inode. Record how many duplicate
54                // encounters we still expect to suppress: (nlink - 1) remaining
55                // hard links after this first one.  We saturate at u32::MAX for
56                // pathological cases.
57                let remaining = (nlink - 1).min(u32::MAX as u64) as u32;
58                e.insert(remaining);
59                true
60            }
61            Entry::Occupied(mut e) => {
62                let count = e.get_mut();
63                if *count <= 1 {
64                    e.remove();
65                } else {
66                    *count -= 1;
67                }
68                false
69            }
70        }
71    }
72
73    /// Returns the number of inodes currently being tracked (in-flight duplicates).
74    ///
75    /// This is the number of entries in the countdown map — i.e. inodes whose
76    /// full set of hard links has not yet been encountered.  It reaches zero
77    /// once every hard-linked inode has been seen the expected number of times.
78    ///
79    /// Not part of the stable public API; exposed for integration tests.
80    #[doc(hidden)]
81    pub fn pending_count(&self) -> usize {
82        self.seen.len()
83    }
84}
85
86#[cfg(test)]
87mod tests {
88    use super::*;
89
90    #[test]
91    fn test_track_new_inode_nlink_1() {
92        let mut tracker = InodeTracker::new();
93        let info = InodeInfo::new(12345, 1);
94        // nlink=1 means no hard links; always fresh
95        assert!(tracker.track(info, 1));
96        assert!(tracker.track(info, 1));
97    }
98
99    #[test]
100    fn test_track_hardlink_nlink_2() {
101        let mut tracker = InodeTracker::new();
102        let info = InodeInfo::new(12345, 1);
103        // Two hard links: first returns true, second returns false
104        assert!(tracker.track(info, 2));
105        assert!(!tracker.track(info, 2));
106        // Entry should be evicted; next call treats it as fresh again
107        assert_eq!(tracker.pending_count(), 0);
108    }
109
110    #[test]
111    fn test_track_hardlink_nlink_3() {
112        let mut tracker = InodeTracker::new();
113        let info = InodeInfo::new(99, 2);
114        // Three hard links: first is counted, next two are duplicates
115        assert!(tracker.track(info, 3));
116        assert!(!tracker.track(info, 3));
117        assert!(!tracker.track(info, 3));
118        assert_eq!(tracker.pending_count(), 0);
119    }
120
121    #[test]
122    fn test_different_devices() {
123        let mut tracker = InodeTracker::new();
124        let info1 = InodeInfo::new(12345, 1);
125        let info2 = InodeInfo::new(12345, 2); // Same inode, different device
126
127        assert!(tracker.track(info1, 2));
128        assert!(tracker.track(info2, 2)); // Different device, so it's new
129    }
130}