gix_pack/cache/lru.rs

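//! Least-recently-used caches for decoded pack entries.
//!
//! Two implementations are available, each behind its own cargo feature: `MemoryCappedHashmap`
//! (`pack-cache-lru-dynamic`) evicts entries based on the bytes of object data it holds, while
//! `StaticLinkedList` (`pack-cache-lru-static`) keeps a fixed number of the most recent objects.
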
use super::DecodeEntry;

#[cfg(feature = "pack-cache-lru-dynamic")]
mod memory {
    use std::num::NonZeroUsize;

    use clru::WeightScale;

    use super::DecodeEntry;
    use crate::cache::set_vec_to_slice;

    struct Entry {
        data: Vec<u8>,
        kind: gix_object::Kind,
        compressed_size: usize,
    }

    type Key = (u32, u64);
    struct CustomScale;

    // `clru` weighs each entry with this scale, so the configured capacity acts as a budget in
    // bytes of object data rather than as an entry count.
    impl WeightScale<Key, Entry> for CustomScale {
        fn weight(&self, _key: &Key, value: &Entry) -> usize {
            value.data.len()
        }
    }

    /// An LRU cache with hash map backing and an eviction rule based on the memory usage for object data in bytes.
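    ///
    /// A minimal usage sketch via the `DecodeEntry` trait (not compiled as a doc-test; the pack id,
    /// offset and sizes below are made up for illustration):
    ///
    /// ```ignore
    /// use gix_pack::cache::{lru::MemoryCappedHashmap, DecodeEntry};
    ///
    /// // Cap the cache at 64 MiB of decompressed object data.
    /// let mut cache = MemoryCappedHashmap::new(64 * 1024 * 1024);
    /// cache.put(0, 1024, b"decoded blob data", gix_object::Kind::Blob, 9);
    ///
    /// let mut buf = Vec::new();
    /// assert_eq!(cache.get(0, 1024, &mut buf), Some((gix_object::Kind::Blob, 9)));
    /// // `buf` now holds a copy of the cached object data.
    /// ```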
    pub struct MemoryCappedHashmap {
        inner: clru::CLruCache<Key, Entry, std::collections::hash_map::RandomState, CustomScale>,
        /// Buffers of entries that were replaced or rejected, kept for reuse to avoid fresh allocations.
        free_list: Vec<Vec<u8>>,
        debug: gix_features::cache::Debug,
    }

    impl MemoryCappedHashmap {
        /// Return a new instance which evicts least-recently-used items once it holds more than
        /// `memory_cap_in_bytes` of object data.
        pub fn new(memory_cap_in_bytes: usize) -> MemoryCappedHashmap {
            MemoryCappedHashmap {
                inner: clru::CLruCache::with_config(
                    clru::CLruCacheConfig::new(NonZeroUsize::new(memory_cap_in_bytes).expect("non zero"))
                        .with_scale(CustomScale),
                ),
                free_list: Vec::new(),
                debug: gix_features::cache::Debug::new(format!("MemoryCappedHashmap({memory_cap_in_bytes}B)")),
            }
        }
    }

    impl DecodeEntry for MemoryCappedHashmap {
        fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize) {
            self.debug.put();
            let Some(data) = set_vec_to_slice(self.free_list.pop().unwrap_or_default(), data) else {
                return;
            };
            let res = self.inner.put_with_weight(
                (pack_id, offset),
                Entry {
                    data,
                    kind,
                    compressed_size,
                },
            );
            match res {
                // The key was already present - recycle the replaced entry's buffer.
                Ok(Some(previous_entry)) => self.free_list.push(previous_entry.data),
                Ok(None) => {}
                // The new entry is too heavy for the cache's capacity - recycle its buffer as well.
                Err((_key, value)) => self.free_list.push(value.data),
            }
        }

        fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> {
            let res = self.inner.get(&(pack_id, offset)).and_then(|e| {
                set_vec_to_slice(out, &e.data)?;
                Some((e.kind, e.compressed_size))
            });
            if res.is_some() {
                self.debug.hit();
            } else {
                self.debug.miss();
            }
            res
        }
    }
}

#[cfg(feature = "pack-cache-lru-dynamic")]
pub use memory::MemoryCappedHashmap;

#[cfg(feature = "pack-cache-lru-static")]
mod _static {
    use super::DecodeEntry;
    use crate::cache::set_vec_to_slice;
    struct Entry {
        pack_id: u32,
        offset: u64,
        data: Vec<u8>,
        kind: gix_object::Kind,
        compressed_size: usize,
    }

    /// A cache using a least-recently-used implementation capable of storing the `SIZE` most recent objects.
    /// The cache must be small as the search is 'naive' and the underlying data structure is a linked list.
    /// Values of 64 seem to improve performance.
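    ///
    /// A brief usage sketch (not compiled as a doc-test; pack id, offset and sizes are made up
    /// for illustration):
    ///
    /// ```ignore
    /// use gix_pack::cache::{lru::StaticLinkedList, DecodeEntry};
    ///
    /// // Keep up to 64 decoded entries with the default 96 MiB budget for their object data.
    /// let mut cache = StaticLinkedList::<64>::default();
    /// cache.put(0, 4096, b"decoded tree data", gix_object::Kind::Tree, 12);
    ///
    /// let mut buf = Vec::new();
    /// assert_eq!(cache.get(0, 4096, &mut buf), Some((gix_object::Kind::Tree, 12)));
    /// ```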
    pub struct StaticLinkedList<const SIZE: usize> {
        inner: uluru::LRUCache<Entry, SIZE>,
        /// The buffer of the most recently evicted entry, kept around for reuse by the next insertion.
        last_evicted: Vec<u8>,
        debug: gix_features::cache::Debug,
        /// The number of bytes we are currently holding, taking into account the capacities of all `Vec`s we keep.
        mem_used: usize,
        /// The total amount of memory we should be able to hold with all entries combined.
        mem_limit: usize,
    }

    impl<const SIZE: usize> StaticLinkedList<SIZE> {
        /// Create a new list with a memory limit of `mem_limit` in bytes. If 0, there is no memory limit.
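        ///
        /// For instance (illustrative only):
        ///
        /// ```ignore
        /// use gix_pack::cache::lru::StaticLinkedList;
        ///
        /// // A zero limit disables the memory cap entirely.
        /// let unlimited = StaticLinkedList::<64>::new(0);
        /// // A non-zero limit caps the bytes held across all entries, here at 8 MiB.
        /// let capped = StaticLinkedList::<64>::new(8 * 1024 * 1024);
        /// ```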
        pub fn new(mem_limit: usize) -> Self {
            StaticLinkedList {
                inner: Default::default(),
                last_evicted: Vec::new(),
                debug: gix_features::cache::Debug::new(format!("StaticLinkedList<{SIZE}>")),
                mem_used: 0,
                mem_limit: if mem_limit == 0 { usize::MAX } else { mem_limit },
            }
        }
    }

    impl<const SIZE: usize> Default for StaticLinkedList<SIZE> {
        fn default() -> Self {
            Self::new(96 * 1024 * 1024)
        }
    }

    impl<const SIZE: usize> DecodeEntry for StaticLinkedList<SIZE> {
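        // Accounting note: `mem_used` tracks the capacities of every buffer this cache owns, i.e.
        // the `data` of each entry in `inner` plus the spare `last_evicted` buffer.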
        fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize) {
            // We cannot possibly hold this much.
            if data.len() > self.mem_limit {
                return;
            }
            // If we could hold it but are at limit, all we can do is make space.
            let mem_free = self.mem_limit - self.mem_used;
            if data.len() > mem_free {
                // prefer freeing free-lists instead of clearing our cache
                let free_list_cap = self.last_evicted.len();
                self.last_evicted = Vec::new();
                // still not enough? clear everything
                if data.len() > mem_free + free_list_cap {
                    self.inner.clear();
                    self.mem_used = 0;
                } else {
                    self.mem_used -= free_list_cap;
                }
            }
            self.debug.put();
            let mut v = std::mem::take(&mut self.last_evicted);
            self.mem_used -= v.capacity();
            if set_vec_to_slice(&mut v, data).is_none() {
                return;
            }
            self.mem_used += v.capacity();
            if let Some(previous) = self.inner.insert(Entry {
                offset,
                pack_id,
                data: v,
                kind,
                compressed_size,
            }) {
                // No need to adjust capacity as we already counted it.
                self.last_evicted = previous.data;
            }
        }

        fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> {
            let res = self.inner.lookup(|e: &mut Entry| {
                if e.pack_id == pack_id && e.offset == offset {
                    set_vec_to_slice(&mut *out, &e.data)?;
                    Some((e.kind, e.compressed_size))
                } else {
                    None
                }
            });
            if res.is_some() {
                self.debug.hit();
            } else {
                self.debug.miss();
            }
            res
        }
    }

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn no_limit() {
            let c = StaticLinkedList::<10>::new(0);
            assert_eq!(
                c.mem_limit,
                usize::MAX,
                "zero is automatically turned into a large limit that is equivalent to unlimited"
            );
        }

        #[test]
        fn journey() {
            let mut c = StaticLinkedList::<10>::new(100);
            assert_eq!(c.mem_limit, 100);
            assert_eq!(c.mem_used, 0);

            // enough memory for normal operation
            let mut last_mem_used = 0;
            for _ in 0..10 {
                c.put(0, 0, &[0], gix_object::Kind::Blob, 1);
                assert!(c.mem_used > last_mem_used);
                last_mem_used = c.mem_used;
            }
            assert_eq!(c.mem_used, 80, "there is a minimal vec size");
            assert_eq!(c.inner.len(), 10);
            assert_eq!(c.last_evicted.len(), 0);

            c.put(0, 0, &(0..20).collect::<Vec<_>>(), gix_object::Kind::Blob, 1);
            assert_eq!(c.inner.len(), 10);
            assert_eq!(c.mem_used, 80 + 20);
            assert_eq!(c.last_evicted.len(), 1);

            c.put(0, 0, &(0..50).collect::<Vec<_>>(), gix_object::Kind::Blob, 1);
            assert_eq!(c.inner.len(), 1, "the cache had to be cleared to fit the new entry");
            assert_eq!(c.last_evicted.len(), 0, "the free list was cleared");
            assert_eq!(c.mem_used, 50);

            c.put(0, 0, &(0..101).collect::<Vec<_>>(), gix_object::Kind::Blob, 1);
            assert_eq!(
                c.inner.len(),
                1,
                "objects that won't ever fit within the memory limit are ignored"
            );
        }
    }
}

#[cfg(feature = "pack-cache-lru-static")]
pub use _static::StaticLinkedList;