datafusion_execution/cache/
cache_unit.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::collections::HashMap;
19use std::sync::{Arc, Mutex};
20
21use crate::cache::cache_manager::{
22    FileMetadata, FileMetadataCache, FileMetadataCacheEntry,
23};
24use crate::cache::lru_queue::LruQueue;
25use crate::cache::CacheAccessor;
26
27use datafusion_common::Statistics;
28
29use dashmap::DashMap;
30use object_store::path::Path;
31use object_store::ObjectMeta;
32
33/// Default implementation of [`FileStatisticsCache`]
34///
35/// Stores collected statistics for files
36///
37/// Cache is invalided when file size or last modification has changed
38///
39/// [`FileStatisticsCache`]: crate::cache::cache_manager::FileStatisticsCache
40#[derive(Default)]
41pub struct DefaultFileStatisticsCache {
42    statistics: DashMap<Path, (ObjectMeta, Arc<Statistics>)>,
43}
44
45impl CacheAccessor<Path, Arc<Statistics>> for DefaultFileStatisticsCache {
46    type Extra = ObjectMeta;
47
48    /// Get `Statistics` for file location.
49    fn get(&self, k: &Path) -> Option<Arc<Statistics>> {
50        self.statistics
51            .get(k)
52            .map(|s| Some(Arc::clone(&s.value().1)))
53            .unwrap_or(None)
54    }
55
56    /// Get `Statistics` for file location. Returns None if file has changed or not found.
57    fn get_with_extra(&self, k: &Path, e: &Self::Extra) -> Option<Arc<Statistics>> {
58        self.statistics
59            .get(k)
60            .map(|s| {
61                let (saved_meta, statistics) = s.value();
62                if saved_meta.size != e.size
63                    || saved_meta.last_modified != e.last_modified
64                {
65                    // file has changed
66                    None
67                } else {
68                    Some(Arc::clone(statistics))
69                }
70            })
71            .unwrap_or(None)
72    }
73
74    /// Save collected file statistics
75    fn put(&self, _key: &Path, _value: Arc<Statistics>) -> Option<Arc<Statistics>> {
76        panic!("Put cache in DefaultFileStatisticsCache without Extra not supported.")
77    }
78
79    fn put_with_extra(
80        &self,
81        key: &Path,
82        value: Arc<Statistics>,
83        e: &Self::Extra,
84    ) -> Option<Arc<Statistics>> {
85        self.statistics
86            .insert(key.clone(), (e.clone(), value))
87            .map(|x| x.1)
88    }
89
90    fn remove(&mut self, k: &Path) -> Option<Arc<Statistics>> {
91        self.statistics.remove(k).map(|x| x.1 .1)
92    }
93
94    fn contains_key(&self, k: &Path) -> bool {
95        self.statistics.contains_key(k)
96    }
97
98    fn len(&self) -> usize {
99        self.statistics.len()
100    }
101
102    fn clear(&self) {
103        self.statistics.clear()
104    }
105    fn name(&self) -> String {
106        "DefaultFileStatisticsCache".to_string()
107    }
108}
109
110/// Default implementation of [`ListFilesCache`]
111///
112/// Collected files metadata for listing files.
113///
114/// Cache is not invalided until user calls [`Self::remove`] or [`Self::clear`].
115///
116/// [`ListFilesCache`]: crate::cache::cache_manager::ListFilesCache
117#[derive(Default)]
118pub struct DefaultListFilesCache {
119    statistics: DashMap<Path, Arc<Vec<ObjectMeta>>>,
120}
121
122impl CacheAccessor<Path, Arc<Vec<ObjectMeta>>> for DefaultListFilesCache {
123    type Extra = ObjectMeta;
124
125    fn get(&self, k: &Path) -> Option<Arc<Vec<ObjectMeta>>> {
126        self.statistics.get(k).map(|x| Arc::clone(x.value()))
127    }
128
129    fn get_with_extra(
130        &self,
131        _k: &Path,
132        _e: &Self::Extra,
133    ) -> Option<Arc<Vec<ObjectMeta>>> {
134        panic!("Not supported DefaultListFilesCache get_with_extra")
135    }
136
137    fn put(
138        &self,
139        key: &Path,
140        value: Arc<Vec<ObjectMeta>>,
141    ) -> Option<Arc<Vec<ObjectMeta>>> {
142        self.statistics.insert(key.clone(), value)
143    }
144
145    fn put_with_extra(
146        &self,
147        _key: &Path,
148        _value: Arc<Vec<ObjectMeta>>,
149        _e: &Self::Extra,
150    ) -> Option<Arc<Vec<ObjectMeta>>> {
151        panic!("Not supported DefaultListFilesCache put_with_extra")
152    }
153
154    fn remove(&mut self, k: &Path) -> Option<Arc<Vec<ObjectMeta>>> {
155        self.statistics.remove(k).map(|x| x.1)
156    }
157
158    fn contains_key(&self, k: &Path) -> bool {
159        self.statistics.contains_key(k)
160    }
161
162    fn len(&self) -> usize {
163        self.statistics.len()
164    }
165
166    fn clear(&self) {
167        self.statistics.clear()
168    }
169
170    fn name(&self) -> String {
171        "DefaultListFilesCache".to_string()
172    }
173}
174
175/// Handles the inner state of the [`DefaultFilesMetadataCache`] struct.
176struct DefaultFilesMetadataCacheState {
177    lru_queue: LruQueue<Path, (ObjectMeta, Arc<dyn FileMetadata>)>,
178    memory_limit: usize,
179    memory_used: usize,
180    cache_hits: HashMap<Path, usize>,
181}
182
183impl DefaultFilesMetadataCacheState {
184    fn new(memory_limit: usize) -> Self {
185        Self {
186            lru_queue: LruQueue::new(),
187            memory_limit,
188            memory_used: 0,
189            cache_hits: HashMap::new(),
190        }
191    }
192
193    /// Returns the respective entry from the cache, if it exists and the `size` and `last_modified`
194    /// properties from [`ObjectMeta`] match.
195    /// If the entry exists, it becomes the most recently used.
196    fn get(&mut self, k: &ObjectMeta) -> Option<Arc<dyn FileMetadata>> {
197        self.lru_queue
198            .get(&k.location)
199            .map(|(object_meta, metadata)| {
200                if object_meta.size != k.size
201                    || object_meta.last_modified != k.last_modified
202                {
203                    None
204                } else {
205                    *self.cache_hits.entry(k.location.clone()).or_insert(0) += 1;
206                    Some(Arc::clone(metadata))
207                }
208            })
209            .unwrap_or(None)
210    }
211
212    /// Checks if the metadata is currently cached (entry exists and the `size` and `last_modified`
213    /// properties of [`ObjectMeta`] match).
214    /// The LRU queue is not updated.
215    fn contains_key(&self, k: &ObjectMeta) -> bool {
216        self.lru_queue
217            .peek(&k.location)
218            .map(|(object_meta, _)| {
219                object_meta.size == k.size && object_meta.last_modified == k.last_modified
220            })
221            .unwrap_or(false)
222    }
223
224    /// Adds a new key-value pair to cache, meaning LRU entries might be evicted if required.
225    /// If the key is already in the cache, the previous metadata is returned.
226    /// If the size of the metadata is greater than the `memory_limit`, the value is not inserted.
227    fn put(
228        &mut self,
229        key: ObjectMeta,
230        value: Arc<dyn FileMetadata>,
231    ) -> Option<Arc<dyn FileMetadata>> {
232        let value_size = value.memory_size();
233
234        // no point in trying to add this value to the cache if it cannot fit entirely
235        if value_size > self.memory_limit {
236            return None;
237        }
238
239        self.cache_hits.insert(key.location.clone(), 0);
240        // if the key is already in the cache, the old value is removed
241        let old_value = self.lru_queue.put(key.location.clone(), (key, value));
242        self.memory_used += value_size;
243        if let Some((_, ref old_metadata)) = old_value {
244            self.memory_used -= old_metadata.memory_size();
245        }
246
247        self.evict_entries();
248
249        old_value.map(|v| v.1)
250    }
251
252    /// Evicts entries from the LRU cache until `memory_used` is lower than `memory_limit`.
253    fn evict_entries(&mut self) {
254        while self.memory_used > self.memory_limit {
255            if let Some(removed) = self.lru_queue.pop() {
256                let metadata: Arc<dyn FileMetadata> = removed.1 .1;
257                self.memory_used -= metadata.memory_size();
258            } else {
259                // cache is empty while memory_used > memory_limit, cannot happen
260                debug_assert!(
261                    false,
262                    "cache is empty while memory_used > memory_limit, cannot happen"
263                );
264                return;
265            }
266        }
267    }
268
269    /// Removes an entry from the cache and returns it, if it exists.
270    fn remove(&mut self, k: &ObjectMeta) -> Option<Arc<dyn FileMetadata>> {
271        if let Some((_, old_metadata)) = self.lru_queue.remove(&k.location) {
272            self.memory_used -= old_metadata.memory_size();
273            self.cache_hits.remove(&k.location);
274            Some(old_metadata)
275        } else {
276            None
277        }
278    }
279
280    /// Returns the number of entries currently cached.
281    fn len(&self) -> usize {
282        self.lru_queue.len()
283    }
284
285    /// Removes all entries from the cache.
286    fn clear(&mut self) {
287        self.lru_queue.clear();
288        self.memory_used = 0;
289        self.cache_hits.clear();
290    }
291}
292
293/// Default implementation of [`FileMetadataCache`]
294///
295/// Collected file embedded metadata cache.
296///
297/// The metadata for each file is invalidated when the file size or last
298/// modification time have been changed.
299///
300/// # Internal details
301///
302/// The `memory_limit` controls the maximum size of the cache, which uses a
303/// Least Recently Used eviction algorithm. When adding a new entry, if the total
304/// size of the cached entries exceeds `memory_limit`, the least recently used entries
305/// are evicted until the total size is lower than `memory_limit`.
306///
307/// # `Extra` Handling
308///
309/// Users should use the [`Self::get`] and [`Self::put`] methods. The
310/// [`Self::get_with_extra`] and [`Self::put_with_extra`] methods simply call
311/// `get` and `put`, respectively.
312pub struct DefaultFilesMetadataCache {
313    // the state is wrapped in a Mutex to ensure the operations are atomic
314    state: Mutex<DefaultFilesMetadataCacheState>,
315}
316
317impl DefaultFilesMetadataCache {
318    /// Create a new instance of [`DefaultFilesMetadataCache`].
319    ///
320    /// # Arguments
321    /// `memory_limit`:  the maximum size of the cache, in bytes
322    //
323    pub fn new(memory_limit: usize) -> Self {
324        Self {
325            state: Mutex::new(DefaultFilesMetadataCacheState::new(memory_limit)),
326        }
327    }
328
329    /// Returns the size of the cached memory, in bytes.
330    pub fn memory_used(&self) -> usize {
331        let state = self.state.lock().unwrap();
332        state.memory_used
333    }
334}
335
336impl FileMetadataCache for DefaultFilesMetadataCache {
337    fn cache_limit(&self) -> usize {
338        let state = self.state.lock().unwrap();
339        state.memory_limit
340    }
341
342    fn update_cache_limit(&self, limit: usize) {
343        let mut state = self.state.lock().unwrap();
344        state.memory_limit = limit;
345        state.evict_entries();
346    }
347
348    fn list_entries(&self) -> HashMap<Path, FileMetadataCacheEntry> {
349        let state = self.state.lock().unwrap();
350        let mut entries = HashMap::<Path, FileMetadataCacheEntry>::new();
351
352        for (path, (object_meta, metadata)) in state.lru_queue.list_entries() {
353            entries.insert(
354                path.clone(),
355                FileMetadataCacheEntry {
356                    object_meta: object_meta.clone(),
357                    size_bytes: metadata.memory_size(),
358                    hits: *state.cache_hits.get(path).expect("entry must exist"),
359                    extra: metadata.extra_info(),
360                },
361            );
362        }
363
364        entries
365    }
366}
367
368impl CacheAccessor<ObjectMeta, Arc<dyn FileMetadata>> for DefaultFilesMetadataCache {
369    type Extra = ObjectMeta;
370
371    fn get(&self, k: &ObjectMeta) -> Option<Arc<dyn FileMetadata>> {
372        let mut state = self.state.lock().unwrap();
373        state.get(k)
374    }
375
376    fn get_with_extra(
377        &self,
378        k: &ObjectMeta,
379        _e: &Self::Extra,
380    ) -> Option<Arc<dyn FileMetadata>> {
381        self.get(k)
382    }
383
384    fn put(
385        &self,
386        key: &ObjectMeta,
387        value: Arc<dyn FileMetadata>,
388    ) -> Option<Arc<dyn FileMetadata>> {
389        let mut state = self.state.lock().unwrap();
390        state.put(key.clone(), value)
391    }
392
393    fn put_with_extra(
394        &self,
395        key: &ObjectMeta,
396        value: Arc<dyn FileMetadata>,
397        _e: &Self::Extra,
398    ) -> Option<Arc<dyn FileMetadata>> {
399        self.put(key, value)
400    }
401
402    fn remove(&mut self, k: &ObjectMeta) -> Option<Arc<dyn FileMetadata>> {
403        let mut state = self.state.lock().unwrap();
404        state.remove(k)
405    }
406
407    fn contains_key(&self, k: &ObjectMeta) -> bool {
408        let state = self.state.lock().unwrap();
409        state.contains_key(k)
410    }
411
412    fn len(&self) -> usize {
413        let state = self.state.lock().unwrap();
414        state.len()
415    }
416
417    fn clear(&self) {
418        let mut state = self.state.lock().unwrap();
419        state.clear();
420    }
421
422    fn name(&self) -> String {
423        "DefaultFilesMetadataCache".to_string()
424    }
425}
426
427#[cfg(test)]
428mod tests {
429    use std::collections::HashMap;
430    use std::sync::Arc;
431
432    use crate::cache::cache_manager::{
433        FileMetadata, FileMetadataCache, FileMetadataCacheEntry,
434    };
435    use crate::cache::cache_unit::{
436        DefaultFileStatisticsCache, DefaultFilesMetadataCache, DefaultListFilesCache,
437    };
438    use crate::cache::CacheAccessor;
439    use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
440    use chrono::DateTime;
441    use datafusion_common::Statistics;
442    use object_store::path::Path;
443    use object_store::ObjectMeta;
444
445    #[test]
446    fn test_statistics_cache() {
447        let meta = ObjectMeta {
448            location: Path::from("test"),
449            last_modified: DateTime::parse_from_rfc3339("2022-09-27T22:36:00+02:00")
450                .unwrap()
451                .into(),
452            size: 1024,
453            e_tag: None,
454            version: None,
455        };
456        let cache = DefaultFileStatisticsCache::default();
457        assert!(cache.get_with_extra(&meta.location, &meta).is_none());
458
459        cache.put_with_extra(
460            &meta.location,
461            Statistics::new_unknown(&Schema::new(vec![Field::new(
462                "test_column",
463                DataType::Timestamp(TimeUnit::Second, None),
464                false,
465            )]))
466            .into(),
467            &meta,
468        );
469        assert!(cache.get_with_extra(&meta.location, &meta).is_some());
470
471        // file size changed
472        let mut meta2 = meta.clone();
473        meta2.size = 2048;
474        assert!(cache.get_with_extra(&meta2.location, &meta2).is_none());
475
476        // file last_modified changed
477        let mut meta2 = meta.clone();
478        meta2.last_modified = DateTime::parse_from_rfc3339("2022-09-27T22:40:00+02:00")
479            .unwrap()
480            .into();
481        assert!(cache.get_with_extra(&meta2.location, &meta2).is_none());
482
483        // different file
484        let mut meta2 = meta;
485        meta2.location = Path::from("test2");
486        assert!(cache.get_with_extra(&meta2.location, &meta2).is_none());
487    }
488
489    #[test]
490    fn test_list_file_cache() {
491        let meta = ObjectMeta {
492            location: Path::from("test"),
493            last_modified: DateTime::parse_from_rfc3339("2022-09-27T22:36:00+02:00")
494                .unwrap()
495                .into(),
496            size: 1024,
497            e_tag: None,
498            version: None,
499        };
500
501        let cache = DefaultListFilesCache::default();
502        assert!(cache.get(&meta.location).is_none());
503
504        cache.put(&meta.location, vec![meta.clone()].into());
505        assert_eq!(
506            cache.get(&meta.location).unwrap().first().unwrap().clone(),
507            meta.clone()
508        );
509    }
510
511    pub struct TestFileMetadata {
512        metadata: String,
513    }
514
515    impl FileMetadata for TestFileMetadata {
516        fn as_any(&self) -> &dyn std::any::Any {
517            self
518        }
519
520        fn memory_size(&self) -> usize {
521            self.metadata.len()
522        }
523
524        fn extra_info(&self) -> HashMap<String, String> {
525            HashMap::from([("extra_info".to_owned(), "abc".to_owned())])
526        }
527    }
528
529    #[test]
530    fn test_default_file_metadata_cache() {
531        let object_meta = ObjectMeta {
532            location: Path::from("test"),
533            last_modified: DateTime::parse_from_rfc3339("2025-07-29T12:12:12+00:00")
534                .unwrap()
535                .into(),
536            size: 1024,
537            e_tag: None,
538            version: None,
539        };
540
541        let metadata: Arc<dyn FileMetadata> = Arc::new(TestFileMetadata {
542            metadata: "retrieved_metadata".to_owned(),
543        });
544
545        let mut cache = DefaultFilesMetadataCache::new(1024 * 1024);
546        assert!(cache.get(&object_meta).is_none());
547
548        // put
549        cache.put(&object_meta, Arc::clone(&metadata));
550
551        // get and contains of a valid entry
552        assert!(cache.contains_key(&object_meta));
553        let value = cache.get(&object_meta);
554        assert!(value.is_some());
555        let test_file_metadata = Arc::downcast::<TestFileMetadata>(value.unwrap());
556        assert!(test_file_metadata.is_ok());
557        assert_eq!(test_file_metadata.unwrap().metadata, "retrieved_metadata");
558
559        // file size changed
560        let mut object_meta2 = object_meta.clone();
561        object_meta2.size = 2048;
562        assert!(cache.get(&object_meta2).is_none());
563        assert!(!cache.contains_key(&object_meta2));
564
565        // file last_modified changed
566        let mut object_meta2 = object_meta.clone();
567        object_meta2.last_modified =
568            DateTime::parse_from_rfc3339("2025-07-29T13:13:13+00:00")
569                .unwrap()
570                .into();
571        assert!(cache.get(&object_meta2).is_none());
572        assert!(!cache.contains_key(&object_meta2));
573
574        // different file
575        let mut object_meta2 = object_meta.clone();
576        object_meta2.location = Path::from("test2");
577        assert!(cache.get(&object_meta2).is_none());
578        assert!(!cache.contains_key(&object_meta2));
579
580        // remove
581        cache.remove(&object_meta);
582        assert!(cache.get(&object_meta).is_none());
583        assert!(!cache.contains_key(&object_meta));
584
585        // len and clear
586        cache.put(&object_meta, Arc::clone(&metadata));
587        cache.put(&object_meta2, metadata);
588        assert_eq!(cache.len(), 2);
589        cache.clear();
590        assert_eq!(cache.len(), 0);
591    }
592
593    fn generate_test_metadata_with_size(
594        path: &str,
595        size: usize,
596    ) -> (ObjectMeta, Arc<dyn FileMetadata>) {
597        let object_meta = ObjectMeta {
598            location: Path::from(path),
599            last_modified: chrono::Utc::now(),
600            size: size as u64,
601            e_tag: None,
602            version: None,
603        };
604        let metadata: Arc<dyn FileMetadata> = Arc::new(TestFileMetadata {
605            metadata: "a".repeat(size),
606        });
607
608        (object_meta, metadata)
609    }
610
611    #[test]
612    fn test_default_file_metadata_cache_with_limit() {
613        let mut cache = DefaultFilesMetadataCache::new(1000);
614        let (object_meta1, metadata1) = generate_test_metadata_with_size("1", 100);
615        let (object_meta2, metadata2) = generate_test_metadata_with_size("2", 500);
616        let (object_meta3, metadata3) = generate_test_metadata_with_size("3", 300);
617
618        cache.put(&object_meta1, metadata1);
619        cache.put(&object_meta2, metadata2);
620        cache.put(&object_meta3, metadata3);
621
622        // all entries will fit
623        assert_eq!(cache.len(), 3);
624        assert_eq!(cache.memory_used(), 900);
625        assert!(cache.contains_key(&object_meta1));
626        assert!(cache.contains_key(&object_meta2));
627        assert!(cache.contains_key(&object_meta3));
628
629        // add a new entry which will remove the least recently used ("1")
630        let (object_meta4, metadata4) = generate_test_metadata_with_size("4", 200);
631        cache.put(&object_meta4, metadata4);
632        assert_eq!(cache.len(), 3);
633        assert_eq!(cache.memory_used(), 1000);
634        assert!(!cache.contains_key(&object_meta1));
635        assert!(cache.contains_key(&object_meta4));
636
637        // get entry "2", which will move it to the top of the queue, and add a new one which will
638        // remove the new least recently used ("3")
639        cache.get(&object_meta2);
640        let (object_meta5, metadata5) = generate_test_metadata_with_size("5", 100);
641        cache.put(&object_meta5, metadata5);
642        assert_eq!(cache.len(), 3);
643        assert_eq!(cache.memory_used(), 800);
644        assert!(!cache.contains_key(&object_meta3));
645        assert!(cache.contains_key(&object_meta5));
646
647        // new entry which will not be able to fit in the 1000 bytes allocated
648        let (object_meta6, metadata6) = generate_test_metadata_with_size("6", 1200);
649        cache.put(&object_meta6, metadata6);
650        assert_eq!(cache.len(), 3);
651        assert_eq!(cache.memory_used(), 800);
652        assert!(!cache.contains_key(&object_meta6));
653
654        // new entry which is able to fit without removing any entry
655        let (object_meta7, metadata7) = generate_test_metadata_with_size("7", 200);
656        cache.put(&object_meta7, metadata7);
657        assert_eq!(cache.len(), 4);
658        assert_eq!(cache.memory_used(), 1000);
659        assert!(cache.contains_key(&object_meta7));
660
661        // new entry which will remove all other entries
662        let (object_meta8, metadata8) = generate_test_metadata_with_size("8", 999);
663        cache.put(&object_meta8, metadata8);
664        assert_eq!(cache.len(), 1);
665        assert_eq!(cache.memory_used(), 999);
666        assert!(cache.contains_key(&object_meta8));
667
668        // when updating an entry, the previous ones are not unnecessarily removed
669        let (object_meta9, metadata9) = generate_test_metadata_with_size("9", 300);
670        let (object_meta10, metadata10) = generate_test_metadata_with_size("10", 200);
671        let (object_meta11_v1, metadata11_v1) =
672            generate_test_metadata_with_size("11", 400);
673        cache.put(&object_meta9, metadata9);
674        cache.put(&object_meta10, metadata10);
675        cache.put(&object_meta11_v1, metadata11_v1);
676        assert_eq!(cache.memory_used(), 900);
677        assert_eq!(cache.len(), 3);
678        let (object_meta11_v2, metadata11_v2) =
679            generate_test_metadata_with_size("11", 500);
680        cache.put(&object_meta11_v2, metadata11_v2);
681        assert_eq!(cache.memory_used(), 1000);
682        assert_eq!(cache.len(), 3);
683        assert!(cache.contains_key(&object_meta9));
684        assert!(cache.contains_key(&object_meta10));
685        assert!(cache.contains_key(&object_meta11_v2));
686        assert!(!cache.contains_key(&object_meta11_v1));
687
688        // when updating an entry that now exceeds the limit, the LRU ("9") needs to be removed
689        let (object_meta11_v3, metadata11_v3) =
690            generate_test_metadata_with_size("11", 501);
691        cache.put(&object_meta11_v3, metadata11_v3);
692        assert_eq!(cache.memory_used(), 701);
693        assert_eq!(cache.len(), 2);
694        assert!(cache.contains_key(&object_meta10));
695        assert!(cache.contains_key(&object_meta11_v3));
696        assert!(!cache.contains_key(&object_meta11_v2));
697
698        // manually removing an entry that is not the LRU
699        cache.remove(&object_meta11_v3);
700        assert_eq!(cache.len(), 1);
701        assert_eq!(cache.memory_used(), 200);
702        assert!(cache.contains_key(&object_meta10));
703        assert!(!cache.contains_key(&object_meta11_v3));
704
705        // clear
706        cache.clear();
707        assert_eq!(cache.len(), 0);
708        assert_eq!(cache.memory_used(), 0);
709
710        // resizing the cache should clear the extra entries
711        let (object_meta12, metadata12) = generate_test_metadata_with_size("12", 300);
712        let (object_meta13, metadata13) = generate_test_metadata_with_size("13", 200);
713        let (object_meta14, metadata14) = generate_test_metadata_with_size("14", 500);
714        cache.put(&object_meta12, metadata12);
715        cache.put(&object_meta13, metadata13);
716        cache.put(&object_meta14, metadata14);
717        assert_eq!(cache.len(), 3);
718        assert_eq!(cache.memory_used(), 1000);
719        cache.update_cache_limit(600);
720        assert_eq!(cache.len(), 1);
721        assert_eq!(cache.memory_used(), 500);
722        assert!(!cache.contains_key(&object_meta12));
723        assert!(!cache.contains_key(&object_meta13));
724        assert!(cache.contains_key(&object_meta14));
725    }
726
727    #[test]
728    fn test_default_file_metadata_cache_entries_info() {
729        let mut cache = DefaultFilesMetadataCache::new(1000);
730        let (object_meta1, metadata1) = generate_test_metadata_with_size("1", 100);
731        let (object_meta2, metadata2) = generate_test_metadata_with_size("2", 200);
732        let (object_meta3, metadata3) = generate_test_metadata_with_size("3", 300);
733
734        // initial entries, all will have hits = 0
735        cache.put(&object_meta1, metadata1);
736        cache.put(&object_meta2, metadata2);
737        cache.put(&object_meta3, metadata3);
738        assert_eq!(
739            cache.list_entries(),
740            HashMap::from([
741                (
742                    Path::from("1"),
743                    FileMetadataCacheEntry {
744                        object_meta: object_meta1.clone(),
745                        size_bytes: 100,
746                        hits: 0,
747                        extra: HashMap::from([(
748                            "extra_info".to_owned(),
749                            "abc".to_owned()
750                        )]),
751                    }
752                ),
753                (
754                    Path::from("2"),
755                    FileMetadataCacheEntry {
756                        object_meta: object_meta2.clone(),
757                        size_bytes: 200,
758                        hits: 0,
759                        extra: HashMap::from([(
760                            "extra_info".to_owned(),
761                            "abc".to_owned()
762                        )]),
763                    }
764                ),
765                (
766                    Path::from("3"),
767                    FileMetadataCacheEntry {
768                        object_meta: object_meta3.clone(),
769                        size_bytes: 300,
770                        hits: 0,
771                        extra: HashMap::from([(
772                            "extra_info".to_owned(),
773                            "abc".to_owned()
774                        )]),
775                    }
776                )
777            ])
778        );
779
780        // new hit on "1"
781        cache.get(&object_meta1);
782        assert_eq!(
783            cache.list_entries(),
784            HashMap::from([
785                (
786                    Path::from("1"),
787                    FileMetadataCacheEntry {
788                        object_meta: object_meta1.clone(),
789                        size_bytes: 100,
790                        hits: 1,
791                        extra: HashMap::from([(
792                            "extra_info".to_owned(),
793                            "abc".to_owned()
794                        )]),
795                    }
796                ),
797                (
798                    Path::from("2"),
799                    FileMetadataCacheEntry {
800                        object_meta: object_meta2.clone(),
801                        size_bytes: 200,
802                        hits: 0,
803                        extra: HashMap::from([(
804                            "extra_info".to_owned(),
805                            "abc".to_owned()
806                        )]),
807                    }
808                ),
809                (
810                    Path::from("3"),
811                    FileMetadataCacheEntry {
812                        object_meta: object_meta3.clone(),
813                        size_bytes: 300,
814                        hits: 0,
815                        extra: HashMap::from([(
816                            "extra_info".to_owned(),
817                            "abc".to_owned()
818                        )]),
819                    }
820                )
821            ])
822        );
823
824        // new entry, will evict "2"
825        let (object_meta4, metadata4) = generate_test_metadata_with_size("4", 600);
826        cache.put(&object_meta4, metadata4);
827        assert_eq!(
828            cache.list_entries(),
829            HashMap::from([
830                (
831                    Path::from("1"),
832                    FileMetadataCacheEntry {
833                        object_meta: object_meta1.clone(),
834                        size_bytes: 100,
835                        hits: 1,
836                        extra: HashMap::from([(
837                            "extra_info".to_owned(),
838                            "abc".to_owned()
839                        )]),
840                    }
841                ),
842                (
843                    Path::from("3"),
844                    FileMetadataCacheEntry {
845                        object_meta: object_meta3.clone(),
846                        size_bytes: 300,
847                        hits: 0,
848                        extra: HashMap::from([(
849                            "extra_info".to_owned(),
850                            "abc".to_owned()
851                        )]),
852                    }
853                ),
854                (
855                    Path::from("4"),
856                    FileMetadataCacheEntry {
857                        object_meta: object_meta4.clone(),
858                        size_bytes: 600,
859                        hits: 0,
860                        extra: HashMap::from([(
861                            "extra_info".to_owned(),
862                            "abc".to_owned()
863                        )]),
864                    }
865                )
866            ])
867        );
868
869        // replace entry "1"
870        let (object_meta1_new, metadata1_new) = generate_test_metadata_with_size("1", 50);
871        cache.put(&object_meta1_new, metadata1_new);
872        assert_eq!(
873            cache.list_entries(),
874            HashMap::from([
875                (
876                    Path::from("1"),
877                    FileMetadataCacheEntry {
878                        object_meta: object_meta1_new.clone(),
879                        size_bytes: 50,
880                        hits: 0,
881                        extra: HashMap::from([(
882                            "extra_info".to_owned(),
883                            "abc".to_owned()
884                        )]),
885                    }
886                ),
887                (
888                    Path::from("3"),
889                    FileMetadataCacheEntry {
890                        object_meta: object_meta3.clone(),
891                        size_bytes: 300,
892                        hits: 0,
893                        extra: HashMap::from([(
894                            "extra_info".to_owned(),
895                            "abc".to_owned()
896                        )]),
897                    }
898                ),
899                (
900                    Path::from("4"),
901                    FileMetadataCacheEntry {
902                        object_meta: object_meta4.clone(),
903                        size_bytes: 600,
904                        hits: 0,
905                        extra: HashMap::from([(
906                            "extra_info".to_owned(),
907                            "abc".to_owned()
908                        )]),
909                    }
910                )
911            ])
912        );
913
914        // remove entry "4"
915        cache.remove(&object_meta4);
916        assert_eq!(
917            cache.list_entries(),
918            HashMap::from([
919                (
920                    Path::from("1"),
921                    FileMetadataCacheEntry {
922                        object_meta: object_meta1_new.clone(),
923                        size_bytes: 50,
924                        hits: 0,
925                        extra: HashMap::from([(
926                            "extra_info".to_owned(),
927                            "abc".to_owned()
928                        )]),
929                    }
930                ),
931                (
932                    Path::from("3"),
933                    FileMetadataCacheEntry {
934                        object_meta: object_meta3.clone(),
935                        size_bytes: 300,
936                        hits: 0,
937                        extra: HashMap::from([(
938                            "extra_info".to_owned(),
939                            "abc".to_owned()
940                        )]),
941                    }
942                )
943            ])
944        );
945
946        // clear
947        cache.clear();
948        assert_eq!(cache.list_entries(), HashMap::from([]));
949    }
950}