lance_core/
cache.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Cache implementation
5
6use std::any::{Any, TypeId};
7use std::sync::Arc;
8
9use futures::Future;
10use moka::sync::Cache;
11use object_store::path::Path;
12
13use crate::utils::path::LancePathExt;
14use crate::Result;
15
16pub use deepsize::{Context, DeepSizeOf};
17
/// Type-erased, shareable handle to a cached value of any `Send + Sync` type.
type ArcAny = Arc<dyn Any + Send + Sync>;

/// Callback that reports the deep size of the value behind an [`ArcAny`].
type SizeAccessor = Arc<dyn Fn(&ArcAny) -> usize + Send + Sync>;

/// A cached value bundled with the means to measure it after type erasure.
#[derive(Clone)]
struct SizedRecord {
    /// The cached value, stored type-erased.
    record: ArcAny,
    /// Computes the deep size of `record`; built for the concrete type in `SizedRecord::new`.
    size_accessor: SizeAccessor,
}
25
26impl std::fmt::Debug for SizedRecord {
27    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28        f.debug_struct("SizedRecord")
29            .field("record", &self.record)
30            .finish()
31    }
32}
33
34impl SizedRecord {
35    fn new<T: DeepSizeOf + Send + Sync + 'static>(record: Arc<T>) -> Self {
36        let size_accessor =
37            |record: &ArcAny| -> usize { record.downcast_ref::<T>().unwrap().deep_size_of() };
38        Self {
39            record,
40            size_accessor: Arc::new(size_accessor),
41        }
42    }
43}
44
45/// Cache for various metadata about files.
46///
47/// The cache is keyed by the file path and the type of metadata.
48#[derive(Clone, Debug)]
49pub struct FileMetadataCache {
50    cache: Option<Arc<Cache<(Path, TypeId), SizedRecord>>>,
51    base_path: Option<Path>,
52}
53
54impl DeepSizeOf for FileMetadataCache {
55    fn deep_size_of_children(&self, _: &mut Context) -> usize {
56        self.cache
57            .as_ref()
58            .map(|cache| {
59                cache
60                    .iter()
61                    .map(|(_, v)| (v.size_accessor)(&v.record))
62                    .sum()
63            })
64            .unwrap_or(0)
65    }
66}
67
/// Selects the unit in which a [`FileMetadataCache`] capacity is expressed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CapacityMode {
    /// Capacity counts cached entries.
    Items,
    /// Capacity counts bytes, using each record's deep size as its weight.
    Bytes,
}
72
73impl FileMetadataCache {
74    /// Instantiates a new cache which, for legacy reasons, uses Items capacity mode.
75    pub fn new(capacity: usize) -> Self {
76        Self {
77            cache: Some(Arc::new(Cache::new(capacity as u64))),
78            base_path: None,
79        }
80    }
81
82    /// Instantiates a dummy cache that will never cache anything.
83    pub fn no_cache() -> Self {
84        Self {
85            cache: None,
86            base_path: None,
87        }
88    }
89
90    /// Instantiates a new cache with a given capacity and capacity mode.
91    pub fn with_capacity(capacity: usize, mode: CapacityMode) -> Self {
92        match mode {
93            CapacityMode::Items => Self::new(capacity),
94            CapacityMode::Bytes => Self {
95                cache: Some(Arc::new(
96                    Cache::builder()
97                        .weigher(|_, v: &SizedRecord| {
98                            (v.size_accessor)(&v.record).try_into().unwrap_or(u32::MAX)
99                        })
100                        .build(),
101                )),
102                base_path: None,
103            },
104        }
105    }
106
107    /// Creates a new cache which shares the same underlying cache but prepends `base_path` to all
108    /// keys.
109    pub fn with_base_path(&self, base_path: Path) -> Self {
110        Self {
111            cache: self.cache.clone(),
112            base_path: Some(base_path),
113        }
114    }
115
116    pub fn size(&self) -> usize {
117        if let Some(cache) = self.cache.as_ref() {
118            cache.run_pending_tasks();
119            cache.entry_count() as usize
120        } else {
121            0
122        }
123    }
124
125    pub fn approx_size(&self) -> usize {
126        if let Some(cache) = self.cache.as_ref() {
127            cache.entry_count() as usize
128        } else {
129            0
130        }
131    }
132    /// Fetch an item from the cache, using a str as the key
133    pub fn get_by_str<T: Send + Sync + 'static>(&self, path: &str) -> Option<Arc<T>> {
134        self.get(&Path::parse(path).unwrap())
135    }
136
137    /// Fetch an item from the cache
138    pub fn get<T: Send + Sync + 'static>(&self, path: &Path) -> Option<Arc<T>> {
139        let cache = self.cache.as_ref()?;
140        let temp: Path;
141        let path = if let Some(base_path) = &self.base_path {
142            temp = base_path.child_path(path);
143            &temp
144        } else {
145            path
146        };
147        cache
148            .get(&(path.to_owned(), TypeId::of::<T>()))
149            .map(|metadata| metadata.record.clone().downcast::<T>().unwrap())
150    }
151
152    /// Insert an item into the cache
153    pub fn insert<T: DeepSizeOf + Send + Sync + 'static>(&self, path: Path, metadata: Arc<T>) {
154        let Some(cache) = self.cache.as_ref() else {
155            return;
156        };
157        let path = if let Some(base_path) = &self.base_path {
158            base_path.child_path(&path)
159        } else {
160            path
161        };
162        cache.insert((path, TypeId::of::<T>()), SizedRecord::new(metadata));
163    }
164
165    /// Insert an item into the cache, using a str as the key
166    pub fn insert_by_str<T: DeepSizeOf + Send + Sync + 'static>(
167        &self,
168        key: &str,
169        metadata: Arc<T>,
170    ) {
171        self.insert(Path::parse(key).unwrap(), metadata);
172    }
173
174    /// Get an item
175    ///
176    /// If it exists in the cache return that
177    ///
178    /// If it doesn't then run `loader` to load the item, insert into cache, and return
179    pub async fn get_or_insert<T: DeepSizeOf + Send + Sync + 'static, F, Fut>(
180        &self,
181        path: &Path,
182        loader: F,
183    ) -> Result<Arc<T>>
184    where
185        F: Fn(&Path) -> Fut,
186        Fut: Future<Output = Result<T>>,
187    {
188        if let Some(metadata) = self.get::<T>(path) {
189            return Ok(metadata);
190        }
191
192        let metadata = Arc::new(loader(path).await?);
193        self.insert(path.to_owned(), metadata.clone());
194        Ok(metadata)
195    }
196}