Skip to main content

lance_core/cache/
backend.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Backend interface for cache implementors.
5//!
6//! This module defines the trait that custom cache backends must implement,
7//! along with the key and entry types they operate on. Most callers should
8//! use [`LanceCache`](super::LanceCache) instead of interacting with
9//! backends directly.
10
11use std::any::Any;
12use std::pin::Pin;
13use std::sync::Arc;
14
15use async_trait::async_trait;
16use futures::Future;
17
18use crate::Result;
19
20use super::CacheCodec;
21
/// A type-erased cache entry.
///
/// The value sits behind an [`Arc`] as `dyn Any + Send + Sync`, so its
/// concrete type does not appear in the signatures of the [`CacheBackend`]
/// methods that accept or return it.
pub type CacheEntry = Arc<dyn Any + Send + Sync>;
24
/// Three-part cache key handed to every [`CacheBackend`] method.
///
/// Backend implementations never build these themselves;
/// [`LanceCache`](super::LanceCache) constructs them and passes them in.
/// The parts are:
/// - **prefix**: the scope of the key, i.e. a dataset or index
///   (e.g. `"s3://bucket/dataset/"`)
/// - **key**: the specific entry within that scope (e.g. `"42"` for a
///   version number)
/// - **type_name**: separates values of different types that are stored
///   under the same user key (e.g. `"Vec<IndexMetadata>"`)
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct InternalCacheKey {
    prefix: Arc<str>,
    key: Arc<str>,
    type_name: &'static str,
}

impl InternalCacheKey {
    /// Assembles a key from its prefix, entry key, and value type name.
    pub fn new(prefix: Arc<str>, key: Arc<str>, type_name: &'static str) -> Self {
        InternalCacheKey {
            prefix,
            key,
            type_name,
        }
    }

    /// The scope portion of the key.
    pub fn prefix(&self) -> &str {
        self.prefix.as_ref()
    }

    /// The portion of the key that identifies the entry within its prefix.
    pub fn key(&self) -> &str {
        self.key.as_ref()
    }

    /// The name of the value type this key refers to.
    pub fn type_name(&self) -> &'static str {
        self.type_name
    }

    /// Reports whether the key's prefix begins with `prefix`.
    ///
    /// Only the prefix part is examined; the entry key and type name play
    /// no role in the comparison.
    pub fn starts_with(&self, prefix: &str) -> bool {
        self.prefix().starts_with(prefix)
    }
}
66
67/// Low-level pluggable cache backend.
68///
69/// Implementations store entries keyed by [`InternalCacheKey`] and return
70/// type-erased [`CacheEntry`] values.
71/// [`LanceCache`](super::LanceCache) handles key construction and type safety;
72/// backend authors only need to implement storage and eviction.
73#[async_trait]
74pub trait CacheBackend: Send + Sync + std::fmt::Debug {
75    /// Look up an entry by its key.
76    ///
77    /// `codec` is provided so that persistent backends can deserialize the
78    /// entry from storage. In-memory backends can ignore it. When `codec`
79    /// is `None`, the entry type does not support serialization yet and
80    /// must be stored in-memory.
81    ///
82    /// The goal is for all cache entry types to eventually have codecs,
83    /// at which point the `Option` will be removed.
84    async fn get(&self, key: &InternalCacheKey, codec: Option<CacheCodec>) -> Option<CacheEntry>;
85
86    /// Store an entry. `size_bytes` is used for eviction accounting.
87    ///
88    /// See [`get`](Self::get) for codec semantics.
89    async fn insert(
90        &self,
91        key: &InternalCacheKey,
92        entry: CacheEntry,
93        size_bytes: usize,
94        codec: Option<CacheCodec>,
95    );
96
97    /// Get an existing entry or compute it from `loader`.
98    ///
99    /// Implementations should deduplicate concurrent loads for the same key
100    /// so the loader runs at most once.
101    ///
102    /// Returns `(entry, was_cached)` where `was_cached` is `true` if the entry
103    /// was already present in the cache (the loader was not invoked).
104    ///
105    /// See [`get`](Self::get) for codec semantics.
106    async fn get_or_insert<'a>(
107        &self,
108        key: &InternalCacheKey,
109        loader: Pin<Box<dyn Future<Output = Result<(CacheEntry, usize)>> + Send + 'a>>,
110        codec: Option<CacheCodec>,
111    ) -> Result<(CacheEntry, bool)>;
112
113    /// Remove all entries whose prefix starts with the given string.
114    async fn invalidate_prefix(&self, prefix: &str);
115
116    /// Remove all entries.
117    async fn clear(&self);
118
119    /// Number of entries currently stored (may flush pending operations).
120    async fn num_entries(&self) -> usize;
121
122    /// Total weighted size in bytes of all stored entries (may flush pending operations).
123    async fn size_bytes(&self) -> usize;
124
125    /// Approximate number of entries, callable from synchronous contexts.
126    /// Backends that cannot provide this cheaply should return 0.
127    fn approx_num_entries(&self) -> usize {
128        0
129    }
130
131    /// Approximate weighted size in bytes, callable from synchronous contexts.
132    /// Used by `DeepSizeOf` to report cache memory usage.
133    /// Backends that cannot provide this cheaply should return 0.
134    ///
135    /// Assumes entries do not share underlying buffers; if they do, the
136    /// returned total may overcount.
137    fn approx_size_bytes(&self) -> usize {
138        0
139    }
140}