Skip to main content

lance_core/cache/
backend.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

//! Backend interface for cache implementors.
//!
//! This module defines the trait that custom cache backends must implement,
//! along with the key and entry types they operate on. Most callers should
//! use [`LanceCache`](super::LanceCache) instead of interacting with
//! backends directly.
10
11use std::any::Any;
12use std::pin::Pin;
13use std::sync::Arc;
14
15use async_trait::async_trait;
16use futures::Future;
17
18use crate::Result;
19
20use super::CacheCodec;
21
/// Reference-counted cache value with its concrete type erased.
///
/// The `Any` bound is what allows the concrete type to be recovered later by
/// downcasting; `Send + Sync` lets an entry be shared across threads.
pub type CacheEntry = Arc<dyn Any + Send + Sync>;
24
25/// Iterator over cache keys currently known to a backend.
26pub type CacheKeyIterator<'a> = Box<dyn Iterator<Item = InternalCacheKey> + Send + 'a>;
27
/// Three-part cache key handed to every [`CacheBackend`] method.
///
/// Keys are assembled by [`LanceCache`](super::LanceCache) before they reach a
/// backend, so backend implementations never need to build one themselves.
/// The parts are:
/// - **prefix**: the dataset or index scope (e.g. `"s3://bucket/dataset/"`)
/// - **key**: the entry within that scope (e.g. `"42"` for a version number)
/// - **type_name**: separates different value types that share one user key
///   (e.g. `"Vec<IndexMetadata>"`)
///
/// Equality and hashing are derived, so all three parts take part in both.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct InternalCacheKey {
    prefix: Arc<str>,
    key: Arc<str>,
    type_name: &'static str,
}

impl InternalCacheKey {
    /// Assembles a key from its scope prefix, entry key and value type name.
    pub fn new(prefix: Arc<str>, key: Arc<str>, type_name: &'static str) -> Self {
        Self {
            prefix,
            key,
            type_name,
        }
    }

    /// The scope portion of the key.
    pub fn prefix(&self) -> &str {
        self.prefix.as_ref()
    }

    /// The portion of the key that identifies the entry.
    pub fn key(&self) -> &str {
        self.key.as_ref()
    }

    /// Name of the value type this key refers to.
    pub fn type_name(&self) -> &'static str {
        self.type_name
    }

    /// Reports whether the key's prefix begins with `prefix`.
    ///
    /// Only the prefix part is examined; the entry key and type name play no
    /// role in the comparison.
    pub fn starts_with(&self, prefix: &str) -> bool {
        self.prefix().starts_with(prefix)
    }
}
69
70/// Low-level pluggable cache backend.
71///
72/// Implementations store entries keyed by [`InternalCacheKey`] and return
73/// type-erased [`CacheEntry`] values.
74/// [`LanceCache`](super::LanceCache) handles key construction and type safety;
75/// backend authors only need to implement storage and eviction.
76#[async_trait]
77pub trait CacheBackend: Send + Sync + std::fmt::Debug {
78    /// Look up an entry by its key.
79    ///
80    /// `codec` is provided so that persistent backends can deserialize the
81    /// entry from storage. In-memory backends can ignore it. When `codec`
82    /// is `None`, the entry type does not support serialization yet and
83    /// must be stored in-memory.
84    ///
85    /// The goal is for all cache entry types to eventually have codecs,
86    /// at which point the `Option` will be removed.
87    async fn get(&self, key: &InternalCacheKey, codec: Option<CacheCodec>) -> Option<CacheEntry>;
88
89    /// Store an entry. `size_bytes` is used for eviction accounting.
90    ///
91    /// See [`get`](Self::get) for codec semantics.
92    async fn insert(
93        &self,
94        key: &InternalCacheKey,
95        entry: CacheEntry,
96        size_bytes: usize,
97        codec: Option<CacheCodec>,
98    );
99
100    /// Get an existing entry or compute it from `loader`.
101    ///
102    /// Implementations should deduplicate concurrent loads for the same key
103    /// so the loader runs at most once.
104    ///
105    /// Returns `(entry, was_cached)` where `was_cached` is `true` if the entry
106    /// was already present in the cache (the loader was not invoked).
107    ///
108    /// See [`get`](Self::get) for codec semantics.
109    async fn get_or_insert<'a>(
110        &self,
111        key: &InternalCacheKey,
112        loader: Pin<Box<dyn Future<Output = Result<(CacheEntry, usize)>> + Send + 'a>>,
113        codec: Option<CacheCodec>,
114    ) -> Result<(CacheEntry, bool)>;
115
116    /// Remove all entries whose prefix starts with the given string.
117    async fn invalidate_prefix(&self, prefix: &str);
118
119    /// Remove all entries.
120    async fn clear(&self);
121
122    /// Return an iterator over cache keys currently known to this backend.
123    ///
124    /// Backends that cannot enumerate keys cheaply or accurately should return
125    /// `None`. An empty iterator means key inventory is supported and the
126    /// cache currently has no entries.
127    async fn keys(&self) -> Option<CacheKeyIterator<'_>> {
128        None
129    }
130
131    /// Number of entries currently stored (may flush pending operations).
132    async fn num_entries(&self) -> usize;
133
134    /// Total weighted size in bytes of all stored entries (may flush pending operations).
135    async fn size_bytes(&self) -> usize;
136
137    /// Approximate number of entries, callable from synchronous contexts.
138    /// Backends that cannot provide this cheaply should return 0.
139    fn approx_num_entries(&self) -> usize {
140        0
141    }
142
143    /// Approximate weighted size in bytes, callable from synchronous contexts.
144    /// Used by `DeepSizeOf` to report cache memory usage.
145    /// Backends that cannot provide this cheaply should return 0.
146    ///
147    /// Assumes entries do not share underlying buffers; if they do, the
148    /// returned total may overcount.
149    fn approx_size_bytes(&self) -> usize {
150        0
151    }
152}