lance_core/cache/backend.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Backend interface for cache implementors.
5//!
6//! This module defines the trait that custom cache backends must implement,
7//! along with the key and entry types they operate on. Most callers should
8//! use [`LanceCache`](super::LanceCache) instead of interacting with
9//! backends directly.
10
11use std::any::Any;
12use std::pin::Pin;
13use std::sync::Arc;
14
15use async_trait::async_trait;
16use futures::Future;
17
18use crate::Result;
19
20use super::CacheCodec;
21
/// A type-erased cache entry.
///
/// The value is shared behind an [`Arc`] and exposed only as
/// `dyn Any + Send + Sync`, so a backend can hold entries of unrelated
/// concrete types in one store and pass them across threads. The concrete
/// type can be recovered with [`Arc::downcast`].
pub type CacheEntry = Arc<dyn Any + Send + Sync>;
24
/// Iterator over cache keys currently known to a backend.
///
/// This is a boxed, `Send` iterator that yields owned [`InternalCacheKey`]
/// values. The lifetime `'a` allows the iterator to borrow from the backend
/// that produced it; it is the return type of [`CacheBackend::keys`].
pub type CacheKeyIterator<'a> = Box<dyn Iterator<Item = InternalCacheKey> + Send + 'a>;
27
/// Composite key under which a [`CacheBackend`] stores a single entry.
///
/// Backend implementations never build these by hand:
/// [`LanceCache`](super::LanceCache) assembles them and passes them in ready
/// to use. A key has three components:
/// - **prefix**: the scope the entry belongs to, such as a dataset or index
///   (e.g. `"s3://bucket/dataset/"`)
/// - **key**: the entry's identity within that scope (e.g. `"42"` for a
///   version number)
/// - **type_name**: a tag that keeps values of different types apart when
///   they are stored under the same user key (e.g. `"Vec<IndexMetadata>"`)
///
/// Equality and hashing are derived, so all three components take part in
/// both.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct InternalCacheKey {
    prefix: Arc<str>,
    key: Arc<str>,
    type_name: &'static str,
}

impl InternalCacheKey {
    /// Builds a key from its three components, taking ownership of the
    /// shared `prefix` and `key` strings.
    pub fn new(prefix: Arc<str>, key: Arc<str>, type_name: &'static str) -> Self {
        Self {
            prefix,
            key,
            type_name,
        }
    }

    /// The scope component of this key, borrowed as a plain string slice.
    pub fn prefix(&self) -> &str {
        self.prefix.as_ref()
    }

    /// The entry-specific component of this key, borrowed as a plain string
    /// slice.
    pub fn key(&self) -> &str {
        self.key.as_ref()
    }

    /// The value-type tag carried by this key.
    pub fn type_name(&self) -> &'static str {
        self.type_name
    }

    /// Returns true if this key's prefix starts with the given string.
    ///
    /// Only the `prefix` component is inspected; `key` and `type_name` play
    /// no part in the comparison.
    pub fn starts_with(&self, prefix: &str) -> bool {
        self.prefix().starts_with(prefix)
    }
}
69
/// Low-level pluggable cache backend.
///
/// Implementations store entries keyed by [`InternalCacheKey`] and return
/// type-erased [`CacheEntry`] values.
/// [`LanceCache`](super::LanceCache) handles key construction and type safety;
/// backend authors only need to implement storage and eviction.
///
/// Implementors must be `Send + Sync` and implement `Debug`.
///
/// Required methods: [`get`](Self::get), [`insert`](Self::insert),
/// [`get_or_insert`](Self::get_or_insert),
/// [`invalidate_prefix`](Self::invalidate_prefix), [`clear`](Self::clear),
/// [`num_entries`](Self::num_entries) and [`size_bytes`](Self::size_bytes).
///
/// Provided methods with conservative defaults: [`keys`](Self::keys)
/// (returns `None`), [`approx_num_entries`](Self::approx_num_entries) and
/// [`approx_size_bytes`](Self::approx_size_bytes) (both return `0`).
#[async_trait]
pub trait CacheBackend: Send + Sync + std::fmt::Debug {
    /// Look up an entry by its key.
    ///
    /// `codec` is provided so that persistent backends can deserialize the
    /// entry from storage. In-memory backends can ignore it. When `codec`
    /// is `None`, the entry type does not support serialization yet and
    /// must be stored in-memory.
    ///
    /// The goal is for all cache entry types to eventually have codecs,
    /// at which point the `Option` will be removed.
    ///
    /// Returns the entry when the backend has one for `key`, otherwise
    /// `None`.
    async fn get(&self, key: &InternalCacheKey, codec: Option<CacheCodec>) -> Option<CacheEntry>;

    /// Store an entry. `size_bytes` is used for eviction accounting.
    ///
    /// See [`get`](Self::get) for codec semantics.
    ///
    /// NOTE(review): this trait does not state what happens when `key` is
    /// already present; confirm the expected behavior (presumably replace)
    /// against the existing backends.
    async fn insert(
        &self,
        key: &InternalCacheKey,
        entry: CacheEntry,
        size_bytes: usize,
        codec: Option<CacheCodec>,
    );

    /// Get an existing entry or compute it from `loader`.
    ///
    /// Implementations should deduplicate concurrent loads for the same key
    /// so the loader runs at most once.
    ///
    /// `loader` is a boxed, pinned, `Send` future that may borrow from the
    /// caller for `'a`. It resolves to `(entry, size)`; the `usize` is
    /// presumably the entry's size in bytes, as with
    /// [`insert`](Self::insert)'s `size_bytes` (confirm against callers).
    ///
    /// Returns `(entry, was_cached)` where `was_cached` is `true` if the entry
    /// was already present in the cache (the loader was not invoked).
    ///
    /// NOTE(review): the `Err` case presumably carries a failure from
    /// `loader`; this file does not show whether backends add errors of
    /// their own.
    ///
    /// See [`get`](Self::get) for codec semantics.
    async fn get_or_insert<'a>(
        &self,
        key: &InternalCacheKey,
        loader: Pin<Box<dyn Future<Output = Result<(CacheEntry, usize)>> + Send + 'a>>,
        codec: Option<CacheCodec>,
    ) -> Result<(CacheEntry, bool)>;

    /// Remove all entries whose prefix starts with the given string.
    ///
    /// The match is against the key's prefix component, the same test that
    /// [`InternalCacheKey::starts_with`] performs.
    async fn invalidate_prefix(&self, prefix: &str);

    /// Remove all entries.
    async fn clear(&self);

    /// Return an iterator over cache keys currently known to this backend.
    ///
    /// Backends that cannot enumerate keys cheaply or accurately should return
    /// `None`. An empty iterator means key inventory is supported and the
    /// cache currently has no entries.
    ///
    /// The default implementation returns `None`, so backends that do not
    /// override it report key enumeration as unsupported.
    async fn keys(&self) -> Option<CacheKeyIterator<'_>> {
        None
    }

    /// Number of entries currently stored (may flush pending operations).
    async fn num_entries(&self) -> usize;

    /// Total weighted size in bytes of all stored entries (may flush pending operations).
    async fn size_bytes(&self) -> usize;

    /// Approximate number of entries, callable from synchronous contexts.
    /// Backends that cannot provide this cheaply should return 0.
    ///
    /// The default implementation returns 0, so a 0 here does not by itself
    /// mean the cache is empty.
    fn approx_num_entries(&self) -> usize {
        0
    }

    /// Approximate weighted size in bytes, callable from synchronous contexts.
    /// Used by `DeepSizeOf` to report cache memory usage.
    /// Backends that cannot provide this cheaply should return 0.
    ///
    /// Assumes entries do not share underlying buffers; if they do, the
    /// returned total may overcount.
    ///
    /// The default implementation returns 0, so a 0 here does not by itself
    /// mean the cache holds no data.
    fn approx_size_bytes(&self) -> usize {
        0
    }
}