lance_core/cache/backend.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Backend interface for cache implementors.
5//!
6//! This module defines the trait that custom cache backends must implement,
7//! along with the key and entry types they operate on. Most callers should
8//! use [`LanceCache`](super::LanceCache) instead of interacting with
9//! backends directly.
10
11use std::any::Any;
12use std::pin::Pin;
13use std::sync::Arc;
14
15use async_trait::async_trait;
16use futures::Future;
17
18use crate::Result;
19
20use super::CacheCodec;
21
/// Type-erased value held by a cache backend.
///
/// The concrete type is hidden behind `dyn Any`, and the `Arc` wrapper means
/// cloning an entry only bumps a reference count. The `Send + Sync` bounds
/// allow an entry to be shared across threads.
pub type CacheEntry = Arc<dyn Any + Send + Sync>;
24
/// Structured cache key handed to [`CacheBackend`] methods.
///
/// Backend implementations are given fully built keys by
/// [`LanceCache`](super::LanceCache); they are not expected to assemble keys
/// on their own. A key has three components:
/// - **prefix**: the scope of the key, such as a dataset or index
///   (e.g. `"s3://bucket/dataset/"`)
/// - **key**: which entry within that scope (e.g. `"42"` for a version number)
/// - **type_name**: tells apart values of different types that are stored
///   under the same user key (e.g. `"Vec<IndexMetadata>"`)
///
/// Equality and hashing are derived, so all three components take part in both.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct InternalCacheKey {
    /// Scope the entry belongs to.
    prefix: Arc<str>,
    /// Identifier of the entry inside its scope.
    key: Arc<str>,
    /// Name of the value type stored under this key.
    type_name: &'static str,
}
39
40impl InternalCacheKey {
41 pub fn new(prefix: Arc<str>, key: Arc<str>, type_name: &'static str) -> Self {
42 Self {
43 prefix,
44 key,
45 type_name,
46 }
47 }
48
49 pub fn prefix(&self) -> &str {
50 &self.prefix
51 }
52
53 pub fn key(&self) -> &str {
54 &self.key
55 }
56
57 pub fn type_name(&self) -> &'static str {
58 self.type_name
59 }
60
61 /// Returns true if this key's prefix starts with the given string.
62 pub fn starts_with(&self, prefix: &str) -> bool {
63 self.prefix.starts_with(prefix)
64 }
65}
66
67/// Low-level pluggable cache backend.
68///
69/// Implementations store entries keyed by [`InternalCacheKey`] and return
70/// type-erased [`CacheEntry`] values.
71/// [`LanceCache`](super::LanceCache) handles key construction and type safety;
72/// backend authors only need to implement storage and eviction.
73#[async_trait]
74pub trait CacheBackend: Send + Sync + std::fmt::Debug {
75 /// Look up an entry by its key.
76 ///
77 /// `codec` is provided so that persistent backends can deserialize the
78 /// entry from storage. In-memory backends can ignore it. When `codec`
79 /// is `None`, the entry type does not support serialization yet and
80 /// must be stored in-memory.
81 ///
82 /// The goal is for all cache entry types to eventually have codecs,
83 /// at which point the `Option` will be removed.
84 async fn get(&self, key: &InternalCacheKey, codec: Option<CacheCodec>) -> Option<CacheEntry>;
85
86 /// Store an entry. `size_bytes` is used for eviction accounting.
87 ///
88 /// See [`get`](Self::get) for codec semantics.
89 async fn insert(
90 &self,
91 key: &InternalCacheKey,
92 entry: CacheEntry,
93 size_bytes: usize,
94 codec: Option<CacheCodec>,
95 );
96
97 /// Get an existing entry or compute it from `loader`.
98 ///
99 /// Implementations should deduplicate concurrent loads for the same key
100 /// so the loader runs at most once.
101 ///
102 /// Returns `(entry, was_cached)` where `was_cached` is `true` if the entry
103 /// was already present in the cache (the loader was not invoked).
104 ///
105 /// See [`get`](Self::get) for codec semantics.
106 async fn get_or_insert<'a>(
107 &self,
108 key: &InternalCacheKey,
109 loader: Pin<Box<dyn Future<Output = Result<(CacheEntry, usize)>> + Send + 'a>>,
110 codec: Option<CacheCodec>,
111 ) -> Result<(CacheEntry, bool)>;
112
113 /// Remove all entries whose prefix starts with the given string.
114 async fn invalidate_prefix(&self, prefix: &str);
115
116 /// Remove all entries.
117 async fn clear(&self);
118
119 /// Number of entries currently stored (may flush pending operations).
120 async fn num_entries(&self) -> usize;
121
122 /// Total weighted size in bytes of all stored entries (may flush pending operations).
123 async fn size_bytes(&self) -> usize;
124
125 /// Approximate number of entries, callable from synchronous contexts.
126 /// Backends that cannot provide this cheaply should return 0.
127 fn approx_num_entries(&self) -> usize {
128 0
129 }
130
131 /// Approximate weighted size in bytes, callable from synchronous contexts.
132 /// Used by `DeepSizeOf` to report cache memory usage.
133 /// Backends that cannot provide this cheaply should return 0.
134 ///
135 /// Assumes entries do not share underlying buffers; if they do, the
136 /// returned total may overcount.
137 fn approx_size_bytes(&self) -> usize {
138 0
139 }
140}