Skip to main content

lance/
session.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use std::collections::HashMap;
5use std::sync::Arc;
6
7use deepsize::DeepSizeOf;
8use lance_core::cache::LanceCache;
9use lance_core::{Error, Result};
10use lance_index::IndexType;
11use lance_io::object_store::ObjectStoreRegistry;
12
13use crate::dataset::{DEFAULT_INDEX_CACHE_SIZE, DEFAULT_METADATA_CACHE_SIZE};
14use crate::session::caches::GlobalMetadataCache;
15use crate::session::index_caches::GlobalIndexCache;
16
17use self::index_extension::IndexExtension;
18
19pub(crate) mod caches;
20pub(crate) mod index_caches;
21pub(crate) mod index_extension;
22
23/// A user session holds the runtime state for a [`crate::Dataset`]
24///
25/// A session will be created automatically when a Dataset is opened.  However, you
26/// can manually create the session and provide it to the Dataset builder in order
27/// to share runtime state between multiple datasets.
28///
29/// This can be used to share caches between multiple datasets, increasing the hit
30/// rate and reducing the amount of memory used.
31///
32/// A session contains two different caches:
33///  - The index cache is used to cache opened indices and will cache index data
34///  - The metadata cache is used to cache a variety of dataset metadata (more
35///    details can be found in the [performance guide](https://lance.org/guide/performance/)
36#[derive(Clone)]
37pub struct Session {
38    /// Global cache for opened indices.
39    ///
40    /// Sub-caches are created from this cache for each dataset by adding the
41    /// URI and index UUID as a key prefix. If there is a fragment re-use index,
42    /// that is also in the key prefix. This prevents collisions between different
43    /// datasets and indices.
44    pub(crate) index_cache: GlobalIndexCache,
45
46    /// Global cache for file metadata.
47    ///
48    /// Sub-caches are created from this cache for each dataset by adding the
49    /// URI as a key prefix. See the [`LanceDataset::metadata_cache`] field.
50    /// This prevents collisions between different datasets.
51    pub(crate) metadata_cache: caches::GlobalMetadataCache,
52
53    pub(crate) index_extensions: HashMap<(IndexType, String), Arc<dyn IndexExtension>>,
54
55    store_registry: Arc<ObjectStoreRegistry>,
56}
57
58impl DeepSizeOf for Session {
59    fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize {
60        let mut size = 0;
61        // Measure the actual cache contents through the wrapper types
62        size += self.index_cache.deep_size_of_children(context);
63        size += self.metadata_cache.deep_size_of_children(context);
64        for ext in self.index_extensions.values() {
65            size += ext.deep_size_of_children(context);
66        }
67        size
68    }
69}
70
71impl std::fmt::Debug for Session {
72    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
73        f.debug_struct("Session")
74            .field(
75                "index_cache",
76                &format!("IndexCache(items={})", self.index_cache.0.approx_size(),),
77            )
78            .field(
79                "file_metadata_cache",
80                &format!(
81                    "LanceCache(items={}, size_bytes={})",
82                    self.metadata_cache.0.approx_size(),
83                    self.metadata_cache.0.approx_size_bytes(),
84                ),
85            )
86            .field(
87                "index_extensions",
88                &self.index_extensions.keys().collect::<Vec<_>>(),
89            )
90            .finish()
91    }
92}
93
94impl Session {
95    /// Create a new session.
96    ///
97    /// Parameters:
98    ///
99    /// - ***index_cache_size***: the size of the index cache.
100    /// - ***metadata_cache_size***: the size of the metadata cache.
101    /// - ***store_registry***: the object store registry to use when opening
102    ///   datasets. This determines which schemes are available, and also allows
103    ///   re-using object stores.
104    pub fn new(
105        index_cache_size: usize,
106        metadata_cache_size: usize,
107        store_registry: Arc<ObjectStoreRegistry>,
108    ) -> Self {
109        Self {
110            index_cache: GlobalIndexCache(LanceCache::with_capacity(index_cache_size)),
111            metadata_cache: GlobalMetadataCache(LanceCache::with_capacity(metadata_cache_size)),
112            index_extensions: HashMap::new(),
113            store_registry,
114        }
115    }
116
117    /// Register a new index extension.
118    ///
119    /// A name can only be registered once per type of index extension.
120    ///
121    /// Parameters:
122    ///
123    /// - ***name***: the name of the extension.
124    /// - ***extension***: the extension to register.
125    pub fn register_index_extension(
126        &mut self,
127        name: String,
128        extension: Arc<dyn IndexExtension>,
129    ) -> Result<()> {
130        match extension.index_type() {
131            IndexType::Vector => {
132                if self
133                    .index_extensions
134                    .contains_key(&(IndexType::Vector, name.clone()))
135                {
136                    return Err(Error::invalid_input(format!(
137                        "{name} is already registered"
138                    )));
139                }
140
141                if let Some(ext) = extension.to_vector() {
142                    self.index_extensions
143                        .insert((IndexType::Vector, name), ext.to_generic());
144                } else {
145                    return Err(Error::invalid_input(format!(
146                        "{name} is not a vector index extension"
147                    )));
148                }
149            }
150            _ => {
151                return Err(Error::invalid_input(format!(
152                    "scalar index extension is not support yet: {}",
153                    extension.index_type()
154                )));
155            }
156        }
157
158        Ok(())
159    }
160
161    /// Return the current size of the session in bytes
162    ///
163    /// Keep in mind that this is not trivial to compute, as we will need to walk the caches
164    pub fn size_bytes(&self) -> u64 {
165        // We re-expose deep_size_of here so that users don't
166        // need the deepsize crate themselves (e.g. to use deep_size_of)
167        self.deep_size_of() as u64
168    }
169
170    /// Get the approximate number of items in the session.
171    ///
172    /// This is a rough estimate of the number of items in the session.  It is not
173    /// exact and is not guaranteed to be accurate.
174    pub fn approx_num_items(&self) -> usize {
175        self.index_cache.0.approx_size()
176            + self.metadata_cache.0.approx_size()
177            + self.index_extensions.len()
178    }
179
180    /// Get the object store registry.
181    pub fn store_registry(&self) -> Arc<ObjectStoreRegistry> {
182        self.store_registry.clone()
183    }
184
185    /// Fetch statistics for the metadata cache
186    pub async fn metadata_cache_stats(&self) -> lance_core::cache::CacheStats {
187        self.metadata_cache.0.stats().await
188    }
189
190    /// Fetch statistics for the index cache
191    pub async fn index_cache_stats(&self) -> lance_core::cache::CacheStats {
192        self.index_cache.0.stats().await
193    }
194}
195
196impl Default for Session {
197    fn default() -> Self {
198        Self::new(
199            DEFAULT_INDEX_CACHE_SIZE,
200            DEFAULT_METADATA_CACHE_SIZE,
201            Arc::new(ObjectStoreRegistry::default()),
202        )
203    }
204}
205
#[cfg(test)]
mod tests {
    use super::*;
    use lance_index::vector::VectorIndex;

    /// A session built with zero-sized caches returns nothing for a key that
    /// was never inserted into the index cache.
    #[tokio::test]
    async fn test_disable_index_cache() {
        let session = Session::new(0, 0, Default::default());
        let cached = session.index_cache.get_unsized::<dyn VectorIndex>("abc").await;
        assert!(cached.is_none());
    }
}