Skip to main content

lance_table/format/
index.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Metadata for index
5
6use std::sync::Arc;
7
8use chrono::{DateTime, Utc};
9use deepsize::DeepSizeOf;
10use roaring::RoaringBitmap;
11use uuid::Uuid;
12
13use super::pb;
14use lance_core::{Error, Result};
15
16/// Index metadata
17#[derive(Debug, Clone, PartialEq)]
18pub struct IndexMetadata {
19    /// Unique ID across all dataset versions.
20    pub uuid: Uuid,
21
22    /// Fields to build the index.
23    pub fields: Vec<i32>,
24
25    /// Human readable index name
26    pub name: String,
27
28    /// The version of the dataset this index was last updated on
29    ///
30    /// This is set when the index is created (based on the version used to train the index)
31    /// This is updated when the index is updated or remapped
32    pub dataset_version: u64,
33
34    /// The fragment ids this index covers.
35    ///
36    /// This may contain fragment ids that no longer exist in the dataset.
37    ///
38    /// If this is None, then this is unknown.
39    pub fragment_bitmap: Option<RoaringBitmap>,
40
41    /// Metadata specific to the index type
42    ///
43    /// This is an Option because older versions of Lance may not have this defined.  However, it should always
44    /// be present in newer versions.
45    pub index_details: Option<Arc<prost_types::Any>>,
46
47    /// The index version.
48    pub index_version: i32,
49
50    /// Timestamp when the index was created
51    ///
52    /// This field is optional for backward compatibility. For existing indices created before
53    /// this field was added, this will be None.
54    pub created_at: Option<DateTime<Utc>>,
55
56    /// The base path index of the index files. Used when the index is imported or referred from another dataset.
57    /// Lance uses it as key of the base_paths field in Manifest to determine the actual base path of the index files.
58    pub base_id: Option<u32>,
59}
60
61impl IndexMetadata {
62    pub fn effective_fragment_bitmap(
63        &self,
64        existing_fragments: &RoaringBitmap,
65    ) -> Option<RoaringBitmap> {
66        let fragment_bitmap = self.fragment_bitmap.as_ref()?;
67        Some(fragment_bitmap & existing_fragments)
68    }
69}
70
71impl DeepSizeOf for IndexMetadata {
72    fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize {
73        self.uuid.as_bytes().deep_size_of_children(context)
74            + self.fields.deep_size_of_children(context)
75            + self.name.deep_size_of_children(context)
76            + self.dataset_version.deep_size_of_children(context)
77            + self
78                .fragment_bitmap
79                .as_ref()
80                .map(|fragment_bitmap| fragment_bitmap.serialized_size())
81                .unwrap_or(0)
82    }
83}
84
85impl TryFrom<pb::IndexMetadata> for IndexMetadata {
86    type Error = Error;
87
88    fn try_from(proto: pb::IndexMetadata) -> Result<Self> {
89        let fragment_bitmap = if proto.fragment_bitmap.is_empty() {
90            None
91        } else {
92            Some(RoaringBitmap::deserialize_from(
93                &mut proto.fragment_bitmap.as_slice(),
94            )?)
95        };
96
97        Ok(Self {
98            uuid: proto.uuid.as_ref().map(Uuid::try_from).ok_or_else(|| {
99                Error::invalid_input("uuid field does not exist in Index metadata".to_string())
100            })??,
101            name: proto.name,
102            fields: proto.fields,
103            dataset_version: proto.dataset_version,
104            fragment_bitmap,
105            index_details: proto.index_details.map(Arc::new),
106            index_version: proto.index_version.unwrap_or_default(),
107            created_at: proto.created_at.map(|ts| {
108                DateTime::from_timestamp_millis(ts as i64)
109                    .expect("Invalid timestamp in index metadata")
110            }),
111            base_id: proto.base_id,
112        })
113    }
114}
115
116impl From<&IndexMetadata> for pb::IndexMetadata {
117    fn from(idx: &IndexMetadata) -> Self {
118        let mut fragment_bitmap = Vec::new();
119        if let Some(bitmap) = &idx.fragment_bitmap
120            && let Err(e) = bitmap.serialize_into(&mut fragment_bitmap)
121        {
122            // In theory, this should never error. But if we do, just
123            // recover gracefully.
124            log::error!("Failed to serialize fragment bitmap: {}", e);
125            fragment_bitmap.clear();
126        }
127
128        Self {
129            uuid: Some((&idx.uuid).into()),
130            name: idx.name.clone(),
131            fields: idx.fields.clone(),
132            dataset_version: idx.dataset_version,
133            fragment_bitmap,
134            index_details: idx
135                .index_details
136                .as_ref()
137                .map(|details| details.as_ref().clone()),
138            index_version: Some(idx.index_version),
139            created_at: idx.created_at.map(|dt| dt.timestamp_millis() as u64),
140            base_id: idx.base_id,
141        }
142    }
143}