lance_table/format/
index.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

//! Metadata for index
5
6use std::sync::Arc;
7
8use chrono::{DateTime, Utc};
9use deepsize::DeepSizeOf;
10use roaring::RoaringBitmap;
11use snafu::location;
12use uuid::Uuid;
13
14use super::pb;
15use lance_core::{Error, Result};
16
17/// Index metadata
18#[derive(Debug, Clone, PartialEq)]
19pub struct IndexMetadata {
20    /// Unique ID across all dataset versions.
21    pub uuid: Uuid,
22
23    /// Fields to build the index.
24    pub fields: Vec<i32>,
25
26    /// Human readable index name
27    pub name: String,
28
29    /// The latest version of the dataset this index covers
30    pub dataset_version: u64,
31
32    /// The fragment ids this index covers.
33    ///
34    /// This may contain fragment ids that no longer exist in the dataset.
35    ///
36    /// If this is None, then this is unknown.
37    pub fragment_bitmap: Option<RoaringBitmap>,
38
39    /// Metadata specific to the index type
40    ///
41    /// This is an Option because older versions of Lance may not have this defined.  However, it should always
42    /// be present in newer versions.
43    pub index_details: Option<Arc<prost_types::Any>>,
44
45    /// The index version.
46    pub index_version: i32,
47
48    /// Timestamp when the index was created
49    ///
50    /// This field is optional for backward compatibility. For existing indices created before
51    /// this field was added, this will be None.
52    pub created_at: Option<DateTime<Utc>>,
53
54    /// The base path index of the index files. Used when the index is imported or referred from another dataset.
55    /// Lance uses it as key of the base_paths field in Manifest to determine the actual base path of the index files.
56    pub base_id: Option<u32>,
57}
58
59impl IndexMetadata {
60    pub fn effective_fragment_bitmap(
61        &self,
62        existing_fragments: &RoaringBitmap,
63    ) -> Option<RoaringBitmap> {
64        let fragment_bitmap = self.fragment_bitmap.as_ref()?;
65        Some(fragment_bitmap & existing_fragments)
66    }
67}
68
69impl DeepSizeOf for IndexMetadata {
70    fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize {
71        self.uuid.as_bytes().deep_size_of_children(context)
72            + self.fields.deep_size_of_children(context)
73            + self.name.deep_size_of_children(context)
74            + self.dataset_version.deep_size_of_children(context)
75            + self
76                .fragment_bitmap
77                .as_ref()
78                .map(|fragment_bitmap| fragment_bitmap.serialized_size())
79                .unwrap_or(0)
80    }
81}
82
83impl TryFrom<pb::IndexMetadata> for IndexMetadata {
84    type Error = Error;
85
86    fn try_from(proto: pb::IndexMetadata) -> Result<Self> {
87        let fragment_bitmap = if proto.fragment_bitmap.is_empty() {
88            None
89        } else {
90            Some(RoaringBitmap::deserialize_from(
91                &mut proto.fragment_bitmap.as_slice(),
92            )?)
93        };
94
95        Ok(Self {
96            uuid: proto.uuid.as_ref().map(Uuid::try_from).ok_or_else(|| {
97                Error::io(
98                    "uuid field does not exist in Index metadata".to_string(),
99                    location!(),
100                )
101            })??,
102            name: proto.name,
103            fields: proto.fields,
104            dataset_version: proto.dataset_version,
105            fragment_bitmap,
106            index_details: proto.index_details.map(Arc::new),
107            index_version: proto.index_version.unwrap_or_default(),
108            created_at: proto.created_at.map(|ts| {
109                DateTime::from_timestamp_millis(ts as i64)
110                    .expect("Invalid timestamp in index metadata")
111            }),
112            base_id: proto.base_id,
113        })
114    }
115}
116
117impl From<&IndexMetadata> for pb::IndexMetadata {
118    fn from(idx: &IndexMetadata) -> Self {
119        let mut fragment_bitmap = Vec::new();
120        if let Some(bitmap) = &idx.fragment_bitmap {
121            if let Err(e) = bitmap.serialize_into(&mut fragment_bitmap) {
122                // In theory, this should never error. But if we do, just
123                // recover gracefully.
124                log::error!("Failed to serialize fragment bitmap: {}", e);
125                fragment_bitmap.clear();
126            }
127        }
128
129        Self {
130            uuid: Some((&idx.uuid).into()),
131            name: idx.name.clone(),
132            fields: idx.fields.clone(),
133            dataset_version: idx.dataset_version,
134            fragment_bitmap,
135            index_details: idx
136                .index_details
137                .as_ref()
138                .map(|details| details.as_ref().clone()),
139            index_version: Some(idx.index_version),
140            created_at: idx.created_at.map(|dt| dt.timestamp_millis() as u64),
141            base_id: idx.base_id,
142        }
143    }
144}