lance_table/format/
index.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Metadata for index
5
6use std::sync::Arc;
7
8use chrono::{DateTime, Utc};
9use deepsize::DeepSizeOf;
10use roaring::RoaringBitmap;
11use snafu::location;
12use uuid::Uuid;
13
14use super::pb;
15use lance_core::{Error, Result};
16
/// Index metadata
///
/// In-memory form of an index entry. It is converted to and from the protobuf
/// message `pb::IndexMetadata` by the `TryFrom` / `From` impls in this module.
#[derive(Debug, Clone, PartialEq)]
pub struct IndexMetadata {
    /// Unique ID across all dataset versions.
    pub uuid: Uuid,

    /// Fields to build the index.
    pub fields: Vec<i32>,

    /// Human readable index name
    pub name: String,

    /// The version of the dataset this index was last updated on
    ///
    /// This is set when the index is created (based on the version used to train the index)
    /// This is updated when the index is updated or remapped
    pub dataset_version: u64,

    /// The fragment ids this index covers.
    ///
    /// This may contain fragment ids that no longer exist in the dataset; use
    /// `effective_fragment_bitmap` to restrict it to fragments that still exist.
    ///
    /// If this is None, then this is unknown. An empty byte string in the
    /// protobuf message is decoded as `None`.
    pub fragment_bitmap: Option<RoaringBitmap>,

    /// Metadata specific to the index type
    ///
    /// This is an Option because older versions of Lance may not have this defined.  However, it should always
    /// be present in newer versions.
    pub index_details: Option<Arc<prost_types::Any>>,

    /// The index version.
    ///
    /// Decoded as `0` (the `i32` default) when the protobuf message does not carry it.
    pub index_version: i32,

    /// Timestamp when the index was created
    ///
    /// This field is optional for backward compatibility. For existing indices created before
    /// this field was added, this will be None.
    ///
    /// Stored in the protobuf message as milliseconds since the Unix epoch.
    pub created_at: Option<DateTime<Utc>>,

    /// The base path index of the index files. Used when the index is imported or referred from another dataset.
    /// Lance uses it as key of the base_paths field in Manifest to determine the actual base path of the index files.
    pub base_id: Option<u32>,
}
61
62impl IndexMetadata {
63    pub fn effective_fragment_bitmap(
64        &self,
65        existing_fragments: &RoaringBitmap,
66    ) -> Option<RoaringBitmap> {
67        let fragment_bitmap = self.fragment_bitmap.as_ref()?;
68        Some(fragment_bitmap & existing_fragments)
69    }
70}
71
72impl DeepSizeOf for IndexMetadata {
73    fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize {
74        self.uuid.as_bytes().deep_size_of_children(context)
75            + self.fields.deep_size_of_children(context)
76            + self.name.deep_size_of_children(context)
77            + self.dataset_version.deep_size_of_children(context)
78            + self
79                .fragment_bitmap
80                .as_ref()
81                .map(|fragment_bitmap| fragment_bitmap.serialized_size())
82                .unwrap_or(0)
83    }
84}
85
86impl TryFrom<pb::IndexMetadata> for IndexMetadata {
87    type Error = Error;
88
89    fn try_from(proto: pb::IndexMetadata) -> Result<Self> {
90        let fragment_bitmap = if proto.fragment_bitmap.is_empty() {
91            None
92        } else {
93            Some(RoaringBitmap::deserialize_from(
94                &mut proto.fragment_bitmap.as_slice(),
95            )?)
96        };
97
98        Ok(Self {
99            uuid: proto.uuid.as_ref().map(Uuid::try_from).ok_or_else(|| {
100                Error::io(
101                    "uuid field does not exist in Index metadata".to_string(),
102                    location!(),
103                )
104            })??,
105            name: proto.name,
106            fields: proto.fields,
107            dataset_version: proto.dataset_version,
108            fragment_bitmap,
109            index_details: proto.index_details.map(Arc::new),
110            index_version: proto.index_version.unwrap_or_default(),
111            created_at: proto.created_at.map(|ts| {
112                DateTime::from_timestamp_millis(ts as i64)
113                    .expect("Invalid timestamp in index metadata")
114            }),
115            base_id: proto.base_id,
116        })
117    }
118}
119
120impl From<&IndexMetadata> for pb::IndexMetadata {
121    fn from(idx: &IndexMetadata) -> Self {
122        let mut fragment_bitmap = Vec::new();
123        if let Some(bitmap) = &idx.fragment_bitmap {
124            if let Err(e) = bitmap.serialize_into(&mut fragment_bitmap) {
125                // In theory, this should never error. But if we do, just
126                // recover gracefully.
127                log::error!("Failed to serialize fragment bitmap: {}", e);
128                fragment_bitmap.clear();
129            }
130        }
131
132        Self {
133            uuid: Some((&idx.uuid).into()),
134            name: idx.name.clone(),
135            fields: idx.fields.clone(),
136            dataset_version: idx.dataset_version,
137            fragment_bitmap,
138            index_details: idx
139                .index_details
140                .as_ref()
141                .map(|details| details.as_ref().clone()),
142            index_version: Some(idx.index_version),
143            created_at: idx.created_at.map(|dt| dt.timestamp_millis() as u64),
144            base_id: idx.base_id,
145        }
146    }
147}