lance_table/format/
index.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Metadata for index
5
6use chrono::{DateTime, Utc};
7use deepsize::DeepSizeOf;
8use roaring::RoaringBitmap;
9use snafu::location;
10use uuid::Uuid;
11
12use super::pb;
13use lance_core::{Error, Result};
14/// Index metadata
15#[derive(Debug, Clone, PartialEq)]
16pub struct Index {
17    /// Unique ID across all dataset versions.
18    pub uuid: Uuid,
19
20    /// Fields to build the index.
21    pub fields: Vec<i32>,
22
23    /// Human readable index name
24    pub name: String,
25
26    /// The latest version of the dataset this index covers
27    pub dataset_version: u64,
28
29    /// The fragment ids this index covers.
30    ///
31    /// If this is None, then this is unknown.
32    pub fragment_bitmap: Option<RoaringBitmap>,
33
34    /// Metadata specific to the index type
35    ///
36    /// This is an Option because older versions of Lance may not have this defined.  However, it should always
37    /// be present in newer versions.
38    pub index_details: Option<prost_types::Any>,
39
40    /// The index version.
41    pub index_version: i32,
42
43    /// Timestamp when the index was created
44    ///
45    /// This field is optional for backward compatibility. For existing indices created before
46    /// this field was added, this will be None.
47    pub created_at: Option<DateTime<Utc>>,
48}
49
50impl DeepSizeOf for Index {
51    fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize {
52        self.uuid.as_bytes().deep_size_of_children(context)
53            + self.fields.deep_size_of_children(context)
54            + self.name.deep_size_of_children(context)
55            + self.dataset_version.deep_size_of_children(context)
56            + self
57                .fragment_bitmap
58                .as_ref()
59                .map(|fragment_bitmap| fragment_bitmap.serialized_size())
60                .unwrap_or(0)
61    }
62}
63
64impl TryFrom<pb::IndexMetadata> for Index {
65    type Error = Error;
66
67    fn try_from(proto: pb::IndexMetadata) -> Result<Self> {
68        let fragment_bitmap = if proto.fragment_bitmap.is_empty() {
69            None
70        } else {
71            Some(RoaringBitmap::deserialize_from(
72                &mut proto.fragment_bitmap.as_slice(),
73            )?)
74        };
75
76        Ok(Self {
77            uuid: proto.uuid.as_ref().map(Uuid::try_from).ok_or_else(|| {
78                Error::io(
79                    "uuid field does not exist in Index metadata".to_string(),
80                    location!(),
81                )
82            })??,
83            name: proto.name,
84            fields: proto.fields,
85            dataset_version: proto.dataset_version,
86            fragment_bitmap,
87            index_details: proto.index_details,
88            index_version: proto.index_version.unwrap_or_default(),
89            created_at: proto.created_at.map(|ts| {
90                DateTime::from_timestamp_millis(ts as i64)
91                    .expect("Invalid timestamp in index metadata")
92            }),
93        })
94    }
95}
96
97impl From<&Index> for pb::IndexMetadata {
98    fn from(idx: &Index) -> Self {
99        let mut fragment_bitmap = Vec::new();
100        if let Some(bitmap) = &idx.fragment_bitmap {
101            if let Err(e) = bitmap.serialize_into(&mut fragment_bitmap) {
102                // In theory, this should never error. But if we do, just
103                // recover gracefully.
104                log::error!("Failed to serialize fragment bitmap: {}", e);
105                fragment_bitmap.clear();
106            }
107        }
108
109        Self {
110            uuid: Some((&idx.uuid).into()),
111            name: idx.name.clone(),
112            fields: idx.fields.clone(),
113            dataset_version: idx.dataset_version,
114            fragment_bitmap,
115            index_details: idx.index_details.clone(),
116            index_version: Some(idx.index_version),
117            created_at: idx.created_at.map(|dt| dt.timestamp_millis() as u64),
118        }
119    }
120}