lance_table/format/
index.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Metadata for index
5
6use chrono::{DateTime, Utc};
7use deepsize::DeepSizeOf;
8use roaring::RoaringBitmap;
9use snafu::location;
10use uuid::Uuid;
11
12use super::pb;
13use lance_core::{Error, Result};
14/// Index metadata
15#[derive(Debug, Clone, PartialEq)]
16pub struct Index {
17    /// Unique ID across all dataset versions.
18    pub uuid: Uuid,
19
20    /// Fields to build the index.
21    pub fields: Vec<i32>,
22
23    /// Human readable index name
24    pub name: String,
25
26    /// The latest version of the dataset this index covers
27    pub dataset_version: u64,
28
29    /// The fragment ids this index covers.
30    ///
31    /// This may contain fragment ids that no longer exist in the dataset.
32    ///
33    /// If this is None, then this is unknown.
34    pub fragment_bitmap: Option<RoaringBitmap>,
35
36    /// Metadata specific to the index type
37    ///
38    /// This is an Option because older versions of Lance may not have this defined.  However, it should always
39    /// be present in newer versions.
40    pub index_details: Option<prost_types::Any>,
41
42    /// The index version.
43    pub index_version: i32,
44
45    /// Timestamp when the index was created
46    ///
47    /// This field is optional for backward compatibility. For existing indices created before
48    /// this field was added, this will be None.
49    pub created_at: Option<DateTime<Utc>>,
50}
51
52impl Index {
53    pub fn effective_fragment_bitmap(
54        &self,
55        existing_fragments: &RoaringBitmap,
56    ) -> Option<RoaringBitmap> {
57        let fragment_bitmap = self.fragment_bitmap.as_ref()?;
58        Some(fragment_bitmap & existing_fragments)
59    }
60}
61
62impl DeepSizeOf for Index {
63    fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize {
64        self.uuid.as_bytes().deep_size_of_children(context)
65            + self.fields.deep_size_of_children(context)
66            + self.name.deep_size_of_children(context)
67            + self.dataset_version.deep_size_of_children(context)
68            + self
69                .fragment_bitmap
70                .as_ref()
71                .map(|fragment_bitmap| fragment_bitmap.serialized_size())
72                .unwrap_or(0)
73    }
74}
75
76impl TryFrom<pb::IndexMetadata> for Index {
77    type Error = Error;
78
79    fn try_from(proto: pb::IndexMetadata) -> Result<Self> {
80        let fragment_bitmap = if proto.fragment_bitmap.is_empty() {
81            None
82        } else {
83            Some(RoaringBitmap::deserialize_from(
84                &mut proto.fragment_bitmap.as_slice(),
85            )?)
86        };
87
88        Ok(Self {
89            uuid: proto.uuid.as_ref().map(Uuid::try_from).ok_or_else(|| {
90                Error::io(
91                    "uuid field does not exist in Index metadata".to_string(),
92                    location!(),
93                )
94            })??,
95            name: proto.name,
96            fields: proto.fields,
97            dataset_version: proto.dataset_version,
98            fragment_bitmap,
99            index_details: proto.index_details,
100            index_version: proto.index_version.unwrap_or_default(),
101            created_at: proto.created_at.map(|ts| {
102                DateTime::from_timestamp_millis(ts as i64)
103                    .expect("Invalid timestamp in index metadata")
104            }),
105        })
106    }
107}
108
109impl From<&Index> for pb::IndexMetadata {
110    fn from(idx: &Index) -> Self {
111        let mut fragment_bitmap = Vec::new();
112        if let Some(bitmap) = &idx.fragment_bitmap {
113            if let Err(e) = bitmap.serialize_into(&mut fragment_bitmap) {
114                // In theory, this should never error. But if we do, just
115                // recover gracefully.
116                log::error!("Failed to serialize fragment bitmap: {}", e);
117                fragment_bitmap.clear();
118            }
119        }
120
121        Self {
122            uuid: Some((&idx.uuid).into()),
123            name: idx.name.clone(),
124            fields: idx.fields.clone(),
125            dataset_version: idx.dataset_version,
126            fragment_bitmap,
127            index_details: idx.index_details.clone(),
128            index_version: Some(idx.index_version),
129            created_at: idx.created_at.map(|dt| dt.timestamp_millis() as u64),
130        }
131    }
132}