Skip to main content

lance_index/
traits.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

use lance_core::Result;

use lance_table::format::IndexMetadata;

/// A set of criteria used to filter potential indices to use for a query
///
/// The [`Default`] value sets no criteria (both options are `None`, both flags
/// are `false`).  Each builder method consumes the criteria and returns it with
/// one more restriction set, so calls can be chained.
#[derive(Debug, Default)]
pub struct IndexCriteria<'a> {
    /// Only consider indices for this column (this also means the index
    /// maps to a single column)
    pub for_column: Option<&'a str>,
    /// Only consider indices with this name
    pub has_name: Option<&'a str>,
    /// If true, only consider indices that support FTS
    pub must_support_fts: bool,
    /// If true, only consider indices that support exact equality
    pub must_support_exact_equality: bool,
}

impl<'a> IndexCriteria<'a> {
    /// Only consider indices for this column (this also means the index
    /// maps to a single column)
    #[must_use = "this returns the updated criteria by value; it does not modify anything in place"]
    pub fn for_column(mut self, column: &'a str) -> Self {
        self.for_column = Some(column);
        self
    }

    /// Only consider indices with this name
    #[must_use = "this returns the updated criteria by value; it does not modify anything in place"]
    pub fn with_name(mut self, name: &'a str) -> Self {
        self.has_name = Some(name);
        self
    }

    /// Only consider indices that support FTS
    #[must_use = "this returns the updated criteria by value; it does not modify anything in place"]
    pub fn supports_fts(mut self) -> Self {
        self.must_support_fts = true;
        self
    }

    /// Only consider indices that support exact equality
    ///
    /// This will disqualify, for example, the ngram and inverted indices
    /// or an index like a bloom filter
    #[must_use = "this returns the updated criteria by value; it does not modify anything in place"]
    pub fn supports_exact_equality(mut self) -> Self {
        self.must_support_exact_equality = true;
        self
    }
}

/// Deprecated alias for [`IndexCriteria`].
#[deprecated(since = "0.39.0", note = "Use IndexCriteria instead")]
pub type ScalarIndexCriteria<'a> = IndexCriteria<'a>;

/// Options for prewarming an inverted index.
#[non_exhaustive]
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct FtsPrewarmOptions {
    /// If true, prewarm positions along with posting lists.
    pub with_position: bool,
}

impl FtsPrewarmOptions {
    /// Creates the default options, in which `with_position` is `false`.
    ///
    /// Equivalent to [`FtsPrewarmOptions::default`].
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets whether positions are prewarmed along with posting lists.
    ///
    /// Consumes the options and returns them with the flag replaced, so the
    /// call can be chained after [`FtsPrewarmOptions::new`].
    #[must_use = "this returns the updated options by value; it does not modify anything in place"]
    pub fn with_position(mut self, with_position: bool) -> Self {
        self.with_position = with_position;
        self
    }
}

/// Options for prewarming an index.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PrewarmOptions {
    /// Options for prewarming an inverted index; see [`FtsPrewarmOptions`].
    Fts(FtsPrewarmOptions),
}

81/// Additional information about an index
82///
83/// Note that a single index might consist of multiple segments.  Each segment has its own
84/// UUID and collection of files and covers some subset of the data fragments.
85///
86/// All segments in an index should have the same index type and index details.
87pub trait IndexDescription: Send + Sync {
88    /// Returns the index name
89    ///
90    /// This is the user-defined name of the index.  It is shared by all segments of the index
91    /// and is what is used to refer to the index in the API.  It is guaranteed to be unique
92    /// within the dataset.
93    fn name(&self) -> &str;
94
95    /// Returns the index metadata
96    ///
97    /// This is the raw metadata information stored in the manifest.  There is one
98    /// IndexMetadata for each segment of the index.
99    fn metadata(&self) -> &[IndexMetadata];
100
101    /// Returns the physical index segments that make up this logical index.
102    ///
103    /// This is an alias for [`Self::metadata`] with a less ambiguous name.
104    fn segments(&self) -> &[IndexMetadata] {
105        self.metadata()
106    }
107
108    /// Returns the index type URL
109    ///
110    /// This is extracted from the type url of the index details
111    fn type_url(&self) -> &str;
112
113    /// Returns the index type
114    ///
115    /// This is a short string identifier that is friendlier than the type URL but not
116    /// guaranteed to be unique.
117    ///
118    /// This is calculated by the plugin and will be "Unknown" if no plugin could be found
119    /// for the type URL.
120    fn index_type(&self) -> &str;
121
122    /// Returns the number of rows indexed by the index, across all segments.
123    ///
124    /// This is an approximate count and may include rows that have been
125    /// deleted.
126    fn rows_indexed(&self) -> u64;
127
128    /// Returns the ids of the fields that the index is built on.
129    fn field_ids(&self) -> &[u32];
130
131    /// Returns a JSON string representation of the index details
132    ///
133    /// The format of these details will vary depending on the index type and
134    /// since indexes can be provided by plugins we cannot fully define it here.
135    ///
136    /// However, plugins should do their best to maintain backwards compatibility
137    /// and consider this method part of the public API.
138    ///
139    /// See individual index plugins for more description of the expected format.
140    ///
141    /// The conversion from Any to JSON is controlled by the index
142    /// plugin.  As a result, this method may fail if there is no plugin
143    /// available for the index.
144    fn details(&self) -> Result<String>;
145
146    /// Returns the total size in bytes of all files across all segments.
147    ///
148    /// Returns `None` if file size information is not available for any segment
149    /// (for backward compatibility with indices created before file tracking was added).
150    fn total_size_bytes(&self) -> Option<u64>;
151}