1use std::{collections::HashMap, sync::Arc};
8
9use arrow_array::{ArrayRef, RecordBatch, UInt32Array};
10use arrow_schema::Field;
11use async_trait::async_trait;
12use datafusion::execution::SendableRecordBatchStream;
13use ivf::storage::IvfModel;
14use lance_core::{Result, ROW_ID_FIELD};
15use lance_io::object_store::ObjectStore;
16use lance_io::traits::Reader;
17use lance_linalg::distance::DistanceType;
18use lazy_static::lazy_static;
19use object_store::path::Path;
20use quantizer::{QuantizationType, Quantizer};
21use v3::subindex::SubIndexType;
22
23pub mod bq;
24pub mod flat;
25pub mod graph;
26pub mod hnsw;
27pub mod ivf;
28pub mod kmeans;
29pub mod pq;
30pub mod quantizer;
31pub mod residual;
32pub mod sq;
33pub mod storage;
34pub mod transform;
35pub mod utils;
36pub mod v3;
37
38use super::pb;
39use crate::{prefilter::PreFilter, Index};
40pub use residual::RESIDUAL_COLUMN;
41
/// Name of the `Float32` distance field in `VECTOR_RESULT_SCHEMA`.
pub const DIST_COL: &str = "_distance";
/// The key string `"distance_type"`.
///
/// NOTE(review): presumably the metadata key under which an index records its
/// distance type; its usage is not visible in this file.
pub const DISTANCE_TYPE_KEY: &str = "distance_type";
/// The column name `"__index_uuid"` (presumably holds the UUID of an index; confirm at use sites).
pub const INDEX_UUID_COLUMN: &str = "__index_uuid";
/// The column name `"__ivf_part_id"` (presumably holds an IVF partition id; confirm at use sites).
pub const PART_ID_COLUMN: &str = "__ivf_part_id";
/// The column name `"__pq_code"` (presumably holds product-quantization codes; confirm at use sites).
pub const PQ_CODE_COLUMN: &str = "__pq_code";
/// The column name `"__sq_code"` (presumably holds scalar-quantization codes; confirm at use sites).
pub const SQ_CODE_COLUMN: &str = "__sq_code";
49
50lazy_static! {
51 pub static ref VECTOR_RESULT_SCHEMA: arrow_schema::SchemaRef =
52 arrow_schema::SchemaRef::new(arrow_schema::Schema::new(vec![
53 Field::new(DIST_COL, arrow_schema::DataType::Float32, false),
54 ROW_ID_FIELD.clone(),
55 ]));
56}
57
58#[derive(Debug, Clone)]
60pub struct Query {
61 pub column: String,
63
64 pub key: ArrayRef,
66
67 pub k: usize,
69
70 pub lower_bound: Option<f32>,
72
73 pub upper_bound: Option<f32>,
75
76 pub nprobes: usize,
78
79 pub ef: Option<usize>,
82
83 pub refine_factor: Option<u32>,
86
87 pub metric_type: DistanceType,
89
90 pub use_index: bool,
92}
93
94impl From<pb::VectorMetricType> for DistanceType {
95 fn from(proto: pb::VectorMetricType) -> Self {
96 match proto {
97 pb::VectorMetricType::L2 => Self::L2,
98 pb::VectorMetricType::Cosine => Self::Cosine,
99 pb::VectorMetricType::Dot => Self::Dot,
100 pb::VectorMetricType::Hamming => Self::Hamming,
101 }
102 }
103}
104
105impl From<DistanceType> for pb::VectorMetricType {
106 fn from(mt: DistanceType) -> Self {
107 match mt {
108 DistanceType::L2 => Self::L2,
109 DistanceType::Cosine => Self::Cosine,
110 DistanceType::Dot => Self::Dot,
111 DistanceType::Hamming => Self::Hamming,
112 }
113 }
114}
115
116#[async_trait]
119#[allow(clippy::redundant_pub_crate)]
120pub trait VectorIndex: Send + Sync + std::fmt::Debug + Index {
121 async fn search(&self, query: &Query, pre_filter: Arc<dyn PreFilter>) -> Result<RecordBatch>;
140
141 fn find_partitions(&self, query: &Query) -> Result<UInt32Array>;
142
143 async fn search_in_partition(
144 &self,
145 partition_id: usize,
146 query: &Query,
147 pre_filter: Arc<dyn PreFilter>,
148 ) -> Result<RecordBatch>;
149
150 fn is_loadable(&self) -> bool;
153
154 fn use_residual(&self) -> bool;
156
157 fn check_can_remap(&self) -> Result<()>;
160
161 async fn load(
166 &self,
167 reader: Arc<dyn Reader>,
168 offset: usize,
169 length: usize,
170 ) -> Result<Box<dyn VectorIndex>>;
171
172 async fn load_partition(
174 &self,
175 reader: Arc<dyn Reader>,
176 offset: usize,
177 length: usize,
178 _partition_id: usize,
179 ) -> Result<Box<dyn VectorIndex>> {
180 self.load(reader, offset, length).await
181 }
182
183 async fn partition_reader(
185 &self,
186 _partition_id: usize,
187 _with_vector: bool,
188 ) -> Result<SendableRecordBatchStream> {
189 unimplemented!("only for IVF")
190 }
191
192 async fn to_batch_stream(&self, with_vector: bool) -> Result<SendableRecordBatchStream>;
194
195 fn row_ids(&self) -> Box<dyn Iterator<Item = &'_ u64> + '_>;
197
198 async fn remap(&mut self, mapping: &HashMap<u64, Option<u64>>) -> Result<()>;
207
208 async fn remap_to(
213 self: Arc<Self>,
214 _store: ObjectStore,
215 _mapping: &HashMap<u64, Option<u64>>,
216 _column: String,
217 _index_dir: Path,
218 ) -> Result<()> {
219 unimplemented!("only for v3 index")
220 }
221
222 fn metric_type(&self) -> DistanceType;
224
225 fn ivf_model(&self) -> IvfModel;
226 fn quantizer(&self) -> Quantizer;
227
228 fn sub_index_type(&self) -> (SubIndexType, QuantizationType);
230}