//! Module root: declares the submodules and re-exports their public items.
//!
//! The `builder`, `merger`, and `tracker` submodules, together with their
//! re-exports, are only compiled when the `native` feature is enabled.
//! `ann_build` and `format` are visible crate-wide; the remaining submodules
//! are private and reachable from outside only through the re-exports below.

// --- Submodule declarations ---
pub(crate) mod ann_build;
// Compiled only with the `native` feature.
#[cfg(feature = "native")]
mod builder;
pub(crate) mod format;
// Compiled only with the `native` feature.
#[cfg(feature = "native")]
mod merger;
mod reader;
mod store;
// Compiled only with the `native` feature.
#[cfg(feature = "native")]
mod tracker;
mod types;
mod vector_data;

// --- Public re-exports ---
// Re-exports from `builder`, `merger`, and `tracker` carry the same `native`
// gate as the modules they come from.
#[cfg(feature = "native")]
pub use builder::{MemoryBreakdown, SegmentBuilder, SegmentBuilderConfig, SegmentBuilderStats};
#[cfg(feature = "native")]
pub use merger::{MergeStats, SegmentMerger, delete_segment};
pub use reader::{AsyncSegmentReader, SegmentReader, SparseIndex, VectorIndex, VectorSearchResult};
// Everything public in `store` is re-exported via a glob.
pub use store::*;
#[cfg(feature = "native")]
pub use tracker::{SegmentSnapshot, SegmentTracker};
pub use types::{FieldStats, SegmentFiles, SegmentId, SegmentMeta, TrainedVectorStructures};
pub use vector_data::{
    FlatVectorData, IVFRaBitQIndexData, LazyFlatVectorData, ScaNNIndexData, dequantize_raw,
};
26
#[cfg(test)]
#[cfg(feature = "native")]
mod tests {
    use super::*;
    use crate::directories::RamDirectory;
    use crate::dsl::{Document, SchemaBuilder};
    use crate::query::MultiValueCombiner;
    use std::sync::Arc;

    /// Builds a two-document text segment in a `RamDirectory`, reopens it with
    /// `AsyncSegmentReader`, and checks the document count, the postings for
    /// the lowercase terms `hello` / `world`, and the stored title of doc 0.
    #[tokio::test]
    async fn test_async_segment_reader() {
        let mut fields = SchemaBuilder::default();
        let title = fields.add_text_field("title", true, true);

        // The builder takes its own copy of the schema; the reader shares the
        // original through an `Arc`.
        let schema = fields.build();
        let mut builder =
            SegmentBuilder::new(schema.clone(), SegmentBuilderConfig::default()).unwrap();
        let schema = Arc::new(schema);

        let dir = RamDirectory::new();
        let segment_id = SegmentId::new();

        // Documents are added in this order, so "Hello World" is doc 0.
        for text in ["Hello World", "Goodbye World"] {
            let mut doc = Document::new();
            doc.add_text(title, text);
            builder.add_document(doc).unwrap();
        }

        builder.build(&dir, segment_id, None).await.unwrap();

        let reader = AsyncSegmentReader::open(&dir, segment_id, Arc::clone(&schema), 0, 16)
            .await
            .unwrap();

        assert_eq!(reader.num_docs(), 2);

        // "hello" occurs in one document, "world" in both.
        for (term, expected_docs) in [(b"hello", 1), (b"world", 2)] {
            let postings = reader.get_postings(title, term).await.unwrap();
            assert!(postings.is_some());
            assert_eq!(postings.unwrap().doc_count(), expected_docs);
        }

        let first_doc = reader.doc(0).await.unwrap().unwrap();
        let stored_title = first_doc.get_first(title).unwrap();
        assert_eq!(stored_title.as_text(), Some("Hello World"));
    }

    /// Indexes three documents with dense vectors (doc 1 carries two of them),
    /// searches with `MultiValueCombiner::Max`, and checks that doc 1 is
    /// returned with at most two ordinals, each being 0 or 1.
    #[tokio::test]
    async fn test_dense_vector_ordinal_tracking() {
        let mut fields = SchemaBuilder::default();
        let embedding = fields.add_dense_vector_field("embedding", 4, true, true);

        let schema = fields.build();
        let mut builder =
            SegmentBuilder::new(schema.clone(), SegmentBuilderConfig::default()).unwrap();
        let schema = Arc::new(schema);

        let dir = RamDirectory::new();
        let segment_id = SegmentId::new();

        // One entry per document; the middle document holds two vectors.
        let per_doc_vectors = [
            vec![vec![1.0, 0.0, 0.0, 0.0]],
            vec![vec![0.0, 1.0, 0.0, 0.0], vec![0.0, 0.0, 1.0, 0.0]],
            vec![vec![0.0, 0.0, 0.0, 1.0]],
        ];
        for vectors in per_doc_vectors {
            let mut doc = Document::new();
            for vector in vectors {
                doc.add_dense_vector(embedding, vector);
            }
            builder.add_document(doc).unwrap();
        }

        builder.build(&dir, segment_id, None).await.unwrap();

        let reader = AsyncSegmentReader::open(&dir, segment_id, Arc::clone(&schema), 0, 16)
            .await
            .unwrap();

        let query = vec![0.0, 0.9, 0.1, 0.0];
        let results = reader
            .search_dense_vector(embedding, &query, 10, 0, 1, MultiValueCombiner::Max)
            .await
            .unwrap();

        let doc1 = results
            .iter()
            .find(|hit| hit.doc_id == 1)
            .expect("Doc 1 should be in results");

        let ordinal_count = doc1.ordinals.len();
        assert!(
            ordinal_count <= 2,
            "Doc 1 should have at most 2 ordinals, got {}",
            ordinal_count
        );

        for (ordinal, _) in &doc1.ordinals {
            assert!(*ordinal <= 1, "Ordinal should be 0 or 1, got {}", ordinal);
        }
    }

    /// Indexes three documents with sparse vectors (doc 1 carries two of them),
    /// queries dimension 0 with `MultiValueCombiner::Sum`, and checks that at
    /// least two hits come back and that doc 1 reports non-empty ordinals,
    /// each being 0 or 1.
    #[tokio::test]
    async fn test_sparse_vector_ordinal_tracking() {
        let mut fields = SchemaBuilder::default();
        let sparse = fields.add_sparse_vector_field("sparse", true, true);

        let schema = fields.build();
        let mut builder =
            SegmentBuilder::new(schema.clone(), SegmentBuilderConfig::default()).unwrap();
        let schema = Arc::new(schema);

        let dir = RamDirectory::new();
        let segment_id = SegmentId::new();

        // One entry per document; the middle document holds two vectors.
        let per_doc_vectors = [
            vec![vec![(0, 1.0), (1, 0.5)]],
            vec![vec![(0, 0.8), (2, 0.3)], vec![(1, 0.9), (3, 0.4)]],
            vec![vec![(2, 1.0), (3, 0.5)]],
        ];
        for vectors in per_doc_vectors {
            let mut doc = Document::new();
            for vector in vectors {
                doc.add_sparse_vector(sparse, vector);
            }
            builder.add_document(doc).unwrap();
        }

        builder.build(&dir, segment_id, None).await.unwrap();

        let reader = AsyncSegmentReader::open(&dir, segment_id, Arc::clone(&schema), 0, 16)
            .await
            .unwrap();

        // Only docs 0 and 1 have a weight on dimension 0.
        let query = vec![(0u32, 1.0f32)];
        let results = reader
            .search_sparse_vector(sparse, &query, 10, MultiValueCombiner::Sum, 1.0)
            .await
            .unwrap();

        assert!(results.len() >= 2, "Should have at least 2 results");

        let doc1 = results
            .iter()
            .find(|hit| hit.doc_id == 1)
            .expect("Doc 1 should be in results");

        assert!(
            !doc1.ordinals.is_empty(),
            "Doc 1 should have ordinal information"
        );

        for (ordinal, _) in &doc1.ordinals {
            assert!(*ordinal <= 1, "Ordinal should be 0 or 1, got {}", ordinal);
        }
    }
}