// hermes_core/segment/mod.rs

1pub(crate) mod ann_build;
2#[cfg(feature = "native")]
3mod builder;
4pub(crate) mod format;
5#[cfg(feature = "native")]
6mod merger;
7mod reader;
8mod store;
9#[cfg(feature = "native")]
10mod tracker;
11mod types;
12mod vector_data;
13
14#[cfg(feature = "native")]
15pub use builder::{MemoryBreakdown, SegmentBuilder, SegmentBuilderConfig, SegmentBuilderStats};
16#[cfg(feature = "native")]
17pub use merger::{MergeStats, SegmentMerger, delete_segment};
18pub use reader::{AsyncSegmentReader, SegmentReader, SparseIndex, VectorIndex, VectorSearchResult};
19pub use store::*;
20#[cfg(feature = "native")]
21pub use tracker::{SegmentSnapshot, SegmentTracker};
22pub use types::{FieldStats, SegmentFiles, SegmentId, SegmentMeta, TrainedVectorStructures};
23pub use vector_data::{
24    FlatVectorData, IVFRaBitQIndexData, LazyFlatVectorData, ScaNNIndexData, dequantize_raw,
25};
26
27#[cfg(test)]
28#[cfg(feature = "native")]
29mod tests {
30    use super::*;
31    use crate::directories::RamDirectory;
32    use crate::dsl::SchemaBuilder;
33    use std::sync::Arc;
34
35    #[tokio::test]
36    async fn test_async_segment_reader() {
37        let mut schema_builder = SchemaBuilder::default();
38        let title = schema_builder.add_text_field("title", true, true);
39        let schema = Arc::new(schema_builder.build());
40
41        let dir = RamDirectory::new();
42        let segment_id = SegmentId::new();
43
44        // Build segment using sync builder
45        let config = SegmentBuilderConfig::default();
46        let mut builder = SegmentBuilder::new((*schema).clone(), config).unwrap();
47
48        let mut doc = crate::dsl::Document::new();
49        doc.add_text(title, "Hello World");
50        builder.add_document(doc).unwrap();
51
52        let mut doc = crate::dsl::Document::new();
53        doc.add_text(title, "Goodbye World");
54        builder.add_document(doc).unwrap();
55
56        builder.build(&dir, segment_id, None).await.unwrap();
57
58        // Open with async reader
59        let reader = AsyncSegmentReader::open(&dir, segment_id, schema.clone(), 0, 16)
60            .await
61            .unwrap();
62
63        assert_eq!(reader.num_docs(), 2);
64
65        // Test postings lookup
66        let postings = reader.get_postings(title, b"hello").await.unwrap();
67        assert!(postings.is_some());
68        assert_eq!(postings.unwrap().doc_count(), 1);
69
70        let postings = reader.get_postings(title, b"world").await.unwrap();
71        assert!(postings.is_some());
72        assert_eq!(postings.unwrap().doc_count(), 2);
73
74        // Test document retrieval
75        let doc = reader.doc(0).await.unwrap().unwrap();
76        assert_eq!(doc.get_first(title).unwrap().as_text(), Some("Hello World"));
77    }
78
79    #[tokio::test]
80    async fn test_dense_vector_ordinal_tracking() {
81        use crate::query::MultiValueCombiner;
82
83        let mut schema_builder = SchemaBuilder::default();
84        // Use simple add method - defaults to Flat index
85        let embedding = schema_builder.add_dense_vector_field("embedding", 4, true, true);
86        let schema = Arc::new(schema_builder.build());
87
88        let dir = RamDirectory::new();
89        let segment_id = SegmentId::new();
90
91        let config = SegmentBuilderConfig::default();
92        let mut builder = SegmentBuilder::new((*schema).clone(), config).unwrap();
93
94        // Doc 0: single vector
95        let mut doc = crate::dsl::Document::new();
96        doc.add_dense_vector(embedding, vec![1.0, 0.0, 0.0, 0.0]);
97        builder.add_document(doc).unwrap();
98
99        // Doc 1: multi-valued vectors (2 vectors)
100        let mut doc = crate::dsl::Document::new();
101        doc.add_dense_vector(embedding, vec![0.0, 1.0, 0.0, 0.0]);
102        doc.add_dense_vector(embedding, vec![0.0, 0.0, 1.0, 0.0]);
103        builder.add_document(doc).unwrap();
104
105        // Doc 2: single vector
106        let mut doc = crate::dsl::Document::new();
107        doc.add_dense_vector(embedding, vec![0.0, 0.0, 0.0, 1.0]);
108        builder.add_document(doc).unwrap();
109
110        builder.build(&dir, segment_id, None).await.unwrap();
111
112        let reader = AsyncSegmentReader::open(&dir, segment_id, schema.clone(), 0, 16)
113            .await
114            .unwrap();
115
116        // Query close to doc 1's first vector
117        let query = vec![0.0, 0.9, 0.1, 0.0];
118        let results = reader
119            .search_dense_vector(embedding, &query, 10, 0, 1, MultiValueCombiner::Max)
120            .await
121            .unwrap();
122
123        // Doc 1 should be in results with ordinal tracking
124        let doc1_result = results.iter().find(|r| r.doc_id == 1);
125        assert!(doc1_result.is_some(), "Doc 1 should be in results");
126
127        let doc1 = doc1_result.unwrap();
128        // Should have 2 ordinals (0 and 1) for the two vectors
129        assert!(
130            doc1.ordinals.len() <= 2,
131            "Doc 1 should have at most 2 ordinals, got {}",
132            doc1.ordinals.len()
133        );
134
135        // Check ordinals are valid (0 or 1)
136        for (ordinal, _score) in &doc1.ordinals {
137            assert!(*ordinal <= 1, "Ordinal should be 0 or 1, got {}", ordinal);
138        }
139    }
140
141    #[tokio::test]
142    async fn test_sparse_vector_ordinal_tracking() {
143        use crate::query::MultiValueCombiner;
144
145        let mut schema_builder = SchemaBuilder::default();
146        let sparse = schema_builder.add_sparse_vector_field("sparse", true, true);
147        let schema = Arc::new(schema_builder.build());
148
149        let dir = RamDirectory::new();
150        let segment_id = SegmentId::new();
151
152        let config = SegmentBuilderConfig::default();
153        let mut builder = SegmentBuilder::new((*schema).clone(), config).unwrap();
154
155        // Doc 0: single sparse vector
156        let mut doc = crate::dsl::Document::new();
157        doc.add_sparse_vector(sparse, vec![(0, 1.0), (1, 0.5)]);
158        builder.add_document(doc).unwrap();
159
160        // Doc 1: multi-valued sparse vectors (2 vectors)
161        let mut doc = crate::dsl::Document::new();
162        doc.add_sparse_vector(sparse, vec![(0, 0.8), (2, 0.3)]);
163        doc.add_sparse_vector(sparse, vec![(1, 0.9), (3, 0.4)]);
164        builder.add_document(doc).unwrap();
165
166        // Doc 2: single sparse vector
167        let mut doc = crate::dsl::Document::new();
168        doc.add_sparse_vector(sparse, vec![(2, 1.0), (3, 0.5)]);
169        builder.add_document(doc).unwrap();
170
171        builder.build(&dir, segment_id, None).await.unwrap();
172
173        let reader = AsyncSegmentReader::open(&dir, segment_id, schema.clone(), 0, 16)
174            .await
175            .unwrap();
176
177        // Query matching dimension 0
178        let query = vec![(0u32, 1.0f32)];
179        let results = reader
180            .search_sparse_vector(sparse, &query, 10, MultiValueCombiner::Sum, 1.0)
181            .await
182            .unwrap();
183
184        // Both doc 0 and doc 1 have dimension 0
185        assert!(results.len() >= 2, "Should have at least 2 results");
186
187        // Check doc 1 has ordinal tracking
188        let doc1_result = results.iter().find(|r| r.doc_id == 1);
189        assert!(doc1_result.is_some(), "Doc 1 should be in results");
190
191        let doc1 = doc1_result.unwrap();
192        // Doc 1's first sparse vector has dim 0, so ordinal should be 0
193        assert!(
194            !doc1.ordinals.is_empty(),
195            "Doc 1 should have ordinal information"
196        );
197
198        // Check ordinals are valid (0 or 1)
199        for (ordinal, _score) in &doc1.ordinals {
200            assert!(*ordinal <= 1, "Ordinal should be 0 or 1, got {}", ordinal);
201        }
202    }
203}