Skip to main content

hermes_core/segment/
mod.rs

1#[cfg(feature = "native")]
2mod builder;
3#[cfg(feature = "native")]
4mod merger;
5mod reader;
6mod store;
7#[cfg(feature = "native")]
8mod tracker;
9mod types;
10mod vector_data;
11
12#[cfg(feature = "native")]
13pub use builder::{MemoryBreakdown, SegmentBuilder, SegmentBuilderConfig, SegmentBuilderStats};
14#[cfg(feature = "native")]
15pub use merger::{MergeStats, SegmentMerger, TrainedVectorStructures, delete_segment};
16pub use reader::{AsyncSegmentReader, SegmentReader, SparseIndex, VectorIndex, VectorSearchResult};
17pub use store::*;
18#[cfg(feature = "native")]
19pub use tracker::{SegmentSnapshot, SegmentTracker};
20pub use types::{FieldStats, SegmentFiles, SegmentId, SegmentMeta};
21pub use vector_data::{FlatVectorData, IVFRaBitQIndexData, LazyFlatVectorData, ScaNNIndexData};
22
23#[cfg(test)]
24#[cfg(feature = "native")]
25mod tests {
26    use super::*;
27    use crate::directories::RamDirectory;
28    use crate::dsl::SchemaBuilder;
29    use std::sync::Arc;
30
31    #[tokio::test]
32    async fn test_async_segment_reader() {
33        let mut schema_builder = SchemaBuilder::default();
34        let title = schema_builder.add_text_field("title", true, true);
35        let schema = Arc::new(schema_builder.build());
36
37        let dir = RamDirectory::new();
38        let segment_id = SegmentId::new();
39
40        // Build segment using sync builder
41        let config = SegmentBuilderConfig::default();
42        let mut builder = SegmentBuilder::new((*schema).clone(), config).unwrap();
43
44        let mut doc = crate::dsl::Document::new();
45        doc.add_text(title, "Hello World");
46        builder.add_document(doc).unwrap();
47
48        let mut doc = crate::dsl::Document::new();
49        doc.add_text(title, "Goodbye World");
50        builder.add_document(doc).unwrap();
51
52        builder.build(&dir, segment_id).await.unwrap();
53
54        // Open with async reader
55        let reader = AsyncSegmentReader::open(&dir, segment_id, schema.clone(), 0, 16)
56            .await
57            .unwrap();
58
59        assert_eq!(reader.num_docs(), 2);
60
61        // Test postings lookup
62        let postings = reader.get_postings(title, b"hello").await.unwrap();
63        assert!(postings.is_some());
64        assert_eq!(postings.unwrap().doc_count(), 1);
65
66        let postings = reader.get_postings(title, b"world").await.unwrap();
67        assert!(postings.is_some());
68        assert_eq!(postings.unwrap().doc_count(), 2);
69
70        // Test document retrieval
71        let doc = reader.doc(0).await.unwrap().unwrap();
72        assert_eq!(doc.get_first(title).unwrap().as_text(), Some("Hello World"));
73    }
74
75    #[tokio::test]
76    async fn test_dense_vector_ordinal_tracking() {
77        use crate::query::MultiValueCombiner;
78
79        let mut schema_builder = SchemaBuilder::default();
80        // Use simple add method - defaults to Flat index
81        let embedding = schema_builder.add_dense_vector_field("embedding", 4, true, true);
82        let schema = Arc::new(schema_builder.build());
83
84        let dir = RamDirectory::new();
85        let segment_id = SegmentId::new();
86
87        let config = SegmentBuilderConfig::default();
88        let mut builder = SegmentBuilder::new((*schema).clone(), config).unwrap();
89
90        // Doc 0: single vector
91        let mut doc = crate::dsl::Document::new();
92        doc.add_dense_vector(embedding, vec![1.0, 0.0, 0.0, 0.0]);
93        builder.add_document(doc).unwrap();
94
95        // Doc 1: multi-valued vectors (2 vectors)
96        let mut doc = crate::dsl::Document::new();
97        doc.add_dense_vector(embedding, vec![0.0, 1.0, 0.0, 0.0]);
98        doc.add_dense_vector(embedding, vec![0.0, 0.0, 1.0, 0.0]);
99        builder.add_document(doc).unwrap();
100
101        // Doc 2: single vector
102        let mut doc = crate::dsl::Document::new();
103        doc.add_dense_vector(embedding, vec![0.0, 0.0, 0.0, 1.0]);
104        builder.add_document(doc).unwrap();
105
106        builder.build(&dir, segment_id).await.unwrap();
107
108        let reader = AsyncSegmentReader::open(&dir, segment_id, schema.clone(), 0, 16)
109            .await
110            .unwrap();
111
112        // Query close to doc 1's first vector
113        let query = vec![0.0, 0.9, 0.1, 0.0];
114        let results = reader
115            .search_dense_vector(embedding, &query, 10, 0, 1, MultiValueCombiner::Max)
116            .await
117            .unwrap();
118
119        // Doc 1 should be in results with ordinal tracking
120        let doc1_result = results.iter().find(|r| r.doc_id == 1);
121        assert!(doc1_result.is_some(), "Doc 1 should be in results");
122
123        let doc1 = doc1_result.unwrap();
124        // Should have 2 ordinals (0 and 1) for the two vectors
125        assert!(
126            doc1.ordinals.len() <= 2,
127            "Doc 1 should have at most 2 ordinals, got {}",
128            doc1.ordinals.len()
129        );
130
131        // Check ordinals are valid (0 or 1)
132        for (ordinal, _score) in &doc1.ordinals {
133            assert!(*ordinal <= 1, "Ordinal should be 0 or 1, got {}", ordinal);
134        }
135    }
136
137    #[tokio::test]
138    async fn test_sparse_vector_ordinal_tracking() {
139        use crate::query::MultiValueCombiner;
140
141        let mut schema_builder = SchemaBuilder::default();
142        let sparse = schema_builder.add_sparse_vector_field("sparse", true, true);
143        let schema = Arc::new(schema_builder.build());
144
145        let dir = RamDirectory::new();
146        let segment_id = SegmentId::new();
147
148        let config = SegmentBuilderConfig::default();
149        let mut builder = SegmentBuilder::new((*schema).clone(), config).unwrap();
150
151        // Doc 0: single sparse vector
152        let mut doc = crate::dsl::Document::new();
153        doc.add_sparse_vector(sparse, vec![(0, 1.0), (1, 0.5)]);
154        builder.add_document(doc).unwrap();
155
156        // Doc 1: multi-valued sparse vectors (2 vectors)
157        let mut doc = crate::dsl::Document::new();
158        doc.add_sparse_vector(sparse, vec![(0, 0.8), (2, 0.3)]);
159        doc.add_sparse_vector(sparse, vec![(1, 0.9), (3, 0.4)]);
160        builder.add_document(doc).unwrap();
161
162        // Doc 2: single sparse vector
163        let mut doc = crate::dsl::Document::new();
164        doc.add_sparse_vector(sparse, vec![(2, 1.0), (3, 0.5)]);
165        builder.add_document(doc).unwrap();
166
167        builder.build(&dir, segment_id).await.unwrap();
168
169        let reader = AsyncSegmentReader::open(&dir, segment_id, schema.clone(), 0, 16)
170            .await
171            .unwrap();
172
173        // Query matching dimension 0
174        let query = vec![(0u32, 1.0f32)];
175        let results = reader
176            .search_sparse_vector(sparse, &query, 10, MultiValueCombiner::Sum, 1.0)
177            .await
178            .unwrap();
179
180        // Both doc 0 and doc 1 have dimension 0
181        assert!(results.len() >= 2, "Should have at least 2 results");
182
183        // Check doc 1 has ordinal tracking
184        let doc1_result = results.iter().find(|r| r.doc_id == 1);
185        assert!(doc1_result.is_some(), "Doc 1 should be in results");
186
187        let doc1 = doc1_result.unwrap();
188        // Doc 1's first sparse vector has dim 0, so ordinal should be 0
189        assert!(
190            !doc1.ordinals.is_empty(),
191            "Doc 1 should have ordinal information"
192        );
193
194        // Check ordinals are valid (0 or 1)
195        for (ordinal, _score) in &doc1.ordinals {
196            assert!(*ordinal <= 1, "Ordinal should be 0 or 1, got {}", ordinal);
197        }
198    }
199}