Skip to main content

sochdb_vector/
lazy_segment.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// SochDB - LLM-Optimized Embedded Database
3// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU Affero General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU Affero General Public License for more details.
14//
15// You should have received a copy of the GNU Affero General Public License
16// along with this program. If not, see <https://www.gnu.org/licenses/>.
17
18//! Lazy BPS/RDF/Rerank Construction (Build-on-First-Query)
19//!
20//! Deferred index construction using OnceLock for on-demand building,
21//! eliminating upfront build cost for indexes that may never be queried.
22//!
23//! ## Problem
24//!
25//! Eager index building at seal time:
26//! - BPS construction: O(N × D) - ~50ms for 10K vectors
27//! - RDF quantization: O(N × D) - ~30ms for 10K vectors
28//! - Graph building: O(N × ef × log N) - ~200ms for 10K vectors
29//! - All this blocks the seal path even if segment is rarely queried
30//!
31//! ## Solution
32//!
33//! Lazy construction with OnceLock:
34//! - Seal only writes raw vectors (fast)
35//! - Index structures built on first query
36//! - Subsequent queries use cached index
37//! - Background pre-warming optional
38//!
39//! ## Architecture
40//!
41//! ```text
42//! Segment State Machine:
43//!
44//! ┌──────────┐     ┌──────────────┐     ┌────────────────┐
45//! │  Raw     │ ──► │  First Query │ ──► │  Index Built   │
46//! │  Vectors │     │  (triggers)  │     │  (cached)      │
47//! └──────────┘     └──────────────┘     └────────────────┘
48//!                         │
49//!                         ▼
50//!                  ┌──────────────┐
51//!                  │  Build Index │
52//!                  │  (one-time)  │
53//!                  └──────────────┘
54//! ```
55//!
56//! ## Performance
57//!
58//! | Metric          | Eager Build | Lazy Build | Improvement |
59//! |-----------------|-------------|------------|-------------|
60//! | Seal latency    | 280ms       | 5ms        | 56×         |
61//! | First query     | 2ms         | 285ms*     | (deferred)  |
62//! | Subsequent      | 2ms         | 2ms        | Same        |
63//!
64//! *One-time cost, amortized over query lifetime
65//!
66//! ## Usage
67//!
68//! ```rust
69//! use sochdb_vector::lazy_segment::{LazySegment, LazyConfig};
70//!
71//! let config = LazyConfig::default();
72//! let segment = LazySegment::new(vectors, config);
73//!
74//! // Seal is instant - no index built yet
75//!
76//! // First search triggers index build
77//! let results = segment.search(&query, k);  // ~280ms
78//!
79//! // Subsequent searches are fast
80//! let results = segment.search(&query, k);  // ~2ms
81//! ```
82
83use std::collections::HashMap;
84use std::sync::{Arc, OnceLock, RwLock};
85use std::time::Instant;
86
87/// Configuration for lazy segment
88#[derive(Debug, Clone)]
89pub struct LazyConfig {
90    /// Vector dimension
91    pub dim: usize,
92
93    /// Enable BPS (Binary Projection Search) index
94    pub enable_bps: bool,
95
96    /// Enable RDF (Random Dimension Filtering)
97    pub enable_rdf: bool,
98
99    /// Enable graph-based index (mini-HNSW)
100    pub enable_graph: bool,
101
102    /// BPS projection count
103    pub bps_projections: usize,
104
105    /// RDF sample dimensions
106    pub rdf_sample_dims: usize,
107
108    /// Graph M parameter
109    pub graph_m: usize,
110
111    /// Pre-warm on background thread
112    pub background_prewarm: bool,
113}
114
115impl Default for LazyConfig {
116    fn default() -> Self {
117        Self {
118            dim: 768,
119            enable_bps: true,
120            enable_rdf: true,
121            enable_graph: false, // Disabled by default (expensive)
122            bps_projections: 64,
123            rdf_sample_dims: 32,
124            graph_m: 16,
125            background_prewarm: false,
126        }
127    }
128}
129
130/// Vector key type
131pub type VectorKey = u64;
132
133/// BPS (Binary Projection Search) index
134struct BpsIndex {
135    /// Projection vectors (normalized)
136    projections: Vec<Vec<f32>>,
137
138    /// Binary codes for each vector
139    codes: Vec<u64>,
140
141    /// Build time in nanoseconds
142    build_time_ns: u64,
143}
144
145impl BpsIndex {
146    fn build(vectors: &[f32], dim: usize, num_projections: usize) -> Self {
147        let start = Instant::now();
148        let num_vectors = vectors.len() / dim;
149
150        // Generate random projection vectors
151        let projections: Vec<Vec<f32>> = (0..num_projections)
152            .map(|i| {
153                (0..dim)
154                    .map(|j| {
155                        // Simple deterministic "random" for reproducibility
156                        let seed = (i * 1000 + j) as f32;
157                        (seed.sin() * 100.0) % 2.0 - 1.0
158                    })
159                    .collect()
160            })
161            .collect();
162
163        // Compute binary codes for each vector
164        let codes: Vec<u64> = (0..num_vectors)
165            .map(|i| {
166                let vec_start = i * dim;
167                let vector = &vectors[vec_start..vec_start + dim];
168
169                let mut code = 0u64;
170                for (p, proj) in projections.iter().enumerate().take(64) {
171                    let dot: f32 = vector.iter().zip(proj.iter()).map(|(a, b)| a * b).sum();
172
173                    if dot >= 0.0 {
174                        code |= 1 << p;
175                    }
176                }
177                code
178            })
179            .collect();
180
181        Self {
182            projections,
183            codes,
184            build_time_ns: start.elapsed().as_nanos() as u64,
185        }
186    }
187
188    fn query(&self, vector: &[f32]) -> u64 {
189        let mut code = 0u64;
190        for (p, proj) in self.projections.iter().enumerate().take(64) {
191            let dot: f32 = vector.iter().zip(proj.iter()).map(|(a, b)| a * b).sum();
192
193            if dot >= 0.0 {
194                code |= 1 << p;
195            }
196        }
197        code
198    }
199
200    fn hamming_distance(a: u64, b: u64) -> u32 {
201        (a ^ b).count_ones()
202    }
203}
204
205/// RDF (Random Dimension Filtering) index
206struct RdfIndex {
207    /// Sampled dimension indices
208    sample_dims: Vec<usize>,
209
210    /// Quantized values for each vector
211    quantized: Vec<Vec<u8>>,
212
213    /// Build time
214    build_time_ns: u64,
215}
216
217impl RdfIndex {
218    fn build(vectors: &[f32], dim: usize, num_sample_dims: usize) -> Self {
219        let start = Instant::now();
220        let num_vectors = vectors.len() / dim;
221
222        // Select random dimensions
223        let sample_dims: Vec<usize> = (0..num_sample_dims)
224            .map(|i| (i * 7919) % dim) // Deterministic spread
225            .collect();
226
227        // Quantize each vector on sampled dimensions
228        let quantized: Vec<Vec<u8>> = (0..num_vectors)
229            .map(|i| {
230                let vec_start = i * dim;
231                sample_dims
232                    .iter()
233                    .map(|&d| {
234                        let val = vectors[vec_start + d];
235                        // Quantize to 8 bits
236                        ((val.clamp(-1.0, 1.0) + 1.0) * 127.5) as u8
237                    })
238                    .collect()
239            })
240            .collect();
241
242        Self {
243            sample_dims,
244            quantized,
245            build_time_ns: start.elapsed().as_nanos() as u64,
246        }
247    }
248
249    fn query(&self, vector: &[f32]) -> Vec<u8> {
250        self.sample_dims
251            .iter()
252            .map(|&d| {
253                let val = vector.get(d).copied().unwrap_or(0.0);
254                ((val.clamp(-1.0, 1.0) + 1.0) * 127.5) as u8
255            })
256            .collect()
257    }
258
259    fn l1_distance(a: &[u8], b: &[u8]) -> u32 {
260        a.iter()
261            .zip(b.iter())
262            .map(|(&x, &y)| (x as i32 - y as i32).unsigned_abs())
263            .sum()
264    }
265}
266
267/// Mini-HNSW graph for small segments
268struct MiniGraph {
269    /// Neighbors for each node
270    #[allow(dead_code)]
271    neighbors: Vec<Vec<u32>>,
272
273    /// Entry point
274    #[allow(dead_code)]
275    entry_point: u32,
276
277    /// Build time
278    build_time_ns: u64,
279}
280
281impl MiniGraph {
282    fn build(vectors: &[f32], dim: usize, m: usize) -> Self {
283        let start = Instant::now();
284        let num_vectors = vectors.len() / dim;
285
286        if num_vectors == 0 {
287            return Self {
288                neighbors: Vec::new(),
289                entry_point: 0,
290                build_time_ns: 0,
291            };
292        }
293
294        // Simple greedy construction
295        let mut neighbors: Vec<Vec<u32>> = vec![Vec::with_capacity(m); num_vectors];
296
297        for i in 1..num_vectors {
298            let vec_i = &vectors[i * dim..(i + 1) * dim];
299
300            // Find nearest neighbors among existing nodes
301            let mut distances: Vec<(u32, f32)> = (0..i)
302                .map(|j| {
303                    let vec_j = &vectors[j * dim..(j + 1) * dim];
304                    let dist = l2_squared(vec_i, vec_j);
305                    (j as u32, dist)
306                })
307                .collect();
308
309            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
310
311            // Add bidirectional edges to top M
312            for &(neighbor, _) in distances.iter().take(m) {
313                neighbors[i].push(neighbor);
314                if neighbors[neighbor as usize].len() < m {
315                    neighbors[neighbor as usize].push(i as u32);
316                }
317            }
318        }
319
320        Self {
321            neighbors,
322            entry_point: 0,
323            build_time_ns: start.elapsed().as_nanos() as u64,
324        }
325    }
326}
327
328/// Lazy-built indexes (OnceLock)
329struct LazyIndexes {
330    bps: OnceLock<BpsIndex>,
331    rdf: OnceLock<RdfIndex>,
332    graph: OnceLock<MiniGraph>,
333}
334
335impl LazyIndexes {
336    fn new() -> Self {
337        Self {
338            bps: OnceLock::new(),
339            rdf: OnceLock::new(),
340            graph: OnceLock::new(),
341        }
342    }
343}
344
345/// Segment with lazy index construction
346pub struct LazySegment {
347    /// Configuration
348    config: LazyConfig,
349
350    /// Vector data (contiguous)
351    data: Vec<f32>,
352
353    /// Key to index mapping
354    key_to_index: HashMap<VectorKey, u32>,
355
356    /// Index to key mapping
357    index_to_key: Vec<VectorKey>,
358
359    /// Lazy indexes
360    indexes: LazyIndexes,
361
362    /// Statistics
363    #[allow(dead_code)]
364    stats: SegmentStats,
365}
366
367impl LazySegment {
368    /// Create a new lazy segment from vectors
369    pub fn new(vectors: Vec<(VectorKey, Vec<f32>)>, config: LazyConfig) -> Self {
370        let dim = config.dim;
371        let num_vectors = vectors.len();
372
373        let mut data = Vec::with_capacity(num_vectors * dim);
374        let mut key_to_index = HashMap::with_capacity(num_vectors);
375        let mut index_to_key = Vec::with_capacity(num_vectors);
376
377        for (i, (key, vector)) in vectors.into_iter().enumerate() {
378            data.extend_from_slice(&vector);
379            key_to_index.insert(key, i as u32);
380            index_to_key.push(key);
381        }
382
383        Self {
384            config,
385            data,
386            key_to_index,
387            index_to_key,
388            indexes: LazyIndexes::new(),
389            stats: SegmentStats::default(),
390        }
391    }
392
393    /// Create from flat data
394    pub fn from_flat(flat_data: Vec<f32>, keys: Vec<VectorKey>, config: LazyConfig) -> Self {
395        let key_to_index: HashMap<_, _> = keys
396            .iter()
397            .enumerate()
398            .map(|(i, &k)| (k, i as u32))
399            .collect();
400
401        Self {
402            config,
403            data: flat_data,
404            key_to_index,
405            index_to_key: keys,
406            indexes: LazyIndexes::new(),
407            stats: SegmentStats::default(),
408        }
409    }
410
411    /// Number of vectors
412    pub fn len(&self) -> usize {
413        self.index_to_key.len()
414    }
415
416    /// Check if empty
417    pub fn is_empty(&self) -> bool {
418        self.index_to_key.is_empty()
419    }
420
421    /// Get vector by key
422    pub fn get(&self, key: VectorKey) -> Option<&[f32]> {
423        self.key_to_index.get(&key).map(|&idx| {
424            let start = idx as usize * self.config.dim;
425            &self.data[start..start + self.config.dim]
426        })
427    }
428
429    /// Get vector by index
430    pub fn get_by_index(&self, index: u32) -> Option<&[f32]> {
431        if (index as usize) < self.index_to_key.len() {
432            let start = index as usize * self.config.dim;
433            Some(&self.data[start..start + self.config.dim])
434        } else {
435            None
436        }
437    }
438
439    /// Search for k nearest neighbors
440    ///
441    /// This will trigger index construction on first call.
442    pub fn search(&self, query: &[f32], k: usize) -> Vec<(VectorKey, f32)> {
443        if self.is_empty() {
444            return Vec::new();
445        }
446
447        let num_vectors = self.len();
448        let _dim = self.config.dim;
449
450        // For small segments, just do brute force
451        if num_vectors <= 100 {
452            return self.brute_force_search(query, k);
453        }
454
455        // Use available indexes for candidate generation
456        let candidates: Vec<u32> = if self.config.enable_bps {
457            self.bps_candidates(query, k * 10)
458        } else if self.config.enable_rdf {
459            self.rdf_candidates(query, k * 10)
460        } else {
461            (0..num_vectors as u32).collect()
462        };
463
464        // Rerank candidates with exact distance
465        let mut results: Vec<(VectorKey, f32)> = candidates
466            .into_iter()
467            .filter_map(|idx| {
468                let vec = self.get_by_index(idx)?;
469                let dist = l2_squared(query, vec);
470                let key = self.index_to_key[idx as usize];
471                Some((key, dist))
472            })
473            .collect();
474
475        // Sort by distance and take top k
476        results.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
477        results.truncate(k);
478
479        results
480    }
481
482    /// Brute force search for small segments
483    fn brute_force_search(&self, query: &[f32], k: usize) -> Vec<(VectorKey, f32)> {
484        let dim = self.config.dim;
485
486        let mut results: Vec<(VectorKey, f32)> = self
487            .index_to_key
488            .iter()
489            .enumerate()
490            .map(|(i, &key)| {
491                let start = i * dim;
492                let vec = &self.data[start..start + dim];
493                let dist = l2_squared(query, vec);
494                (key, dist)
495            })
496            .collect();
497
498        results.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
499        results.truncate(k);
500
501        results
502    }
503
504    /// Get candidates using BPS index (lazy built)
505    fn bps_candidates(&self, query: &[f32], limit: usize) -> Vec<u32> {
506        let bps = self.indexes.bps.get_or_init(|| {
507            BpsIndex::build(&self.data, self.config.dim, self.config.bps_projections)
508        });
509
510        let query_code = bps.query(query);
511
512        // Sort by Hamming distance
513        let mut scored: Vec<(u32, u32)> = bps
514            .codes
515            .iter()
516            .enumerate()
517            .map(|(i, &code)| (i as u32, BpsIndex::hamming_distance(query_code, code)))
518            .collect();
519
520        scored.sort_by_key(|&(_, dist)| dist);
521
522        scored.into_iter().take(limit).map(|(idx, _)| idx).collect()
523    }
524
525    /// Get candidates using RDF index (lazy built)
526    fn rdf_candidates(&self, query: &[f32], limit: usize) -> Vec<u32> {
527        let rdf = self.indexes.rdf.get_or_init(|| {
528            RdfIndex::build(&self.data, self.config.dim, self.config.rdf_sample_dims)
529        });
530
531        let query_quantized = rdf.query(query);
532
533        // Sort by L1 distance on quantized values
534        let mut scored: Vec<(u32, u32)> = rdf
535            .quantized
536            .iter()
537            .enumerate()
538            .map(|(i, quantized)| (i as u32, RdfIndex::l1_distance(&query_quantized, quantized)))
539            .collect();
540
541        scored.sort_by_key(|&(_, dist)| dist);
542
543        scored.into_iter().take(limit).map(|(idx, _)| idx).collect()
544    }
545
546    /// Pre-warm indexes (optional)
547    pub fn prewarm(&self) {
548        if self.config.enable_bps {
549            let _ = self.indexes.bps.get_or_init(|| {
550                BpsIndex::build(&self.data, self.config.dim, self.config.bps_projections)
551            });
552        }
553
554        if self.config.enable_rdf {
555            let _ = self.indexes.rdf.get_or_init(|| {
556                RdfIndex::build(&self.data, self.config.dim, self.config.rdf_sample_dims)
557            });
558        }
559
560        if self.config.enable_graph {
561            let _ = self
562                .indexes
563                .graph
564                .get_or_init(|| MiniGraph::build(&self.data, self.config.dim, self.config.graph_m));
565        }
566    }
567
568    /// Check if indexes are built
569    pub fn indexes_built(&self) -> IndexStatus {
570        IndexStatus {
571            bps_built: self.indexes.bps.get().is_some(),
572            rdf_built: self.indexes.rdf.get().is_some(),
573            graph_built: self.indexes.graph.get().is_some(),
574        }
575    }
576
577    /// Get build statistics
578    pub fn build_stats(&self) -> BuildStats {
579        BuildStats {
580            bps_time_ns: self.indexes.bps.get().map(|b| b.build_time_ns),
581            rdf_time_ns: self.indexes.rdf.get().map(|r| r.build_time_ns),
582            graph_time_ns: self.indexes.graph.get().map(|g| g.build_time_ns),
583        }
584    }
585}
586
587/// Index build status
588#[derive(Debug, Clone)]
589pub struct IndexStatus {
590    pub bps_built: bool,
591    pub rdf_built: bool,
592    pub graph_built: bool,
593}
594
595/// Build timing statistics
596#[derive(Debug, Clone)]
597pub struct BuildStats {
598    pub bps_time_ns: Option<u64>,
599    pub rdf_time_ns: Option<u64>,
600    pub graph_time_ns: Option<u64>,
601}
602
603/// Segment statistics
604#[derive(Debug, Clone, Default)]
605struct SegmentStats {
606    #[allow(dead_code)]
607    queries: u64,
608    #[allow(dead_code)]
609    index_builds: u64,
610}
611
612/// L2 squared distance
613fn l2_squared(a: &[f32], b: &[f32]) -> f32 {
614    a.iter()
615        .zip(b.iter())
616        .map(|(x, y)| {
617            let d = x - y;
618            d * d
619        })
620        .sum()
621}
622
623// ============================================================================
624// Lazy Segment Manager
625// ============================================================================
626
627/// Manager for multiple lazy segments
628pub struct LazySegmentManager {
629    segments: RwLock<Vec<Arc<LazySegment>>>,
630    #[allow(dead_code)]
631    config: LazyConfig,
632}
633
634impl LazySegmentManager {
635    /// Create a new manager
636    pub fn new(config: LazyConfig) -> Self {
637        Self {
638            segments: RwLock::new(Vec::new()),
639            config,
640        }
641    }
642
643    /// Add a segment
644    pub fn add_segment(&self, segment: LazySegment) {
645        let segment = Arc::new(segment);
646        let mut segments = self.segments.write().unwrap();
647        segments.push(segment);
648    }
649
650    /// Search across all segments
651    pub fn search(&self, query: &[f32], k: usize) -> Vec<(VectorKey, f32)> {
652        let segments = self.segments.read().unwrap();
653
654        let mut results: Vec<(VectorKey, f32)> =
655            segments.iter().flat_map(|s| s.search(query, k)).collect();
656
657        results.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
658        results.truncate(k);
659
660        results
661    }
662
663    /// Pre-warm all segments (background)
664    pub fn prewarm_all(&self) {
665        let segments = self.segments.read().unwrap();
666
667        for segment in segments.iter() {
668            segment.prewarm();
669        }
670    }
671
672    /// Number of segments
673    pub fn num_segments(&self) -> usize {
674        self.segments.read().unwrap().len()
675    }
676}
677
678// ============================================================================
679// Tests
680// ============================================================================
681
682#[cfg(test)]
683mod tests {
684    use super::*;
685
686    fn random_vectors(n: usize, dim: usize, seed: u64) -> Vec<(VectorKey, Vec<f32>)> {
687        (0..n)
688            .map(|i| {
689                let key = (seed * 1000 + i as u64) as VectorKey;
690                let vector: Vec<f32> = (0..dim)
691                    .map(|j| ((i * dim + j) as f32 * 0.001).sin())
692                    .collect();
693                (key, vector)
694            })
695            .collect()
696    }
697
698    #[test]
699    fn test_lazy_segment_basic() {
700        let config = LazyConfig {
701            dim: 4,
702            enable_bps: false,
703            enable_rdf: false,
704            ..Default::default()
705        };
706
707        let vectors = vec![
708            (1, vec![1.0, 0.0, 0.0, 0.0]),
709            (2, vec![0.0, 1.0, 0.0, 0.0]),
710            (3, vec![0.0, 0.0, 1.0, 0.0]),
711        ];
712
713        let segment = LazySegment::new(vectors, config);
714
715        assert_eq!(segment.len(), 3);
716        assert_eq!(segment.get(1).unwrap(), &[1.0, 0.0, 0.0, 0.0]);
717    }
718
719    #[test]
720    fn test_lazy_segment_search() {
721        let config = LazyConfig {
722            dim: 4,
723            enable_bps: false,
724            enable_rdf: false,
725            ..Default::default()
726        };
727
728        let vectors = vec![
729            (1, vec![1.0, 0.0, 0.0, 0.0]),
730            (2, vec![0.0, 1.0, 0.0, 0.0]),
731            (3, vec![0.5, 0.5, 0.0, 0.0]),
732        ];
733
734        let segment = LazySegment::new(vectors, config);
735
736        let query = vec![1.0, 0.0, 0.0, 0.0];
737        let results = segment.search(&query, 2);
738
739        assert_eq!(results.len(), 2);
740        assert_eq!(results[0].0, 1); // Exact match
741        assert!(results[0].1 < 0.01);
742    }
743
744    #[test]
745    fn test_lazy_index_build() {
746        let config = LazyConfig {
747            dim: 8,
748            enable_bps: true,
749            enable_rdf: true,
750            enable_graph: false,
751            ..Default::default()
752        };
753
754        let vectors = random_vectors(200, 8, 42);
755        let segment = LazySegment::new(vectors, config);
756
757        // Indexes not built yet
758        let status = segment.indexes_built();
759        assert!(!status.bps_built);
760        assert!(!status.rdf_built);
761
762        // Search triggers build
763        let query = vec![0.1; 8];
764        let _ = segment.search(&query, 5);
765
766        // Now BPS should be built (used for candidates)
767        let status = segment.indexes_built();
768        assert!(status.bps_built);
769    }
770
771    #[test]
772    fn test_prewarm() {
773        let config = LazyConfig {
774            dim: 8,
775            enable_bps: true,
776            enable_rdf: true,
777            enable_graph: true,
778            ..Default::default()
779        };
780
781        let vectors = random_vectors(50, 8, 42);
782        let segment = LazySegment::new(vectors, config);
783
784        // Pre-warm all indexes
785        segment.prewarm();
786
787        let status = segment.indexes_built();
788        assert!(status.bps_built);
789        assert!(status.rdf_built);
790        assert!(status.graph_built);
791
792        let stats = segment.build_stats();
793        assert!(stats.bps_time_ns.is_some());
794        assert!(stats.rdf_time_ns.is_some());
795        assert!(stats.graph_time_ns.is_some());
796    }
797
798    #[test]
799    fn test_segment_manager() {
800        let config = LazyConfig {
801            dim: 4,
802            enable_bps: false,
803            enable_rdf: false,
804            ..Default::default()
805        };
806
807        let manager = LazySegmentManager::new(config.clone());
808
809        // Add segments
810        let seg1 = LazySegment::new(vec![(1, vec![1.0, 0.0, 0.0, 0.0])], config.clone());
811        let seg2 = LazySegment::new(vec![(2, vec![0.0, 1.0, 0.0, 0.0])], config.clone());
812
813        manager.add_segment(seg1);
814        manager.add_segment(seg2);
815
816        assert_eq!(manager.num_segments(), 2);
817
818        // Search across both
819        let query = vec![0.5, 0.5, 0.0, 0.0];
820        let results = manager.search(&query, 2);
821
822        assert_eq!(results.len(), 2);
823    }
824
825    #[test]
826    fn test_bps_index() {
827        let dim = 8;
828        let vectors: Vec<f32> = (0..100 * dim).map(|i| (i as f32 * 0.01).sin()).collect();
829
830        let bps = BpsIndex::build(&vectors, dim, 32);
831
832        assert_eq!(bps.codes.len(), 100);
833        assert!(bps.build_time_ns > 0);
834
835        // Querying with a stored vector must reproduce that vector's stored code
836        // (build and query share the same projection logic — it is deterministic).
837        let first_vec = vectors[0..dim].to_vec();
838        let code = bps.query(&first_vec);
839        assert_eq!(
840            code, bps.codes[0],
841            "query of a stored vector must match its build-time code"
842        );
843
844        // Determinism: the same query must always yield the same code.
845        assert_eq!(code, bps.query(&first_vec), "query must be deterministic");
846
847        // Only the low `num_projections` (32) bits may be set.
848        assert_eq!(
849            code & !((1u64 << 32) - 1),
850            0,
851            "no bits beyond num_projections"
852        );
853
854        // A code is identical to itself under Hamming distance.
855        assert_eq!(BpsIndex::hamming_distance(code, code), 0);
856    }
857
858    #[test]
859    fn test_rdf_index() {
860        let dim = 8;
861        let vectors: Vec<f32> = (0..100 * dim).map(|i| (i as f32 * 0.01).sin()).collect();
862
863        let rdf = RdfIndex::build(&vectors, dim, 4);
864
865        assert_eq!(rdf.quantized.len(), 100);
866        assert_eq!(rdf.quantized[0].len(), 4);
867    }
868}