Skip to main content

diskann_rs/
incremental.rs

1//! # Incremental DiskANN Index
2//!
3//! This module provides `IncrementalDiskANN`, a wrapper around `DiskANN` that supports:
4//! - **Adding vectors** without rebuilding the entire index
5//! - **Deleting vectors** via tombstones (lazy deletion)
6//! - **Compaction** to merge deltas and remove tombstones
7//!
8//! ## Architecture
9//!
10//! ```text
11//! ┌─────────────────────────────────────────────────────────┐
12//! │                  IncrementalDiskANN                     │
13//! ├─────────────────────────────────────────────────────────┤
14//! │  ┌─────────────────┐  ┌─────────────────────────────┐   │
15//! │  │   Base Index    │  │        Delta Layer          │   │
16//! │  │   (DiskANN)     │  │   (in-memory vectors +      │   │
17//! │  │   - immutable   │  │    small Vamana graph)      │   │
18//! │  │   - mmap'd      │  │   - mutable                 │   │
19//! │  └─────────────────┘  └─────────────────────────────┘   │
20//! │                                                         │
21//! │  ┌─────────────────────────────────────────────────┐    │
22//! │  │              Tombstone Set                      │    │
23//! │  │   (deleted IDs from base, excluded at search)   │    │
24//! │  └─────────────────────────────────────────────────┘    │
25//! └─────────────────────────────────────────────────────────┘
26//! ```
27//!
28//! ## Usage
29//!
30//! ```no_run
31//! use anndists::dist::DistL2;
32//! use diskann_rs::{IncrementalDiskANN, DiskAnnParams};
33//!
34//! // Build initial index
35//! let vectors = vec![vec![0.0; 128]; 1000];
36//! let mut index = IncrementalDiskANN::<DistL2>::build_default(&vectors, "index.db").unwrap();
37//!
38//! // Add new vectors incrementally
39//! let new_vectors = vec![vec![1.0; 128]; 100];
40//! let new_ids = index.add_vectors(&new_vectors).unwrap();
41//!
42//! // Delete vectors (lazy - marks as tombstone)
43//! index.delete_vectors(&[0, 5, 10]).unwrap();
44//!
45//! // Search (automatically excludes tombstones, includes delta)
46//! let results = index.search(&vec![0.5; 128], 10, 64);
47//!
48//! // Compact when delta gets large (rebuilds everything)
49//! if index.should_compact() {
50//!     index.compact("index_v2.db").unwrap();
51//! }
52//! ```
53
54use crate::{DiskANN, DiskAnnError, DiskAnnParams};
55use anndists::prelude::Distance;
56use rayon::prelude::*;
57use std::collections::{BinaryHeap, HashSet};
58use std::cmp::{Ordering, Reverse};
59use std::sync::RwLock;
60
/// Configuration for the incremental index behavior.
///
/// Controls when `should_compact` recommends a rebuild and how the
/// in-memory delta graph is constructed.
#[derive(Clone, Copy, Debug)]
pub struct IncrementalConfig {
    /// Maximum vectors in delta before suggesting compaction
    pub delta_threshold: usize,
    /// Maximum tombstone ratio before suggesting compaction (0.0-1.0)
    pub tombstone_ratio_threshold: f32,
    /// Parameters for delta graph construction
    pub delta_params: DiskAnnParams,
}
71
72impl Default for IncrementalConfig {
73    fn default() -> Self {
74        Self {
75            delta_threshold: 10_000,
76            tombstone_ratio_threshold: 0.1,
77            delta_params: DiskAnnParams {
78                max_degree: 32,        // Smaller for delta
79                build_beam_width: 64,
80                alpha: 1.2,
81            },
82        }
83    }
84}
85
/// Internal candidate for search merging.
///
/// Ordered by distance (with the id as a tie-break) so it can live in a
/// `BinaryHeap`. The ordering is total — NaN distances do not panic — and
/// is consistent with `Eq`, as the `Ord` contract requires.
#[derive(Clone, Copy)]
struct Candidate {
    dist: f32,
    id: u64,  // Global ID (base or delta)
}

impl PartialEq for Candidate {
    fn eq(&self, other: &Self) -> bool {
        // Delegate to `cmp` so equality and ordering always agree.
        self.cmp(other) == Ordering::Equal
    }
}
impl Eq for Candidate {}
impl PartialOrd for Candidate {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for Candidate {
    fn cmp(&self, other: &Self) -> Ordering {
        // `total_cmp` gives a total order even for NaN (the previous
        // `partial_cmp(...).unwrap_or(Ordering::Equal)` silently treated
        // NaN as equal to everything, corrupting heap order). The id
        // tie-break keeps `cmp == Equal` equivalent to `eq`.
        self.dist
            .total_cmp(&other.dist)
            .then_with(|| self.id.cmp(&other.id))
    }
}
109
/// Delta layer: in-memory vectors with a small navigation graph.
///
/// Holds everything added after the base index was built; its results are
/// merged with base-index results at query time.
struct DeltaLayer {
    /// Vectors added after base index was built
    vectors: Vec<Vec<f32>>,
    /// Small Vamana-style adjacency graph for delta vectors
    /// graph[i] contains neighbor indices (local to delta)
    graph: Vec<Vec<u32>>,
    /// Entry point for delta searches (local index)
    entry_point: Option<u32>,
    /// Max degree for delta graph
    max_degree: usize,
}
122
123impl DeltaLayer {
124    fn new(max_degree: usize) -> Self {
125        Self {
126            vectors: Vec::new(),
127            graph: Vec::new(),
128            entry_point: None,
129            max_degree,
130        }
131    }
132
133    fn len(&self) -> usize {
134        self.vectors.len()
135    }
136
137    fn is_empty(&self) -> bool {
138        self.vectors.is_empty()
139    }
140
    /// Add vectors to the delta layer and update the graph.
    ///
    /// Returns the assigned *global* IDs (local index + `DELTA_ID_OFFSET`).
    /// Each new node is wired into the existing delta graph with a greedy
    /// search + alpha-prune, and reverse edges are added best-effort.
    fn add_vectors<D: Distance<f32> + Copy + Sync>(
        &mut self,
        vectors: &[Vec<f32>],
        dist: D,
    ) -> Vec<u64> {
        let start_idx = self.vectors.len();
        let mut new_ids = Vec::with_capacity(vectors.len());

        for (i, v) in vectors.iter().enumerate() {
            let local_idx = start_idx + i;
            // Global ID: DELTA_ID_OFFSET (1 << 48) + local index.
            let global_id = DELTA_ID_OFFSET + local_idx as u64;
            new_ids.push(global_id);

            self.vectors.push(v.clone());
            self.graph.push(Vec::new());

            // Connect to existing delta vectors using greedy search + prune.
            // The new node must already be in `vectors`/`graph` (above) so
            // the search can index it safely.
            if local_idx > 0 {
                let neighbors = self.find_and_prune_neighbors(local_idx, dist);
                self.graph[local_idx] = neighbors.clone();

                // Reverse edges (make graph bidirectional-ish). If a
                // neighbor is already at max degree the reverse edge is
                // skipped rather than re-pruned — a deliberate simplification.
                for &nb in &neighbors {
                    let nb_idx = nb as usize;
                    if !self.graph[nb_idx].contains(&(local_idx as u32))
                        && self.graph[nb_idx].len() < self.max_degree
                    {
                        self.graph[nb_idx].push(local_idx as u32);
                    }
                }
            }

            // First insertion: any entry point will do; refined below.
            if self.entry_point.is_none() {
                self.entry_point = Some(0);
            }
        }

        // Recompute entry point as approximate medoid so later searches
        // start near the "center" of the delta set.
        if self.vectors.len() > 1 {
            self.entry_point = Some(self.compute_medoid(dist));
        }

        new_ids
    }
188
189    fn compute_medoid<D: Distance<f32> + Copy + Sync>(&self, dist: D) -> u32 {
190        if self.vectors.is_empty() {
191            return 0;
192        }
193
194        // Compute centroid
195        let dim = self.vectors[0].len();
196        let mut centroid = vec![0.0f32; dim];
197        for v in &self.vectors {
198            for (i, &val) in v.iter().enumerate() {
199                centroid[i] += val;
200            }
201        }
202        for val in &mut centroid {
203            *val /= self.vectors.len() as f32;
204        }
205
206        // Find closest to centroid
207        let (best_idx, _) = self.vectors
208            .iter()
209            .enumerate()
210            .map(|(idx, v)| (idx, dist.eval(&centroid, v)))
211            .min_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
212            .unwrap_or((0, f32::MAX));
213
214        best_idx as u32
215    }
216
    /// Collect candidate neighbors for `node_idx` and alpha-prune them down
    /// to at most `max_degree` edges.
    ///
    /// Uses a greedy beam search from the current entry point when one
    /// exists; otherwise (earliest insertions) falls back to brute-force
    /// distances against every other delta vector.
    fn find_and_prune_neighbors<D: Distance<f32> + Copy>(
        &self,
        node_idx: usize,
        dist: D,
    ) -> Vec<u32> {
        let query = &self.vectors[node_idx];
        // Over-fetch relative to max_degree so the pruner has choices.
        let beam_width = (self.max_degree * 2).max(16);

        // Greedy search from entry point
        let candidates = if let Some(entry) = self.entry_point {
            self.greedy_search_internal(query, entry as usize, beam_width, dist)
        } else {
            // No entry point yet, just compute distances to all
            self.vectors.iter()
                .enumerate()
                .filter(|(i, _)| *i != node_idx)
                .map(|(i, v)| (i as u32, dist.eval(query, v)))
                .collect()
        };

        // Alpha-prune
        self.prune_neighbors(node_idx, &candidates, dist)
    }
240
    /// Best-first beam search over the delta graph starting from `start`.
    ///
    /// Expands the closest unexpanded node, keeps the `beam_width` best
    /// candidates seen so far, and stops once the closest frontier node
    /// cannot improve on the worst kept result. Returns unsorted
    /// `(local_index, distance)` pairs.
    fn greedy_search_internal<D: Distance<f32> + Copy>(
        &self,
        query: &[f32],
        start: usize,
        beam_width: usize,
        dist: D,
    ) -> Vec<(u32, f32)> {
        if self.vectors.is_empty() || start >= self.vectors.len() {
            return Vec::new();
        }

        let mut visited = HashSet::new();
        // Frontier is a min-heap (via Reverse): closest candidate pops first.
        let mut frontier: BinaryHeap<Reverse<Candidate>> = BinaryHeap::new();
        // Results is a max-heap: the worst kept candidate sits on top.
        let mut results: BinaryHeap<Candidate> = BinaryHeap::new();

        let start_dist = dist.eval(query, &self.vectors[start]);
        let start_cand = Candidate { dist: start_dist, id: start as u64 };
        frontier.push(Reverse(start_cand));
        results.push(start_cand);
        visited.insert(start);

        while let Some(Reverse(best)) = frontier.peek().copied() {
            // Termination: beam is full and even the closest frontier node
            // is no better than the worst result we are keeping.
            if results.len() >= beam_width {
                if let Some(worst) = results.peek() {
                    if best.dist >= worst.dist {
                        break;
                    }
                }
            }
            let Reverse(current) = frontier.pop().unwrap();
            let cur_idx = current.id as usize;

            if cur_idx < self.graph.len() {
                for &nb in &self.graph[cur_idx] {
                    let nb_idx = nb as usize;
                    // `insert` returns false if already visited.
                    if !visited.insert(nb_idx) {
                        continue;
                    }
                    if nb_idx >= self.vectors.len() {
                        continue;
                    }

                    let d = dist.eval(query, &self.vectors[nb_idx]);
                    let cand = Candidate { dist: d, id: nb as u64 };

                    if results.len() < beam_width {
                        results.push(cand);
                        frontier.push(Reverse(cand));
                    } else if d < results.peek().unwrap().dist {
                        // Beam full: evict the current worst result.
                        results.pop();
                        results.push(cand);
                        frontier.push(Reverse(cand));
                    }
                }
            }
        }

        results.into_vec()
            .into_iter()
            .map(|c| (c.id as u32, c.dist))
            .collect()
    }
303
    /// Robust-prune (Vamana-style): keep at most `max_degree` neighbors,
    /// preferring close candidates that are not dominated by an already
    /// selected neighbor.
    fn prune_neighbors<D: Distance<f32> + Copy>(
        &self,
        node_idx: usize,
        candidates: &[(u32, f32)],
        dist: D,
    ) -> Vec<u32> {
        if candidates.is_empty() {
            return Vec::new();
        }

        // NOTE(review): alpha is hard-coded here; config.delta_params.alpha
        // is not threaded down — confirm whether that is intentional.
        let alpha = 1.2f32;
        let mut sorted = candidates.to_vec();
        sorted.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());

        let mut pruned = Vec::new();

        // Candidates are visited closest-first; a candidate is rejected if
        // it lies too close to an already selected neighbor relative to its
        // own distance to the query node.
        for &(cand_id, cand_dist) in &sorted {
            if cand_id as usize == node_idx {
                continue;
            }

            let mut ok = true;
            for &sel in &pruned {
                let d = dist.eval(
                    &self.vectors[cand_id as usize],
                    &self.vectors[sel as usize],
                );
                if d < alpha * cand_dist {
                    ok = false;
                    break;
                }
            }

            if ok {
                pruned.push(cand_id);
                if pruned.len() >= self.max_degree {
                    break;
                }
            }
        }

        pruned
    }
347
348    fn search<D: Distance<f32> + Copy>(
349        &self,
350        query: &[f32],
351        k: usize,
352        beam_width: usize,
353        dist: D,
354    ) -> Vec<(u64, f32)> {
355        if self.vectors.is_empty() {
356            return Vec::new();
357        }
358
359        let entry = self.entry_point.unwrap_or(0) as usize;
360        let mut results = self.greedy_search_internal(query, entry, beam_width, dist);
361        results.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
362        results.truncate(k);
363
364        // Convert local IDs to global delta IDs
365        results.into_iter()
366            .map(|(local_id, d)| (DELTA_ID_OFFSET + local_id as u64, d))
367            .collect()
368    }
369
    /// Borrow a delta vector by its *local* index (not the global ID).
    fn get_vector(&self, local_idx: usize) -> Option<&Vec<f32>> {
        self.vectors.get(local_idx)
    }
373}
374
/// Offset for delta vector IDs to distinguish from base IDs.
///
/// Base IDs are plain row indices into the base index; delta IDs live at
/// `DELTA_ID_OFFSET + local_index`, so the two ranges never collide as long
/// as the base holds fewer than 2^48 vectors.
const DELTA_ID_OFFSET: u64 = 1u64 << 48;

/// Check if an ID is from the delta layer
#[inline]
pub fn is_delta_id(id: u64) -> bool {
    id >= DELTA_ID_OFFSET
}

/// Convert a delta global ID to local delta index.
///
/// The caller must pass a delta ID (see [`is_delta_id`]); a base ID would
/// silently wrap in release builds, which this now catches in debug builds.
#[inline]
pub fn delta_local_idx(id: u64) -> usize {
    debug_assert!(is_delta_id(id), "delta_local_idx called with base ID {id}");
    (id - DELTA_ID_OFFSET) as usize
}
389
/// An incremental DiskANN index supporting add/delete without full rebuild.
///
/// Adds go to an in-memory delta layer; deletes are recorded as tombstones
/// and filtered at search time. `&self` methods take the internal `RwLock`s,
/// so concurrent reads/adds/deletes are safe; compaction requires `&mut self`.
pub struct IncrementalDiskANN<D>
where
    D: Distance<f32> + Send + Sync + Copy + Clone + 'static,
{
    /// The base immutable index (memory-mapped); `None` for delta-only indexes
    base: Option<DiskANN<D>>,
    /// Delta layer for newly added vectors
    delta: RwLock<DeltaLayer>,
    /// Set of deleted vector IDs (tombstones)
    tombstones: RwLock<HashSet<u64>>,
    /// Distance metric
    dist: D,
    /// Configuration
    config: IncrementalConfig,
    /// Path to the base index file (`None` until a base exists)
    base_path: Option<String>,
    /// Dimensionality
    dim: usize,
}
410
impl<D> IncrementalDiskANN<D>
where
    D: Distance<f32> + Send + Sync + Copy + Clone + Default + 'static,
{
    /// Build a new incremental index with default parameters.
    ///
    /// # Errors
    /// Propagates any failure from the underlying `DiskANN` build.
    pub fn build_default(
        vectors: &[Vec<f32>],
        file_path: &str,
    ) -> Result<Self, DiskAnnError> {
        Self::build_with_config(vectors, file_path, IncrementalConfig::default())
    }

    /// Open an existing index for incremental updates.
    ///
    /// # Errors
    /// Fails if the base index file cannot be opened or parsed.
    pub fn open(path: &str) -> Result<Self, DiskAnnError> {
        Self::open_with_config(path, IncrementalConfig::default())
    }
}
428
429impl<D> IncrementalDiskANN<D>
430where
431    D: Distance<f32> + Send + Sync + Copy + Clone + 'static,
432{
    /// Build a new incremental index with custom configuration.
    ///
    /// Builds the immutable base index on disk at `file_path`; the delta
    /// layer and tombstone set start empty.
    ///
    /// # Errors
    /// Propagates any failure from the underlying `DiskANN` build.
    pub fn build_with_config(
        vectors: &[Vec<f32>],
        file_path: &str,
        config: IncrementalConfig,
    ) -> Result<Self, DiskAnnError>
    where
        D: Default,
    {
        let dist = D::default();
        // Dimensionality is inferred from the first vector (0 if empty).
        let dim = vectors.first().map(|v| v.len()).unwrap_or(0);

        let base = DiskANN::<D>::build_index_default(vectors, dist, file_path)?;

        Ok(Self {
            base: Some(base),
            delta: RwLock::new(DeltaLayer::new(config.delta_params.max_degree)),
            tombstones: RwLock::new(HashSet::new()),
            dist,
            config,
            base_path: Some(file_path.to_string()),
            dim,
        })
    }
457
    /// Open an existing index with custom configuration.
    ///
    /// The delta layer and tombstone set start empty; dimensionality is
    /// taken from the stored base index.
    ///
    /// # Errors
    /// Fails if the base index file cannot be opened or parsed.
    pub fn open_with_config(path: &str, config: IncrementalConfig) -> Result<Self, DiskAnnError>
    where
        D: Default,
    {
        let dist = D::default();
        let base = DiskANN::<D>::open_index_default_metric(path)?;
        let dim = base.dim;

        Ok(Self {
            base: Some(base),
            delta: RwLock::new(DeltaLayer::new(config.delta_params.max_degree)),
            tombstones: RwLock::new(HashSet::new()),
            dist,
            config,
            base_path: Some(path.to_string()),
            dim,
        })
    }
477
    /// Create an empty incremental index (no base, delta-only).
    ///
    /// All vectors added afterwards live in the delta layer until a
    /// compaction produces a base index.
    pub fn new_empty(dim: usize, dist: D, config: IncrementalConfig) -> Self {
        Self {
            base: None,
            delta: RwLock::new(DeltaLayer::new(config.delta_params.max_degree)),
            tombstones: RwLock::new(HashSet::new()),
            dist,
            config,
            base_path: None,
            dim,
        }
    }
490
491    /// Add new vectors to the index. Returns their assigned IDs.
492    pub fn add_vectors(&self, vectors: &[Vec<f32>]) -> Result<Vec<u64>, DiskAnnError> {
493        if vectors.is_empty() {
494            return Ok(Vec::new());
495        }
496
497        // Validate dimensions
498        for (i, v) in vectors.iter().enumerate() {
499            if v.len() != self.dim {
500                return Err(DiskAnnError::IndexError(format!(
501                    "Vector {} has dimension {} but index expects {}",
502                    i, v.len(), self.dim
503                )));
504            }
505        }
506
507        let mut delta = self.delta.write().unwrap();
508        let ids = delta.add_vectors(vectors, self.dist);
509        Ok(ids)
510    }
511
512    /// Delete vectors by their IDs (lazy deletion via tombstones)
513    pub fn delete_vectors(&self, ids: &[u64]) -> Result<(), DiskAnnError> {
514        let mut tombstones = self.tombstones.write().unwrap();
515        for &id in ids {
516            tombstones.insert(id);
517        }
518        Ok(())
519    }
520
    /// Check if a vector ID has been deleted (i.e. is in the tombstone set).
    pub fn is_deleted(&self, id: u64) -> bool {
        self.tombstones.read().unwrap().contains(&id)
    }
525
526    /// Search the index, merging results from base and delta, excluding tombstones
527    pub fn search(&self, query: &[f32], k: usize, beam_width: usize) -> Vec<u64> {
528        self.search_with_dists(query, k, beam_width)
529            .into_iter()
530            .map(|(id, _)| id)
531            .collect()
532    }
533
534    /// Search returning (id, distance) pairs
535    pub fn search_with_dists(&self, query: &[f32], k: usize, beam_width: usize) -> Vec<(u64, f32)> {
536        let tombstones = self.tombstones.read().unwrap();
537        let delta = self.delta.read().unwrap();
538
539        // Collect candidates from both layers
540        let mut all_candidates: Vec<(u64, f32)> = Vec::with_capacity(k * 2);
541
542        // Search base index
543        if let Some(ref base) = self.base {
544            let base_results = base.search_with_dists(query, k + tombstones.len(), beam_width);
545            for (id, dist) in base_results {
546                let global_id = id as u64;
547                if !tombstones.contains(&global_id) {
548                    all_candidates.push((global_id, dist));
549                }
550            }
551        }
552
553        // Search delta layer
554        if !delta.is_empty() {
555            let delta_results = delta.search(query, k, beam_width, self.dist);
556            for (id, dist) in delta_results {
557                if !tombstones.contains(&id) {
558                    all_candidates.push((id, dist));
559                }
560            }
561        }
562
563        // Merge and return top-k
564        all_candidates.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
565        all_candidates.truncate(k);
566        all_candidates
567    }
568
    /// Parallel batch search.
    ///
    /// Runs one `search` per query on the rayon thread pool; the output
    /// order matches the input order.
    pub fn search_batch(
        &self,
        queries: &[Vec<f32>],
        k: usize,
        beam_width: usize,
    ) -> Vec<Vec<u64>> {
        queries
            .par_iter()
            .map(|q| self.search(q, k, beam_width))
            .collect()
    }
581
582    /// Get a vector by its ID (works for both base and delta)
583    pub fn get_vector(&self, id: u64) -> Option<Vec<f32>> {
584        if is_delta_id(id) {
585            let delta = self.delta.read().unwrap();
586            delta.get_vector(delta_local_idx(id)).cloned()
587        } else if let Some(ref base) = self.base {
588            let idx = id as usize;
589            if idx < base.num_vectors {
590                Some(base.get_vector(idx))
591            } else {
592                None
593            }
594        } else {
595            None
596        }
597    }
598
599    /// Check if compaction is recommended
600    pub fn should_compact(&self) -> bool {
601        let delta = self.delta.read().unwrap();
602        let tombstones = self.tombstones.read().unwrap();
603
604        let base_size = self.base.as_ref().map(|b| b.num_vectors).unwrap_or(0);
605        let total_size = base_size + delta.len();
606
607        // Check delta threshold
608        if delta.len() >= self.config.delta_threshold {
609            return true;
610        }
611
612        // Check tombstone ratio
613        if total_size > 0 {
614            let tombstone_ratio = tombstones.len() as f32 / total_size as f32;
615            if tombstone_ratio >= self.config.tombstone_ratio_threshold {
616                return true;
617            }
618        }
619
620        false
621    }
622
    /// Compact the index: merge base + delta, remove tombstones, write new file.
    ///
    /// Rebuilds a fresh base index at `new_path` from every live vector,
    /// then resets the delta layer and tombstone set. Surviving vectors are
    /// re-numbered densely, so IDs are NOT stable across a compaction.
    ///
    /// # Errors
    /// Fails if no vectors remain after tombstone removal, or if the
    /// underlying index build fails.
    pub fn compact(&mut self, new_path: &str) -> Result<(), DiskAnnError>
    where
        D: Default,
    {
        // Clone the tombstone set so its read lock is released immediately.
        let tombstones = self.tombstones.read().unwrap().clone();
        let delta = self.delta.read().unwrap();

        // Collect all live vectors
        let mut all_vectors: Vec<Vec<f32>> = Vec::new();

        // Add base vectors (excluding tombstones)
        if let Some(ref base) = self.base {
            for i in 0..base.num_vectors {
                if !tombstones.contains(&(i as u64)) {
                    all_vectors.push(base.get_vector(i));
                }
            }
        }

        // Add delta vectors (excluding tombstones)
        for (i, v) in delta.vectors.iter().enumerate() {
            let global_id = DELTA_ID_OFFSET + i as u64;
            if !tombstones.contains(&global_id) {
                all_vectors.push(v.clone());
            }
        }

        // Release the delta read lock before the (potentially long) rebuild.
        drop(delta);
        drop(tombstones);

        if all_vectors.is_empty() {
            return Err(DiskAnnError::IndexError(
                "Cannot compact: no vectors remaining after removing tombstones".to_string()
            ));
        }

        // Build new index
        let new_base = DiskANN::<D>::build_index_default(&all_vectors, self.dist, new_path)?;

        // Replace state: new base, empty delta, empty tombstones.
        self.base = Some(new_base);
        self.delta = RwLock::new(DeltaLayer::new(self.config.delta_params.max_degree));
        self.tombstones = RwLock::new(HashSet::new());
        self.base_path = Some(new_path.to_string());

        Ok(())
    }
671
    /// Serialize the entire incremental index to bytes.
    ///
    /// All multi-byte integers are little-endian.
    ///
    /// Format:
    /// ```text
    /// [has_base:u8][base_len:u64][base_bytes...]
    /// [dim:u64]
    /// [num_delta:u64][delta_vectors flat: num_delta * dim * f32]
    /// [num_delta_graph:u64][for each: [deg:u32][neighbors: deg * u32]]
    /// [entry_point:i64] (-1 if None)
    /// [max_degree:u64]
    /// [num_tombstones:u64][tombstone_ids: num_tombstones * u64]
    /// ```
    pub fn to_bytes(&self) -> Vec<u8> {
        let delta = self.delta.read().unwrap();
        let tombstones = self.tombstones.read().unwrap();

        let mut out = Vec::new();

        // Base index: 1-byte presence flag, then length-prefixed payload.
        if let Some(ref base) = self.base {
            out.push(1u8);
            let base_bytes = base.to_bytes();
            out.extend_from_slice(&(base_bytes.len() as u64).to_le_bytes());
            out.extend_from_slice(&base_bytes);
        } else {
            out.push(0u8);
        }

        // Dimension
        out.extend_from_slice(&(self.dim as u64).to_le_bytes());

        // Delta vectors, flattened f32s (bytemuck reinterprets in place,
        // which is little-endian on all supported targets).
        out.extend_from_slice(&(delta.vectors.len() as u64).to_le_bytes());
        for v in &delta.vectors {
            let bytes: &[u8] = bytemuck::cast_slice(v);
            out.extend_from_slice(bytes);
        }

        // Delta graph: per node, a u32 degree then the neighbor list.
        out.extend_from_slice(&(delta.graph.len() as u64).to_le_bytes());
        for neighbors in &delta.graph {
            out.extend_from_slice(&(neighbors.len() as u32).to_le_bytes());
            let bytes: &[u8] = bytemuck::cast_slice(neighbors);
            out.extend_from_slice(bytes);
        }

        // Entry point (-1 encodes None)
        let ep = delta.entry_point.map(|e| e as i64).unwrap_or(-1);
        out.extend_from_slice(&ep.to_le_bytes());

        // Max degree
        out.extend_from_slice(&(delta.max_degree as u64).to_le_bytes());

        // Tombstones (HashSet iteration order is arbitrary; readers must
        // not rely on ID ordering).
        out.extend_from_slice(&(tombstones.len() as u64).to_le_bytes());
        for &id in tombstones.iter() {
            out.extend_from_slice(&id.to_le_bytes());
        }

        out
    }
733
    /// Load an incremental index from bytes.
    ///
    /// Inverse of [`Self::to_bytes`]; see that method for the layout.
    ///
    /// # Errors
    /// Returns an `IndexError` if the buffer is truncated, or propagates a
    /// failure from deserializing the embedded base index.
    pub fn from_bytes(bytes: &[u8], dist: D, config: IncrementalConfig) -> Result<Self, DiskAnnError> {
        let mut pos = 0;

        // Helper to read a slice of $n bytes and advance `pos`,
        // bounds-checked so a truncated buffer errors instead of panicking.
        macro_rules! read_bytes {
            ($n:expr) => {{
                if pos + $n > bytes.len() {
                    return Err(DiskAnnError::IndexError("Incremental buffer truncated".into()));
                }
                let slice = &bytes[pos..pos + $n];
                pos += $n;
                slice
            }};
        }

        // Base index (presence flag + length-prefixed payload)
        let has_base = read_bytes!(1)[0];
        let base = if has_base == 1 {
            let base_len = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
            let base_data = read_bytes!(base_len).to_vec();
            Some(DiskANN::<D>::from_bytes(base_data, dist)?)
        } else {
            None
        };

        // Dimension
        let dim = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;

        // Delta vectors: num_delta rows of dim little-endian f32s each
        let num_delta = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
        let mut delta_vectors = Vec::with_capacity(num_delta);
        for _ in 0..num_delta {
            let vbytes = read_bytes!(dim * 4);
            let floats: Vec<f32> = vbytes
                .chunks_exact(4)
                .map(|c| f32::from_le_bytes(c.try_into().unwrap()))
                .collect();
            delta_vectors.push(floats);
        }

        // Delta graph: per node, a u32 degree then that many u32 neighbors
        let num_graph = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
        let mut delta_graph = Vec::with_capacity(num_graph);
        for _ in 0..num_graph {
            let deg = u32::from_le_bytes(read_bytes!(4).try_into().unwrap()) as usize;
            let nbytes = read_bytes!(deg * 4);
            let neighbors: Vec<u32> = nbytes
                .chunks_exact(4)
                .map(|c| u32::from_le_bytes(c.try_into().unwrap()))
                .collect();
            delta_graph.push(neighbors);
        }

        // Entry point (-1 encodes None)
        let ep = i64::from_le_bytes(read_bytes!(8).try_into().unwrap());
        let entry_point = if ep >= 0 { Some(ep as u32) } else { None };

        // Max degree
        let max_degree = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;

        // Tombstones
        let num_tombstones = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
        let mut tombstones = HashSet::with_capacity(num_tombstones);
        for _ in 0..num_tombstones {
            let id = u64::from_le_bytes(read_bytes!(8).try_into().unwrap());
            tombstones.insert(id);
        }

        Ok(Self {
            base,
            delta: RwLock::new(DeltaLayer {
                vectors: delta_vectors,
                graph: delta_graph,
                entry_point,
                max_degree,
            }),
            tombstones: RwLock::new(tombstones),
            dist,
            config,
            // The deserialized index is not tied to a file on disk.
            base_path: None,
            dim,
        })
    }
818
819    /// Get statistics about the index
820    pub fn stats(&self) -> IncrementalStats {
821        let delta = self.delta.read().unwrap();
822        let tombstones = self.tombstones.read().unwrap();
823        let base_count = self.base.as_ref().map(|b| b.num_vectors).unwrap_or(0);
824
825        IncrementalStats {
826            base_vectors: base_count,
827            delta_vectors: delta.len(),
828            tombstones: tombstones.len(),
829            total_live: base_count + delta.len() - tombstones.len(),
830            dim: self.dim,
831        }
832    }
833
    /// Get the dimensionality of vectors in this index.
    pub fn dim(&self) -> usize {
        self.dim
    }
838}
839
/// Statistics about an incremental index
#[derive(Debug, Clone)]
pub struct IncrementalStats {
    /// Vectors stored in the immutable base index (including tombstoned ones)
    pub base_vectors: usize,
    /// Vectors currently held in the in-memory delta layer
    pub delta_vectors: usize,
    /// Number of IDs marked as deleted
    pub tombstones: usize,
    /// base + delta vectors minus tombstones
    pub total_live: usize,
    /// Vector dimensionality
    pub dim: usize,
}
849
850#[cfg(test)]
851mod tests {
852    use super::*;
853    use anndists::dist::DistL2;
854    use std::fs;
855
856    fn euclid(a: &[f32], b: &[f32]) -> f32 {
857        a.iter().zip(b).map(|(x, y)| (x - y).powi(2)).sum::<f32>().sqrt()
858    }
859
    // Smoke test: build a tiny base index and verify search returns k hits.
    #[test]
    fn test_incremental_basic() {
        let path = "test_incremental_basic.db";
        let _ = fs::remove_file(path);

        // Build initial index
        let vectors = vec![
            vec![0.0, 0.0],
            vec![1.0, 0.0],
            vec![0.0, 1.0],
            vec![1.0, 1.0],
        ];

        let index = IncrementalDiskANN::<DistL2>::build_default(&vectors, path).unwrap();

        // Search should work
        let results = index.search(&[0.1, 0.1], 2, 8);
        assert_eq!(results.len(), 2);

        let _ = fs::remove_file(path);
    }

    // Vectors added after the build land in the delta layer (delta-range
    // IDs) and are found by subsequent searches.
    #[test]
    fn test_incremental_add() {
        let path = "test_incremental_add.db";
        let _ = fs::remove_file(path);

        let vectors = vec![
            vec![0.0, 0.0],
            vec![1.0, 0.0],
        ];

        let index = IncrementalDiskANN::<DistL2>::build_default(&vectors, path).unwrap();

        // Add new vectors
        let new_vecs = vec![vec![0.5, 0.5], vec![2.0, 2.0]];
        let new_ids = index.add_vectors(&new_vecs).unwrap();
        assert_eq!(new_ids.len(), 2);
        assert!(is_delta_id(new_ids[0]));

        // Search should find the new vector
        let results = index.search_with_dists(&[0.5, 0.5], 1, 8);
        assert!(!results.is_empty());

        // The closest should be the one we just added at [0.5, 0.5]
        let (_best_id, best_dist) = results[0];
        assert!(best_dist < 0.01, "Expected to find [0.5, 0.5], got dist {}", best_dist);

        let _ = fs::remove_file(path);
    }

    // Lazy deletion: a tombstoned base ID must be filtered from results.
    #[test]
    fn test_incremental_delete() {
        let path = "test_incremental_delete.db";
        let _ = fs::remove_file(path);

        let vectors = vec![
            vec![0.0, 0.0],  // id 0
            vec![1.0, 0.0],  // id 1
            vec![0.0, 1.0],  // id 2
        ];

        let index = IncrementalDiskANN::<DistL2>::build_default(&vectors, path).unwrap();

        // Delete vector 0
        index.delete_vectors(&[0]).unwrap();
        assert!(index.is_deleted(0));

        // Search near [0,0] should not return id 0
        let results = index.search(&[0.0, 0.0], 3, 8);
        assert!(!results.contains(&0), "Deleted vector should not appear in results");

        let _ = fs::remove_file(path);
    }
934
935    #[test]
936    fn test_incremental_compact() {
937        let path1 = "test_compact_v1.db";
938        let path2 = "test_compact_v2.db";
939        let _ = fs::remove_file(path1);
940        let _ = fs::remove_file(path2);
941
942        let vectors = vec![
943            vec![0.0, 0.0],
944            vec![1.0, 0.0],
945            vec![0.0, 1.0],
946            vec![1.0, 1.0],
947        ];
948
949        let mut index = IncrementalDiskANN::<DistL2>::build_default(&vectors, path1).unwrap();
950
951        // Add some vectors
952        index.add_vectors(&[vec![2.0, 2.0], vec![3.0, 3.0]]).unwrap();
953
954        // Delete some
955        index.delete_vectors(&[0, 1]).unwrap();
956
957        let stats_before = index.stats();
958        assert_eq!(stats_before.base_vectors, 4);
959        assert_eq!(stats_before.delta_vectors, 2);
960        assert_eq!(stats_before.tombstones, 2);
961
962        // Compact
963        index.compact(path2).unwrap();
964
965        let stats_after = index.stats();
966        assert_eq!(stats_after.base_vectors, 4); // 4 base - 2 deleted + 2 delta = 4
967        assert_eq!(stats_after.delta_vectors, 0);
968        assert_eq!(stats_after.tombstones, 0);
969
970        // Search should still work
971        let results = index.search(&[2.0, 2.0], 1, 8);
972        assert!(!results.is_empty());
973
974        let _ = fs::remove_file(path1);
975        let _ = fs::remove_file(path2);
976    }
977
978    #[test]
979    fn test_delta_only() {
980        // Test index with no base, only delta
981        let config = IncrementalConfig::default();
982        let index = IncrementalDiskANN::<DistL2>::new_empty(2, DistL2 {}, config);
983
984        // Add vectors
985        let vecs = vec![
986            vec![0.0, 0.0],
987            vec![1.0, 0.0],
988            vec![0.0, 1.0],
989            vec![1.0, 1.0],
990            vec![0.5, 0.5],
991        ];
992        index.add_vectors(&vecs).unwrap();
993
994        // Search
995        let results = index.search_with_dists(&[0.5, 0.5], 3, 8);
996        assert_eq!(results.len(), 3);
997
998        // Closest should be [0.5, 0.5]
999        let best_vec = index.get_vector(results[0].0).unwrap();
1000        let dist = euclid(&best_vec, &[0.5, 0.5]);
1001        assert!(dist < 0.01);
1002    }
1003
1004    #[test]
1005    fn test_incremental_to_bytes_from_bytes() {
1006        let path = "test_incr_bytes_rt.db";
1007        let _ = fs::remove_file(path);
1008
1009        let vectors = vec![
1010            vec![0.0, 0.0],
1011            vec![1.0, 0.0],
1012            vec![0.0, 1.0],
1013            vec![1.0, 1.0],
1014        ];
1015
1016        let index = IncrementalDiskANN::<DistL2>::build_default(&vectors, path).unwrap();
1017
1018        // Add delta vectors
1019        index.add_vectors(&[vec![0.5, 0.5], vec![2.0, 2.0]]).unwrap();
1020
1021        // Delete one
1022        index.delete_vectors(&[0]).unwrap();
1023
1024        let bytes = index.to_bytes();
1025
1026        let index2 = IncrementalDiskANN::<DistL2>::from_bytes(
1027            &bytes, DistL2 {}, IncrementalConfig::default()
1028        ).unwrap();
1029
1030        let stats = index2.stats();
1031        assert_eq!(stats.base_vectors, 4);
1032        assert_eq!(stats.delta_vectors, 2);
1033        assert_eq!(stats.tombstones, 1);
1034
1035        // Search should exclude tombstone and include delta
1036        let results = index2.search(&[0.5, 0.5], 3, 8);
1037        assert!(!results.contains(&0), "Deleted vector should not appear");
1038
1039        let _ = fs::remove_file(path);
1040    }
1041}