1use crate::filtered::Filter;
57use crate::quantized::{QuantizerState, quantized_distance_from_codes};
58use crate::pq::{ProductQuantizer, PQConfig};
59use crate::sq::{F16Quantizer, Int8Quantizer, VectorQuantizer};
60use crate::{beam_search, BeamSearchConfig, GraphIndex, DiskANN, DiskAnnError, DiskAnnParams, PAD_U32};
61use anndists::prelude::Distance;
62use rayon::prelude::*;
63use std::collections::{BinaryHeap, HashSet};
64use std::cmp::{Ordering, Reverse};
65use std::sync::RwLock;
66
/// Magic number identifying the incremental-index serialization format
/// (0x494E4352 is "INCR" in ASCII), written first by `to_bytes`.
const INCR_MAGIC: u32 = 0x494E4352;
/// Version tag of the serialization layout produced by `to_bytes`.
const INCR_FORMAT_VERSION: u32 = 1;
71
/// Tuning knobs for the incremental index.
#[derive(Clone, Copy, Debug)]
pub struct IncrementalConfig {
    /// Delta-layer size at which `should_compact` reports true.
    pub delta_threshold: usize,
    /// Tombstone-to-total-vector ratio at which `should_compact` reports true.
    pub tombstone_ratio_threshold: f32,
    /// Parameters for the delta layer; its `max_degree` bounds the
    /// in-memory graph's out-degree.
    pub delta_params: DiskAnnParams,
}
82
83impl Default for IncrementalConfig {
84 fn default() -> Self {
85 Self {
86 delta_threshold: 10_000,
87 tombstone_ratio_threshold: 0.1,
88 delta_params: DiskAnnParams {
89 max_degree: 32, build_beam_width: 64,
91 alpha: 1.2,
92 },
93 }
94 }
95}
96
/// Options for quantized builds of the incremental index.
#[derive(Clone, Copy, Debug)]
pub struct IncrementalQuantizedConfig {
    /// Number of quantized candidates re-ranked with exact distances in
    /// `search_with_dists` (0 disables the re-rank pass).
    pub rerank_size: usize,
}
103
104impl Default for IncrementalQuantizedConfig {
105 fn default() -> Self {
106 Self { rerank_size: 0 }
107 }
108}
109
/// Search candidate: a node id paired with its distance to the query.
///
/// Ordered by distance via `f32::total_cmp` (total order, deterministic on
/// NaN) with `id` as a tie-breaker, and `PartialEq`/`PartialOrd` are derived
/// from `Ord`. The previous impls were inconsistent: `eq` compared
/// `(dist, id)` while `partial_cmp` compared only `dist`, so two candidates
/// with equal distance but different ids compared as `Some(Equal)` yet were
/// not `==` — a violation of the `PartialOrd` contract. `Ord` also collapsed
/// NaN comparisons to `Equal`, which is not a total order.
#[derive(Clone, Copy, Debug)]
struct Candidate {
    dist: f32,
    id: u64,
}

impl Ord for Candidate {
    fn cmp(&self, other: &Self) -> Ordering {
        // total_cmp imposes a total order on f32, so no unwrap is needed.
        self.dist
            .total_cmp(&other.dist)
            .then_with(|| self.id.cmp(&other.id))
    }
}

impl PartialOrd for Candidate {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl PartialEq for Candidate {
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == Ordering::Equal
    }
}

impl Eq for Candidate {}
133
/// In-memory graph over vectors added since the last compaction.
pub(crate) struct DeltaLayer {
    /// Appended vectors; a vector's local index is its position here.
    pub(crate) vectors: Vec<Vec<f32>>,
    /// Adjacency lists over local indices, kept parallel to `vectors`.
    pub(crate) graph: Vec<Vec<u32>>,
    /// Local index of the search entry point (the medoid once >1 vector).
    pub(crate) entry_point: Option<u32>,
    /// Maximum out-degree enforced by `prune_neighbors`.
    pub(crate) max_degree: usize,
}
146
147#[allow(dead_code)]
148impl DeltaLayer {
149 fn new(max_degree: usize) -> Self {
150 Self {
151 vectors: Vec::new(),
152 graph: Vec::new(),
153 entry_point: None,
154 max_degree,
155 }
156 }
157
158 fn len(&self) -> usize {
159 self.vectors.len()
160 }
161
162 fn is_empty(&self) -> bool {
163 self.vectors.is_empty()
164 }
165
166 fn add_vectors<D: Distance<f32> + Copy + Sync>(
168 &mut self,
169 vectors: &[Vec<f32>],
170 dist: D,
171 ) -> Vec<u64> {
172 let start_idx = self.vectors.len();
173 let mut new_ids = Vec::with_capacity(vectors.len());
174
175 for (i, v) in vectors.iter().enumerate() {
176 let local_idx = start_idx + i;
177 let global_id = DELTA_ID_OFFSET + local_idx as u64;
179 new_ids.push(global_id);
180
181 self.vectors.push(v.clone());
182 self.graph.push(Vec::new());
183
184 if local_idx > 0 {
186 let neighbors = self.find_and_prune_neighbors(local_idx, dist);
187 self.graph[local_idx] = neighbors.clone();
188
189 for &nb in &neighbors {
191 let nb_idx = nb as usize;
192 if !self.graph[nb_idx].contains(&(local_idx as u32))
193 && self.graph[nb_idx].len() < self.max_degree
194 {
195 self.graph[nb_idx].push(local_idx as u32);
196 }
197 }
198 }
199
200 if self.entry_point.is_none() {
202 self.entry_point = Some(0);
203 }
204 }
205
206 if self.vectors.len() > 1 {
208 self.entry_point = Some(self.compute_medoid(dist));
209 }
210
211 new_ids
212 }
213
214 fn compute_medoid<D: Distance<f32> + Copy + Sync>(&self, dist: D) -> u32 {
215 if self.vectors.is_empty() {
216 return 0;
217 }
218
219 let dim = self.vectors[0].len();
221 let mut centroid = vec![0.0f32; dim];
222 for v in &self.vectors {
223 for (i, &val) in v.iter().enumerate() {
224 centroid[i] += val;
225 }
226 }
227 for val in &mut centroid {
228 *val /= self.vectors.len() as f32;
229 }
230
231 let (best_idx, _) = self.vectors
233 .iter()
234 .enumerate()
235 .map(|(idx, v)| (idx, dist.eval(¢roid, v)))
236 .min_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
237 .unwrap_or((0, f32::MAX));
238
239 best_idx as u32
240 }
241
242 fn find_and_prune_neighbors<D: Distance<f32> + Copy>(
243 &self,
244 node_idx: usize,
245 dist: D,
246 ) -> Vec<u32> {
247 let query = &self.vectors[node_idx];
248 let beam_width = (self.max_degree * 2).max(16);
249
250 let candidates = if let Some(entry) = self.entry_point {
252 self.greedy_search_internal(query, entry as usize, beam_width, dist)
253 } else {
254 self.vectors.iter()
256 .enumerate()
257 .filter(|(i, _)| *i != node_idx)
258 .map(|(i, v)| (i as u32, dist.eval(query, v)))
259 .collect()
260 };
261
262 self.prune_neighbors(node_idx, &candidates, dist)
264 }
265
266 fn greedy_search_internal<D: Distance<f32> + Copy>(
267 &self,
268 query: &[f32],
269 start: usize,
270 beam_width: usize,
271 dist: D,
272 ) -> Vec<(u32, f32)> {
273 if self.vectors.is_empty() || start >= self.vectors.len() {
274 return Vec::new();
275 }
276
277 let mut visited = HashSet::new();
278 let mut frontier: BinaryHeap<Reverse<Candidate>> = BinaryHeap::new();
279 let mut results: BinaryHeap<Candidate> = BinaryHeap::new();
280
281 let start_dist = dist.eval(query, &self.vectors[start]);
282 let start_cand = Candidate { dist: start_dist, id: start as u64 };
283 frontier.push(Reverse(start_cand));
284 results.push(start_cand);
285 visited.insert(start);
286
287 while let Some(Reverse(best)) = frontier.peek().copied() {
288 if results.len() >= beam_width {
289 if let Some(worst) = results.peek() {
290 if best.dist >= worst.dist {
291 break;
292 }
293 }
294 }
295 let Reverse(current) = frontier.pop().unwrap();
296 let cur_idx = current.id as usize;
297
298 if cur_idx < self.graph.len() {
299 for &nb in &self.graph[cur_idx] {
300 let nb_idx = nb as usize;
301 if !visited.insert(nb_idx) {
302 continue;
303 }
304 if nb_idx >= self.vectors.len() {
305 continue;
306 }
307
308 let d = dist.eval(query, &self.vectors[nb_idx]);
309 let cand = Candidate { dist: d, id: nb as u64 };
310
311 if results.len() < beam_width {
312 results.push(cand);
313 frontier.push(Reverse(cand));
314 } else if d < results.peek().unwrap().dist {
315 results.pop();
316 results.push(cand);
317 frontier.push(Reverse(cand));
318 }
319 }
320 }
321 }
322
323 results.into_vec()
324 .into_iter()
325 .map(|c| (c.id as u32, c.dist))
326 .collect()
327 }
328
329 fn prune_neighbors<D: Distance<f32> + Copy>(
330 &self,
331 node_idx: usize,
332 candidates: &[(u32, f32)],
333 dist: D,
334 ) -> Vec<u32> {
335 if candidates.is_empty() {
336 return Vec::new();
337 }
338
339 let alpha = 1.2f32;
340 let mut sorted = candidates.to_vec();
341 sorted.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
342
343 let mut pruned = Vec::new();
344
345 for &(cand_id, cand_dist) in &sorted {
346 if cand_id as usize == node_idx {
347 continue;
348 }
349
350 let mut ok = true;
351 for &sel in &pruned {
352 let d = dist.eval(
353 &self.vectors[cand_id as usize],
354 &self.vectors[sel as usize],
355 );
356 if d < alpha * cand_dist {
357 ok = false;
358 break;
359 }
360 }
361
362 if ok {
363 pruned.push(cand_id);
364 if pruned.len() >= self.max_degree {
365 break;
366 }
367 }
368 }
369
370 pruned
371 }
372
373 fn search<D: Distance<f32> + Copy>(
374 &self,
375 query: &[f32],
376 k: usize,
377 beam_width: usize,
378 dist: D,
379 ) -> Vec<(u64, f32)> {
380 if self.vectors.is_empty() {
381 return Vec::new();
382 }
383
384 let entry = self.entry_point.unwrap_or(0) as usize;
385 let mut results = self.greedy_search_internal(query, entry, beam_width, dist);
386 results.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
387 results.truncate(k);
388
389 results.into_iter()
391 .map(|(local_id, d)| (DELTA_ID_OFFSET + local_id as u64, d))
392 .collect()
393 }
394
395 fn get_vector(&self, local_idx: usize) -> Option<&Vec<f32>> {
396 self.vectors.get(local_idx)
397 }
398}
399
/// Ids at or above this offset address the delta layer; ids below it are
/// positions in the base index.
const DELTA_ID_OFFSET: u64 = 1u64 << 48;

/// Returns true when `id` refers to a vector stored in the delta layer.
#[inline]
pub fn is_delta_id(id: u64) -> bool {
    id >= DELTA_ID_OFFSET
}

/// Converts a delta-layer global id back into its local index.
#[inline]
pub fn delta_local_idx(id: u64) -> usize {
    (id - DELTA_ID_OFFSET) as usize
}
414
/// Read-only view presenting base + delta as one contiguous u32 id space:
/// ids `0..base_count` are base nodes, ids from `base_count` up are delta
/// nodes shifted by `base_count`.
pub(crate) struct UnifiedView<'a, D: Distance<f32> + Copy + Send + Sync + 'static> {
    /// Immutable base index, if one has been built or opened.
    base: Option<&'a DiskANN<D>>,
    /// In-memory layer of vectors added since the last compaction.
    delta: &'a DeltaLayer,
    /// Global (base/delta-space) ids marked as deleted.
    tombstones: &'a HashSet<u64>,
    dist: D,
    /// Cached `base.num_vectors` (0 when there is no base).
    base_count: usize,
}
431
432impl<'a, D: Distance<f32> + Copy + Send + Sync + 'static> UnifiedView<'a, D> {
433 fn new(
434 base: Option<&'a DiskANN<D>>,
435 delta: &'a DeltaLayer,
436 tombstones: &'a HashSet<u64>,
437 dist: D,
438 ) -> Self {
439 let base_count = base.map(|b| b.num_vectors).unwrap_or(0);
440 Self { base, delta, tombstones, dist, base_count }
441 }
442
443 fn entry_points(&self) -> Vec<u32> {
445 let mut seeds = Vec::with_capacity(2);
446 if let Some(base) = self.base {
447 seeds.push(base.medoid_id);
448 }
449 if let Some(ep) = self.delta.entry_point {
450 seeds.push(self.base_count as u32 + ep);
451 }
452 seeds
453 }
454
455 fn to_global_u64(&self, id: u32) -> u64 {
457 let id_usize = id as usize;
458 if id_usize < self.base_count {
459 id_usize as u64
460 } else {
461 DELTA_ID_OFFSET + (id_usize - self.base_count) as u64
462 }
463 }
464}
465
/// Exposes the combined base + delta structure through the `GraphIndex`
/// trait so `beam_search` can traverse both layers transparently.
impl<'a, D: Distance<f32> + Copy + Send + Sync + 'static> GraphIndex for UnifiedView<'a, D> {
    fn num_vectors(&self) -> usize {
        self.base_count + self.delta.len()
    }

    fn dim(&self) -> usize {
        // Prefer the base's dimension; fall back to the first delta vector,
        // and report 0 for a completely empty view.
        if let Some(base) = self.base {
            base.dim
        } else if !self.delta.vectors.is_empty() {
            self.delta.vectors[0].len()
        } else {
            0
        }
    }

    fn entry_point(&self) -> u32 {
        // Base medoid wins when present; otherwise the delta entry point
        // shifted into the unified id space; 0 as a last resort.
        if let Some(base) = self.base {
            base.medoid_id
        } else if let Some(ep) = self.delta.entry_point {
            self.base_count as u32 + ep
        } else {
            0
        }
    }

    fn distance_to(&self, query: &[f32], id: u32) -> f32 {
        let id_usize = id as usize;
        if id_usize < self.base_count {
            // `base` is Some whenever base_count > 0, so unwrap is safe here.
            self.base.unwrap().distance_to(query, id_usize)
        } else {
            let delta_idx = id_usize - self.base_count;
            self.dist.eval(query, &self.delta.vectors[delta_idx])
        }
    }

    fn get_neighbors(&self, id: u32) -> Vec<u32> {
        let id_usize = id as usize;
        if id_usize < self.base_count {
            // Base adjacency lists are padded; strip PAD_U32 sentinels.
            self.base
                .unwrap()
                .get_neighbors(id)
                .iter()
                .copied()
                .filter(|&nb| nb != PAD_U32)
                .collect()
        } else {
            let delta_idx = id_usize - self.base_count;
            if delta_idx < self.delta.graph.len() {
                // Delta neighbors are stored as local indices; shift them
                // into the unified id space.
                self.delta.graph[delta_idx]
                    .iter()
                    .map(|&nb| nb + self.base_count as u32)
                    .collect()
            } else {
                Vec::new()
            }
        }
    }

    fn get_vector(&self, id: u32) -> Vec<f32> {
        let id_usize = id as usize;
        if id_usize < self.base_count {
            self.base.unwrap().get_vector(id_usize)
        } else {
            let delta_idx = id_usize - self.base_count;
            self.delta.vectors[delta_idx].clone()
        }
    }

    fn is_live(&self, id: u32) -> bool {
        // Translate the unified id into the external (base/delta) id space
        // before consulting the tombstone set.
        let global = if (id as usize) < self.base_count {
            id as u64
        } else {
            DELTA_ID_OFFSET + (id as usize - self.base_count) as u64
        };
        !self.tombstones.contains(&global)
    }
}
545
/// Selects which quantizer `build_full` trains for the base codes.
pub enum QuantizerKind {
    /// Half-precision codes (code size = 2 bytes per dimension).
    F16,
    /// Scalar int8 quantization (code size = quantizer dimension).
    Int8,
    /// Product quantization with the given configuration.
    PQ(PQConfig),
}
556
/// DiskANN index supporting online inserts (delta layer) and deletes
/// (tombstones) on top of an immutable base index, with periodic compaction.
pub struct IncrementalDiskANN<D>
where
    D: Distance<f32> + Send + Sync + Copy + Clone + 'static,
{
    /// Immutable base index (None for an index created via `new_empty`).
    base: Option<DiskANN<D>>,
    /// In-memory graph of vectors added since the last compaction.
    delta: RwLock<DeltaLayer>,
    /// Deleted ids, in the external (base/delta) id space.
    tombstones: RwLock<HashSet<u64>>,
    /// Distance functor shared by base and delta.
    dist: D,
    /// Compaction thresholds and delta-layer parameters.
    config: IncrementalConfig,
    /// File path of the base index, when one exists.
    base_path: Option<String>,
    /// Vector dimensionality enforced on insert.
    dim: usize,
    /// Per-vector label rows for base vectors (None when unlabeled).
    base_labels: Option<Vec<Vec<u64>>>,
    /// Per-vector label rows for delta vectors, parallel to the delta layer.
    delta_labels: RwLock<Vec<Vec<u64>>>,
    /// Number of u64 fields per label row (0 = labels disabled).
    num_label_fields: usize,
    /// Trained quantizer for approximate base distances (None = exact only).
    quantizer: Option<QuantizerState>,
    /// Quantized codes for base vectors, `code_size` bytes each.
    base_codes: Option<Vec<u8>>,
    /// Bytes per quantized code.
    code_size: usize,
    /// Candidates re-ranked with exact distances after a quantized search
    /// (0 disables re-ranking).
    rerank_size: usize,
}
590
impl<D> IncrementalDiskANN<D>
where
    D: Distance<f32> + Send + Sync + Copy + Clone + Default + 'static,
{
    /// Builds a base index from `vectors` at `file_path` using the default
    /// `IncrementalConfig`.
    pub fn build_default(
        vectors: &[Vec<f32>],
        file_path: &str,
    ) -> Result<Self, DiskAnnError> {
        Self::build_with_config(vectors, file_path, IncrementalConfig::default())
    }

    /// Opens an existing base index at `path` using the default config.
    pub fn open(path: &str) -> Result<Self, DiskAnnError> {
        Self::open_with_config(path, IncrementalConfig::default())
    }
}
608
609impl<D> IncrementalDiskANN<D>
610where
611 D: Distance<f32> + Send + Sync + Copy + Clone + 'static,
612{
613 pub fn build_with_config(
615 vectors: &[Vec<f32>],
616 file_path: &str,
617 config: IncrementalConfig,
618 ) -> Result<Self, DiskAnnError>
619 where
620 D: Default,
621 {
622 let dist = D::default();
623 let dim = vectors.first().map(|v| v.len()).unwrap_or(0);
624
625 let base = DiskANN::<D>::build_index_default(vectors, dist, file_path)?;
626
627 Ok(Self {
628 base: Some(base),
629 delta: RwLock::new(DeltaLayer::new(config.delta_params.max_degree)),
630 tombstones: RwLock::new(HashSet::new()),
631 dist,
632 config,
633 base_path: Some(file_path.to_string()),
634 dim,
635 base_labels: None,
636 delta_labels: RwLock::new(Vec::new()),
637 num_label_fields: 0,
638 quantizer: None,
639 base_codes: None,
640 code_size: 0,
641 rerank_size: 0,
642 })
643 }
644
645 pub fn open_with_config(path: &str, config: IncrementalConfig) -> Result<Self, DiskAnnError>
647 where
648 D: Default,
649 {
650 let dist = D::default();
651 let base = DiskANN::<D>::open_index_default_metric(path)?;
652 let dim = base.dim;
653
654 Ok(Self {
655 base: Some(base),
656 delta: RwLock::new(DeltaLayer::new(config.delta_params.max_degree)),
657 tombstones: RwLock::new(HashSet::new()),
658 dist,
659 config,
660 base_path: Some(path.to_string()),
661 dim,
662 base_labels: None,
663 delta_labels: RwLock::new(Vec::new()),
664 num_label_fields: 0,
665 quantizer: None,
666 base_codes: None,
667 code_size: 0,
668 rerank_size: 0,
669 })
670 }
671
672 pub fn new_empty(dim: usize, dist: D, config: IncrementalConfig) -> Self {
674 Self {
675 base: None,
676 delta: RwLock::new(DeltaLayer::new(config.delta_params.max_degree)),
677 tombstones: RwLock::new(HashSet::new()),
678 dist,
679 config,
680 base_path: None,
681 dim,
682 base_labels: None,
683 delta_labels: RwLock::new(Vec::new()),
684 num_label_fields: 0,
685 quantizer: None,
686 base_codes: None,
687 code_size: 0,
688 rerank_size: 0,
689 }
690 }
691
692 pub fn build_with_labels(
698 vectors: &[Vec<f32>],
699 labels: &[Vec<u64>],
700 file_path: &str,
701 config: IncrementalConfig,
702 ) -> Result<Self, DiskAnnError>
703 where
704 D: Default,
705 {
706 if vectors.len() != labels.len() {
707 return Err(DiskAnnError::IndexError(format!(
708 "vectors.len() ({}) != labels.len() ({})",
709 vectors.len(),
710 labels.len()
711 )));
712 }
713 let num_fields = labels.first().map(|l| l.len()).unwrap_or(0);
714 let mut idx = Self::build_with_config(vectors, file_path, config)?;
715 idx.base_labels = Some(labels.to_vec());
716 idx.num_label_fields = num_fields;
717 Ok(idx)
718 }
719
720 pub fn build_quantized_f16(
726 vectors: &[Vec<f32>],
727 file_path: &str,
728 config: IncrementalConfig,
729 quant_config: IncrementalQuantizedConfig,
730 ) -> Result<Self, DiskAnnError>
731 where
732 D: Default,
733 {
734 let dim = vectors.first().map(|v| v.len()).unwrap_or(0);
735 let mut idx = Self::build_with_config(vectors, file_path, config)?;
736 let f16q = F16Quantizer::new(dim);
737 let code_size = dim * 2;
738 let codes = encode_all_vecs(vectors, &f16q, code_size);
739 idx.quantizer = Some(QuantizerState::F16(f16q));
740 idx.base_codes = Some(codes);
741 idx.code_size = code_size;
742 idx.rerank_size = quant_config.rerank_size;
743 Ok(idx)
744 }
745
746 pub fn build_quantized_int8(
748 vectors: &[Vec<f32>],
749 file_path: &str,
750 config: IncrementalConfig,
751 quant_config: IncrementalQuantizedConfig,
752 ) -> Result<Self, DiskAnnError>
753 where
754 D: Default,
755 {
756 let mut idx = Self::build_with_config(vectors, file_path, config)?;
757 let int8q = Int8Quantizer::train(vectors)?;
758 let code_size = int8q.dim();
759 let codes = encode_all_vecs(vectors, &int8q, code_size);
760 idx.quantizer = Some(QuantizerState::Int8(int8q));
761 idx.base_codes = Some(codes);
762 idx.code_size = code_size;
763 idx.rerank_size = quant_config.rerank_size;
764 Ok(idx)
765 }
766
767 pub fn build_quantized_pq(
769 vectors: &[Vec<f32>],
770 file_path: &str,
771 config: IncrementalConfig,
772 pq_config: PQConfig,
773 quant_config: IncrementalQuantizedConfig,
774 ) -> Result<Self, DiskAnnError>
775 where
776 D: Default,
777 {
778 let mut idx = Self::build_with_config(vectors, file_path, config)?;
779 let pq = ProductQuantizer::train(vectors, pq_config)?;
780 let code_size = pq.stats().code_size_bytes;
781 let codes = encode_all_pq_vecs(vectors, &pq, code_size);
782 idx.quantizer = Some(QuantizerState::PQ(pq));
783 idx.base_codes = Some(codes);
784 idx.code_size = code_size;
785 idx.rerank_size = quant_config.rerank_size;
786 Ok(idx)
787 }
788
789 pub fn build_full(
791 vectors: &[Vec<f32>],
792 labels: &[Vec<u64>],
793 file_path: &str,
794 config: IncrementalConfig,
795 quantizer_kind: QuantizerKind,
796 quant_config: IncrementalQuantizedConfig,
797 ) -> Result<Self, DiskAnnError>
798 where
799 D: Default,
800 {
801 if vectors.len() != labels.len() {
802 return Err(DiskAnnError::IndexError(format!(
803 "vectors.len() ({}) != labels.len() ({})",
804 vectors.len(),
805 labels.len()
806 )));
807 }
808 let num_fields = labels.first().map(|l| l.len()).unwrap_or(0);
809 let dim = vectors.first().map(|v| v.len()).unwrap_or(0);
810
811 let mut idx = Self::build_with_config(vectors, file_path, config)?;
812 idx.base_labels = Some(labels.to_vec());
813 idx.num_label_fields = num_fields;
814 idx.rerank_size = quant_config.rerank_size;
815
816 match quantizer_kind {
817 QuantizerKind::F16 => {
818 let f16q = F16Quantizer::new(dim);
819 let code_size = dim * 2;
820 let codes = encode_all_vecs(vectors, &f16q, code_size);
821 idx.quantizer = Some(QuantizerState::F16(f16q));
822 idx.base_codes = Some(codes);
823 idx.code_size = code_size;
824 }
825 QuantizerKind::Int8 => {
826 let int8q = Int8Quantizer::train(vectors)?;
827 let code_size = int8q.dim();
828 let codes = encode_all_vecs(vectors, &int8q, code_size);
829 idx.quantizer = Some(QuantizerState::Int8(int8q));
830 idx.base_codes = Some(codes);
831 idx.code_size = code_size;
832 }
833 QuantizerKind::PQ(pq_config) => {
834 let pq = ProductQuantizer::train(vectors, pq_config)?;
835 let code_size = pq.stats().code_size_bytes;
836 let codes = encode_all_pq_vecs(vectors, &pq, code_size);
837 idx.quantizer = Some(QuantizerState::PQ(pq));
838 idx.base_codes = Some(codes);
839 idx.code_size = code_size;
840 }
841 }
842
843 Ok(idx)
844 }
845
846 pub fn add_vectors(&self, vectors: &[Vec<f32>]) -> Result<Vec<u64>, DiskAnnError> {
852 if vectors.is_empty() {
853 return Ok(Vec::new());
854 }
855
856 for (i, v) in vectors.iter().enumerate() {
858 if v.len() != self.dim {
859 return Err(DiskAnnError::IndexError(format!(
860 "Vector {} has dimension {} but index expects {}",
861 i, v.len(), self.dim
862 )));
863 }
864 }
865
866 let mut delta = self.delta.write().unwrap();
868 if self.num_label_fields > 0 {
869 let mut delta_labels = self.delta_labels.write().unwrap();
870 for _ in 0..vectors.len() {
871 delta_labels.push(vec![0u64; self.num_label_fields]);
872 }
873 }
874 let ids = delta.add_vectors(vectors, self.dist);
875 Ok(ids)
876 }
877
878 pub fn add_vectors_with_labels(
880 &self,
881 vectors: &[Vec<f32>],
882 labels: &[Vec<u64>],
883 ) -> Result<Vec<u64>, DiskAnnError> {
884 if vectors.is_empty() {
885 return Ok(Vec::new());
886 }
887 if vectors.len() != labels.len() {
888 return Err(DiskAnnError::IndexError(format!(
889 "vectors.len() ({}) != labels.len() ({})",
890 vectors.len(),
891 labels.len()
892 )));
893 }
894
895 for (i, v) in vectors.iter().enumerate() {
897 if v.len() != self.dim {
898 return Err(DiskAnnError::IndexError(format!(
899 "Vector {} has dimension {} but index expects {}",
900 i, v.len(), self.dim
901 )));
902 }
903 }
904
905 for (i, l) in labels.iter().enumerate() {
907 if self.num_label_fields > 0 && l.len() != self.num_label_fields {
908 return Err(DiskAnnError::IndexError(format!(
909 "Label {} has {} fields, expected {}",
910 i, l.len(), self.num_label_fields
911 )));
912 }
913 }
914
915 let mut delta = self.delta.write().unwrap();
917 let mut delta_labels = self.delta_labels.write().unwrap();
918 delta_labels.extend_from_slice(labels);
919 let ids = delta.add_vectors(vectors, self.dist);
920 Ok(ids)
921 }
922
923 pub fn delete_vectors(&self, ids: &[u64]) -> Result<(), DiskAnnError> {
925 let mut tombstones = self.tombstones.write().unwrap();
926 for &id in ids {
927 tombstones.insert(id);
928 }
929 Ok(())
930 }
931
932 pub fn is_deleted(&self, id: u64) -> bool {
934 self.tombstones.read().unwrap().contains(&id)
935 }
936
937 pub fn search(&self, query: &[f32], k: usize, beam_width: usize) -> Vec<u64> {
943 self.search_with_dists(query, k, beam_width)
944 .into_iter()
945 .map(|(id, _)| id)
946 .collect()
947 }
948
    /// Top-`k` search over base + delta, returning `(global_id, distance)`
    /// pairs. Uses quantized codes for base vectors when a quantizer is
    /// configured, optionally re-ranking with exact distances.
    pub fn search_with_dists(&self, query: &[f32], k: usize, beam_width: usize) -> Vec<(u64, f32)> {
        let tombstones = self.tombstones.read().unwrap();
        let delta = self.delta.read().unwrap();
        let view = UnifiedView::new(self.base.as_ref(), &delta, &tombstones, self.dist);
        let start_ids = view.entry_points();

        if start_ids.is_empty() {
            return Vec::new();
        }

        // Quantized path: score base vectors from compact codes; delta
        // vectors are never quantized and use exact distances.
        if let (Some(ref quantizer), Some(ref base_codes)) = (&self.quantizer, &self.base_codes) {
            let base_count = view.base_count;
            let code_size = self.code_size;
            let rerank_size = self.rerank_size;

            // PQ needs a per-query lookup table; scalar quantizers do not.
            let pq_table: Option<Vec<f32>> = match quantizer {
                QuantizerState::PQ(pq) => Some(pq.create_distance_table(query)),
                _ => None,
            };

            // With re-ranking, over-fetch so the exact pass has candidates.
            let search_k = if rerank_size > 0 { rerank_size.max(k) } else { k };

            // Widen the beam when tombstones exist so filtered-out nodes
            // don't starve the result set.
            let tombstone_count = tombstones.len();
            let expanded = if tombstone_count > 0 {
                Some((beam_width * 2).max(search_k + tombstone_count))
            } else {
                None
            };

            let mut results = beam_search(
                &start_ids,
                beam_width,
                search_k,
                |id| {
                    let id_usize = id as usize;
                    if id_usize < base_count {
                        quantized_distance_from_codes(
                            query, id_usize, base_codes, code_size, quantizer, pq_table.as_deref(),
                        )
                    } else {
                        // Delta vector: exact distance.
                        view.distance_to(query, id)
                    }
                },
                |id| view.get_neighbors(id),
                |id| view.is_live(id),
                BeamSearchConfig {
                    expanded_beam: expanded,
                    max_iterations: expanded.map(|e| e * 2),
                    early_term_factor: if tombstone_count > 0 { Some(1.5) } else { None },
                },
            );

            // Re-rank the quantized candidates by exact distance, then keep k.
            if rerank_size > 0 {
                results = results
                    .iter()
                    .map(|&(id, _)| {
                        let exact_dist = view.distance_to(query, id);
                        (id, exact_dist)
                    })
                    .collect();
                results.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
                results.truncate(k);
            }

            return results
                .into_iter()
                .map(|(id, d)| (view.to_global_u64(id), d))
                .collect();
        }

        // Exact (un-quantized) path.
        let tombstone_count = tombstones.len();
        let expanded = if tombstone_count > 0 {
            Some((beam_width * 2).max(k + tombstone_count))
        } else {
            None
        };

        let results = beam_search(
            &start_ids,
            beam_width,
            k,
            |id| view.distance_to(query, id),
            |id| view.get_neighbors(id),
            |id| view.is_live(id),
            BeamSearchConfig {
                expanded_beam: expanded,
                max_iterations: expanded.map(|e| e * 2),
                early_term_factor: if tombstone_count > 0 { Some(1.5) } else { None },
            },
        );

        results
            .into_iter()
            .map(|(id, d)| (view.to_global_u64(id), d))
            .collect()
    }
1057
    /// Like `search`, but only returns ids whose label rows satisfy `filter`.
    pub fn search_filtered(
        &self,
        query: &[f32],
        k: usize,
        beam_width: usize,
        filter: &Filter,
    ) -> Vec<u64> {
        self.search_filtered_with_dists(query, k, beam_width, filter)
            .into_iter()
            .map(|(id, _)| id)
            .collect()
    }
1071
1072 pub fn search_filtered_with_dists(
1074 &self,
1075 query: &[f32],
1076 k: usize,
1077 beam_width: usize,
1078 filter: &Filter,
1079 ) -> Vec<(u64, f32)> {
1080 if matches!(filter, Filter::None) || self.base_labels.is_none() {
1082 return self.search_with_dists(query, k, beam_width);
1083 }
1084
1085 let tombstones = self.tombstones.read().unwrap();
1086 let delta = self.delta.read().unwrap();
1087 let delta_labels = self.delta_labels.read().unwrap();
1088 let view = UnifiedView::new(self.base.as_ref(), &delta, &tombstones, self.dist);
1089 let start_ids = view.entry_points();
1090
1091 if start_ids.is_empty() {
1092 return Vec::new();
1093 }
1094
1095 let base_labels = self.base_labels.as_ref().unwrap();
1097 let combined_labels: Vec<Vec<u64>> = base_labels
1098 .iter()
1099 .chain(delta_labels.iter())
1100 .cloned()
1101 .collect();
1102
1103 let expanded_beam = (beam_width * 4).max(k * 10);
1106
1107 let results = beam_search(
1108 &start_ids,
1109 beam_width,
1110 k,
1111 |id| view.distance_to(query, id),
1112 |id| view.get_neighbors(id),
1113 |id| {
1114 if !view.is_live(id) {
1115 return false;
1116 }
1117 let idx = id as usize;
1118 if idx < combined_labels.len() {
1119 filter.matches(&combined_labels[idx])
1120 } else {
1121 false
1122 }
1123 },
1124 BeamSearchConfig {
1125 expanded_beam: Some(expanded_beam),
1126 max_iterations: Some(expanded_beam * 2),
1127 early_term_factor: Some(1.5),
1128 },
1129 );
1130
1131 results
1132 .into_iter()
1133 .map(|(id, d)| (view.to_global_u64(id), d))
1134 .collect()
1135 }
1136
    /// Runs `search` for every query in parallel (rayon) and returns one
    /// id list per query, in input order.
    pub fn search_batch(
        &self,
        queries: &[Vec<f32>],
        k: usize,
        beam_width: usize,
    ) -> Vec<Vec<u64>> {
        queries
            .par_iter()
            .map(|q| self.search(q, k, beam_width))
            .collect()
    }
1149
1150 pub fn get_vector(&self, id: u64) -> Option<Vec<f32>> {
1152 if is_delta_id(id) {
1153 let delta = self.delta.read().unwrap();
1154 delta.get_vector(delta_local_idx(id)).cloned()
1155 } else if let Some(ref base) = self.base {
1156 let idx = id as usize;
1157 if idx < base.num_vectors {
1158 Some(base.get_vector(idx))
1159 } else {
1160 None
1161 }
1162 } else {
1163 None
1164 }
1165 }
1166
1167 pub fn should_compact(&self) -> bool {
1169 let delta = self.delta.read().unwrap();
1170 let tombstones = self.tombstones.read().unwrap();
1171
1172 let base_size = self.base.as_ref().map(|b| b.num_vectors).unwrap_or(0);
1173 let total_size = base_size + delta.len();
1174
1175 if delta.len() >= self.config.delta_threshold {
1177 return true;
1178 }
1179
1180 if total_size > 0 {
1182 let tombstone_ratio = tombstones.len() as f32 / total_size as f32;
1183 if tombstone_ratio >= self.config.tombstone_ratio_threshold {
1184 return true;
1185 }
1186 }
1187
1188 false
1189 }
1190
    /// Rebuilds the base index at `new_path` from all live vectors
    /// (base + delta, minus tombstones), re-encodes quantized codes if a
    /// quantizer is configured, then resets the delta layer and tombstones.
    ///
    /// # Errors
    /// Fails when no vectors survive tombstoning, or the rebuild fails.
    pub fn compact(&mut self, new_path: &str) -> Result<(), DiskAnnError>
    where
        D: Default,
    {
        // Clone the tombstone set so its read lock is released immediately;
        // hold read locks on the delta state while gathering survivors.
        let tombstones = self.tombstones.read().unwrap().clone();
        let delta = self.delta.read().unwrap();
        let delta_labels = self.delta_labels.read().unwrap();

        let mut all_vectors: Vec<Vec<f32>> = Vec::new();
        // Only collect labels when the index carries them.
        let mut all_labels: Option<Vec<Vec<u64>>> = if self.base_labels.is_some() {
            Some(Vec::new())
        } else {
            None
        };

        // Live base vectors (base ids are plain indices).
        if let Some(ref base) = self.base {
            for i in 0..base.num_vectors {
                if !tombstones.contains(&(i as u64)) {
                    all_vectors.push(base.get_vector(i));
                    if let (Some(ref mut al), Some(ref bl)) = (&mut all_labels, &self.base_labels) {
                        al.push(bl[i].clone());
                    }
                }
            }
        }

        // Live delta vectors (delta ids carry DELTA_ID_OFFSET).
        for (i, v) in delta.vectors.iter().enumerate() {
            let global_id = DELTA_ID_OFFSET + i as u64;
            if !tombstones.contains(&global_id) {
                all_vectors.push(v.clone());
                if let Some(ref mut al) = all_labels {
                    if i < delta_labels.len() {
                        al.push(delta_labels[i].clone());
                    } else {
                        // Missing label row: pad with zeros to keep the
                        // label store aligned with the vector store.
                        al.push(vec![0u64; self.num_label_fields]);
                    }
                }
            }
        }

        // Release the read guards before the (slow) rebuild below.
        drop(delta);
        drop(delta_labels);
        drop(tombstones);

        if all_vectors.is_empty() {
            return Err(DiskAnnError::IndexError(
                "Cannot compact: no vectors remaining after removing tombstones".to_string()
            ));
        }

        let new_base = DiskANN::<D>::build_index_default(&all_vectors, self.dist, new_path)?;

        // Re-encode quantized codes for the surviving vectors.
        let new_codes = if let Some(ref quantizer) = self.quantizer {
            let codes = match quantizer {
                QuantizerState::PQ(pq) => encode_all_pq_vecs(&all_vectors, pq, self.code_size),
                QuantizerState::F16(f16q) => encode_all_vecs(&all_vectors, f16q, self.code_size),
                QuantizerState::Int8(int8q) => encode_all_vecs(&all_vectors, int8q, self.code_size),
            };
            Some(codes)
        } else {
            None
        };

        // Swap in the new base and clear all incremental state.
        self.base = Some(new_base);
        self.delta = RwLock::new(DeltaLayer::new(self.config.delta_params.max_degree));
        self.tombstones = RwLock::new(HashSet::new());
        self.base_path = Some(new_path.to_string());
        self.base_labels = all_labels;
        self.delta_labels = RwLock::new(Vec::new());
        self.base_codes = new_codes;

        Ok(())
    }
1271
    /// Serializes the incremental state (little-endian, versioned):
    /// magic, version, optional base blob, dim, delta vectors, delta graph,
    /// entry point (-1 = none), max_degree, tombstones, optional labels,
    /// optional quantizer + base codes.
    pub fn to_bytes(&self) -> Vec<u8> {
        let delta = self.delta.read().unwrap();
        let tombstones = self.tombstones.read().unwrap();
        let delta_labels = self.delta_labels.read().unwrap();

        let mut out = Vec::new();

        // Header: magic + format version.
        out.extend_from_slice(&INCR_MAGIC.to_le_bytes());
        out.extend_from_slice(&INCR_FORMAT_VERSION.to_le_bytes());

        // Optional base index: presence flag, then length-prefixed blob.
        if let Some(ref base) = self.base {
            out.push(1u8);
            let base_bytes = base.to_bytes();
            out.extend_from_slice(&(base_bytes.len() as u64).to_le_bytes());
            out.extend_from_slice(&base_bytes);
        } else {
            out.push(0u8);
        }

        out.extend_from_slice(&(self.dim as u64).to_le_bytes());

        // Delta vectors: count, then raw f32 payloads (dim is known above).
        out.extend_from_slice(&(delta.vectors.len() as u64).to_le_bytes());
        for v in &delta.vectors {
            let bytes: &[u8] = bytemuck::cast_slice(v);
            out.extend_from_slice(bytes);
        }

        // Delta graph: count, then per-node degree-prefixed u32 lists.
        out.extend_from_slice(&(delta.graph.len() as u64).to_le_bytes());
        for neighbors in &delta.graph {
            out.extend_from_slice(&(neighbors.len() as u32).to_le_bytes());
            let bytes: &[u8] = bytemuck::cast_slice(neighbors);
            out.extend_from_slice(bytes);
        }

        // Entry point encoded as i64, -1 meaning "none".
        let ep = delta.entry_point.map(|e| e as i64).unwrap_or(-1);
        out.extend_from_slice(&ep.to_le_bytes());

        out.extend_from_slice(&(delta.max_degree as u64).to_le_bytes());

        // Tombstones: count, then ids.
        // NOTE(review): HashSet iteration order is unspecified, so the
        // output is not byte-deterministic across runs (semantics are
        // unaffected since this is a set).
        out.extend_from_slice(&(tombstones.len() as u64).to_le_bytes());
        for &id in tombstones.iter() {
            out.extend_from_slice(&id.to_le_bytes());
        }

        // Optional labels: flag, field count, base rows, then delta rows
        // (row values are flattened; field count is fixed per row).
        if let Some(ref base_labels) = self.base_labels {
            out.push(1u8);
            out.extend_from_slice(&(self.num_label_fields as u64).to_le_bytes());
            out.extend_from_slice(&(base_labels.len() as u64).to_le_bytes());
            for lv in base_labels {
                for &val in lv {
                    out.extend_from_slice(&val.to_le_bytes());
                }
            }
            out.extend_from_slice(&(delta_labels.len() as u64).to_le_bytes());
            for lv in delta_labels.iter() {
                for &val in lv {
                    out.extend_from_slice(&val.to_le_bytes());
                }
            }
        } else {
            out.push(0u8);
        }

        // Optional quantizer: flag, code size, rerank size, bincode blob,
        // then length-prefixed base codes (0 when absent).
        if let Some(ref quantizer) = self.quantizer {
            out.push(1u8);
            out.extend_from_slice(&(self.code_size as u64).to_le_bytes());
            out.extend_from_slice(&(self.rerank_size as u64).to_le_bytes());
            let qdata = bincode::serialize(quantizer).unwrap();
            out.extend_from_slice(&(qdata.len() as u64).to_le_bytes());
            out.extend_from_slice(&qdata);
            if let Some(ref base_codes) = self.base_codes {
                out.extend_from_slice(&(base_codes.len() as u64).to_le_bytes());
                out.extend_from_slice(base_codes);
            } else {
                out.extend_from_slice(&0u64.to_le_bytes());
            }
        } else {
            out.push(0u8);
        }

        out
    }
1395
1396 pub fn from_bytes(bytes: &[u8], dist: D, config: IncrementalConfig) -> Result<Self, DiskAnnError> {
1400 if bytes.len() < 4 {
1401 return Err(DiskAnnError::IndexError("Incremental buffer too small".into()));
1402 }
1403
1404 let first_u32 = u32::from_le_bytes(bytes[0..4].try_into().unwrap());
1406 if first_u32 == INCR_MAGIC {
1407 Self::from_bytes_v1(bytes, dist, config)
1408 } else {
1409 Self::from_bytes_legacy(bytes, dist, config)
1411 }
1412 }
1413
1414 fn from_bytes_legacy(bytes: &[u8], dist: D, config: IncrementalConfig) -> Result<Self, DiskAnnError> {
1416 let mut pos = 0;
1417
1418 macro_rules! read_bytes {
1419 ($n:expr) => {{
1420 if pos + $n > bytes.len() {
1421 return Err(DiskAnnError::IndexError("Incremental buffer truncated".into()));
1422 }
1423 let slice = &bytes[pos..pos + $n];
1424 pos += $n;
1425 slice
1426 }};
1427 }
1428
1429 let has_base = read_bytes!(1)[0];
1431 let base = if has_base == 1 {
1432 let base_len = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1433 let base_data = read_bytes!(base_len).to_vec();
1434 Some(DiskANN::<D>::from_bytes(base_data, dist)?)
1435 } else {
1436 None
1437 };
1438
1439 let dim = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1441
1442 let num_delta = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1444 let mut delta_vectors = Vec::with_capacity(num_delta);
1445 for _ in 0..num_delta {
1446 let vbytes = read_bytes!(dim * 4);
1447 let floats: Vec<f32> = vbytes
1448 .chunks_exact(4)
1449 .map(|c| f32::from_le_bytes(c.try_into().unwrap()))
1450 .collect();
1451 delta_vectors.push(floats);
1452 }
1453
1454 let num_graph = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1456 let mut delta_graph = Vec::with_capacity(num_graph);
1457 for _ in 0..num_graph {
1458 let deg = u32::from_le_bytes(read_bytes!(4).try_into().unwrap()) as usize;
1459 let nbytes = read_bytes!(deg * 4);
1460 let neighbors: Vec<u32> = nbytes
1461 .chunks_exact(4)
1462 .map(|c| u32::from_le_bytes(c.try_into().unwrap()))
1463 .collect();
1464 delta_graph.push(neighbors);
1465 }
1466
1467 let ep = i64::from_le_bytes(read_bytes!(8).try_into().unwrap());
1469 let entry_point = if ep >= 0 { Some(ep as u32) } else { None };
1470
1471 let max_degree = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1473
1474 let num_tombstones = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1476 let mut tombstones = HashSet::with_capacity(num_tombstones);
1477 for _ in 0..num_tombstones {
1478 let id = u64::from_le_bytes(read_bytes!(8).try_into().unwrap());
1479 tombstones.insert(id);
1480 }
1481
1482 Ok(Self {
1483 base,
1484 delta: RwLock::new(DeltaLayer {
1485 vectors: delta_vectors,
1486 graph: delta_graph,
1487 entry_point,
1488 max_degree,
1489 }),
1490 tombstones: RwLock::new(tombstones),
1491 dist,
1492 config,
1493 base_path: None,
1494 dim,
1495 base_labels: None,
1496 delta_labels: RwLock::new(Vec::new()),
1497 num_label_fields: 0,
1498 quantizer: None,
1499 base_codes: None,
1500 code_size: 0,
1501 rerank_size: 0,
1502 })
1503 }
1504
1505 #[allow(unused_assignments)]
1507 fn from_bytes_v1(bytes: &[u8], dist: D, config: IncrementalConfig) -> Result<Self, DiskAnnError> {
1508 let mut pos = 0;
1509
1510 macro_rules! read_bytes {
1511 ($n:expr) => {{
1512 if pos + $n > bytes.len() {
1513 return Err(DiskAnnError::IndexError("Incremental buffer truncated".into()));
1514 }
1515 let slice = &bytes[pos..pos + $n];
1516 pos += $n;
1517 slice
1518 }};
1519 }
1520
1521 let magic = u32::from_le_bytes(read_bytes!(4).try_into().unwrap());
1523 if magic != INCR_MAGIC {
1524 return Err(DiskAnnError::IndexError(format!(
1525 "Invalid incremental magic: 0x{:08X}",
1526 magic
1527 )));
1528 }
1529
1530 let version = u32::from_le_bytes(read_bytes!(4).try_into().unwrap());
1532 if version != INCR_FORMAT_VERSION {
1533 return Err(DiskAnnError::IndexError(format!(
1534 "Unsupported incremental format version: {}",
1535 version
1536 )));
1537 }
1538
1539 let has_base = read_bytes!(1)[0];
1541 let base = if has_base == 1 {
1542 let base_len = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1543 let base_data = read_bytes!(base_len).to_vec();
1544 Some(DiskANN::<D>::from_bytes(base_data, dist)?)
1545 } else {
1546 None
1547 };
1548
1549 let dim = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1551
1552 let num_delta = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1554 let mut delta_vectors = Vec::with_capacity(num_delta);
1555 for _ in 0..num_delta {
1556 let vbytes = read_bytes!(dim * 4);
1557 let floats: Vec<f32> = vbytes
1558 .chunks_exact(4)
1559 .map(|c| f32::from_le_bytes(c.try_into().unwrap()))
1560 .collect();
1561 delta_vectors.push(floats);
1562 }
1563
1564 let num_graph = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1566 let mut delta_graph = Vec::with_capacity(num_graph);
1567 for _ in 0..num_graph {
1568 let deg = u32::from_le_bytes(read_bytes!(4).try_into().unwrap()) as usize;
1569 let nbytes = read_bytes!(deg * 4);
1570 let neighbors: Vec<u32> = nbytes
1571 .chunks_exact(4)
1572 .map(|c| u32::from_le_bytes(c.try_into().unwrap()))
1573 .collect();
1574 delta_graph.push(neighbors);
1575 }
1576
1577 let ep = i64::from_le_bytes(read_bytes!(8).try_into().unwrap());
1579 let entry_point = if ep >= 0 { Some(ep as u32) } else { None };
1580
1581 let max_degree = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1583
1584 let num_tombstones = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1586 let mut tombstones = HashSet::with_capacity(num_tombstones);
1587 for _ in 0..num_tombstones {
1588 let id = u64::from_le_bytes(read_bytes!(8).try_into().unwrap());
1589 tombstones.insert(id);
1590 }
1591
1592 let has_labels = read_bytes!(1)[0];
1594 let (base_labels, delta_labels_vec, num_label_fields) = if has_labels == 1 {
1595 let num_fields = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1596 let num_base = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1597 let mut bl = Vec::with_capacity(num_base);
1598 for _ in 0..num_base {
1599 let mut lv = Vec::with_capacity(num_fields);
1600 for _ in 0..num_fields {
1601 lv.push(u64::from_le_bytes(read_bytes!(8).try_into().unwrap()));
1602 }
1603 bl.push(lv);
1604 }
1605 let num_dl = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1606 let mut dl = Vec::with_capacity(num_dl);
1607 for _ in 0..num_dl {
1608 let mut lv = Vec::with_capacity(num_fields);
1609 for _ in 0..num_fields {
1610 lv.push(u64::from_le_bytes(read_bytes!(8).try_into().unwrap()));
1611 }
1612 dl.push(lv);
1613 }
1614 (Some(bl), dl, num_fields)
1615 } else {
1616 (None, Vec::new(), 0)
1617 };
1618
1619 let has_quantizer = read_bytes!(1)[0];
1621 let (quantizer, base_codes, code_size, rerank_size) = if has_quantizer == 1 {
1622 let cs = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1623 let rs = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1624 let qdata_len = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1625 let qdata = read_bytes!(qdata_len);
1626 let q: QuantizerState = bincode::deserialize(qdata)?;
1627 let codes_len = u64::from_le_bytes(read_bytes!(8).try_into().unwrap()) as usize;
1628 let codes = if codes_len > 0 {
1629 Some(read_bytes!(codes_len).to_vec())
1630 } else {
1631 None
1632 };
1633 (Some(q), codes, cs, rs)
1634 } else {
1635 (None, None, 0, 0)
1636 };
1637
1638 Ok(Self {
1639 base,
1640 delta: RwLock::new(DeltaLayer {
1641 vectors: delta_vectors,
1642 graph: delta_graph,
1643 entry_point,
1644 max_degree,
1645 }),
1646 tombstones: RwLock::new(tombstones),
1647 dist,
1648 config,
1649 base_path: None,
1650 dim,
1651 base_labels,
1652 delta_labels: RwLock::new(delta_labels_vec),
1653 num_label_fields,
1654 quantizer,
1655 base_codes,
1656 code_size,
1657 rerank_size,
1658 })
1659 }
1660
1661 pub fn stats(&self) -> IncrementalStats {
1663 let delta = self.delta.read().unwrap();
1664 let tombstones = self.tombstones.read().unwrap();
1665 let base_count = self.base.as_ref().map(|b| b.num_vectors).unwrap_or(0);
1666
1667 IncrementalStats {
1668 base_vectors: base_count,
1669 delta_vectors: delta.len(),
1670 tombstones: tombstones.len(),
1671 total_live: (base_count + delta.len()).saturating_sub(tombstones.len()),
1672 dim: self.dim,
1673 }
1674 }
1675
    /// Dimensionality of the vectors stored in this index.
    pub fn dim(&self) -> usize {
        self.dim
    }
1680
    /// Returns `true` if this index carries per-vector label metadata
    /// (i.e. `base_labels` was populated at build/load time).
    pub fn has_labels(&self) -> bool {
        self.base_labels.is_some()
    }
1685
    /// Returns `true` if this index carries quantizer state for
    /// compressed-code search.
    pub fn has_quantizer(&self) -> bool {
        self.quantizer.is_some()
    }
1690}
1691
/// Snapshot of the index's size counters, as returned by `stats()`.
/// Counts are taken under the read locks at the moment of the call and may
/// be stale immediately afterwards.
#[derive(Debug, Clone)]
pub struct IncrementalStats {
    /// Vectors in the immutable base index (0 when there is no base).
    pub base_vectors: usize,
    /// Vectors added to the in-memory delta layer since the last compaction.
    pub delta_vectors: usize,
    /// Logically deleted ids awaiting compaction.
    pub tombstones: usize,
    /// base + delta minus tombstones, saturating at zero.
    pub total_live: usize,
    /// Dimensionality of the indexed vectors.
    pub dim: usize,
}
1701
1702fn encode_all_vecs<Q: VectorQuantizer>(
1707 vectors: &[Vec<f32>],
1708 quantizer: &Q,
1709 code_size: usize,
1710) -> Vec<u8> {
1711 let encoded: Vec<Vec<u8>> = vectors.par_iter().map(|v| quantizer.encode(v)).collect();
1712 let mut flat = Vec::with_capacity(vectors.len() * code_size);
1713 for code in &encoded {
1714 flat.extend_from_slice(code);
1715 }
1716 flat
1717}
1718
1719fn encode_all_pq_vecs(
1720 vectors: &[Vec<f32>],
1721 pq: &ProductQuantizer,
1722 code_size: usize,
1723) -> Vec<u8> {
1724 let encoded: Vec<Vec<u8>> = vectors.par_iter().map(|v| pq.encode(v)).collect();
1725 let mut flat = Vec::with_capacity(vectors.len() * code_size);
1726 for code in &encoded {
1727 flat.extend_from_slice(code);
1728 }
1729 flat
1730}
1731
#[cfg(test)]
mod tests {
    use super::*;
    use anndists::dist::DistL2;
    use std::fs;

    /// Plain L2 distance, used as ground truth for checking search results.
    fn euclid(a: &[f32], b: &[f32]) -> f32 {
        let sq: f32 = a.iter().zip(b).map(|(x, y)| (x - y).powi(2)).sum();
        sq.sqrt()
    }

    #[test]
    fn test_incremental_basic() {
        let path = "test_incremental_basic.db";
        let _ = fs::remove_file(path);

        // Four corners of the unit square.
        let data = vec![
            vec![0.0, 0.0],
            vec![1.0, 0.0],
            vec![0.0, 1.0],
            vec![1.0, 1.0],
        ];
        let index = IncrementalDiskANN::<DistL2>::build_default(&data, path).unwrap();

        // A query near the origin must return exactly k results.
        let hits = index.search(&[0.1, 0.1], 2, 8);
        assert_eq!(hits.len(), 2);

        let _ = fs::remove_file(path);
    }

    #[test]
    fn test_incremental_add() {
        let path = "test_incremental_add.db";
        let _ = fs::remove_file(path);

        let base_vecs = vec![vec![0.0, 0.0], vec![1.0, 0.0]];
        let index = IncrementalDiskANN::<DistL2>::build_default(&base_vecs, path).unwrap();

        // Appending after build should hand back delta-layer ids.
        let added = index.add_vectors(&[vec![0.5, 0.5], vec![2.0, 2.0]]).unwrap();
        assert_eq!(added.len(), 2);
        assert!(is_delta_id(added[0]));

        // The freshly added [0.5, 0.5] should now be the nearest neighbor.
        let hits = index.search_with_dists(&[0.5, 0.5], 1, 8);
        assert!(!hits.is_empty());
        let (_best_id, best_dist) = hits[0];
        assert!(best_dist < 0.01, "Expected to find [0.5, 0.5], got dist {}", best_dist);

        let _ = fs::remove_file(path);
    }

    #[test]
    fn test_incremental_delete() {
        let path = "test_incremental_delete.db";
        let _ = fs::remove_file(path);

        let data = vec![vec![0.0, 0.0], vec![1.0, 0.0], vec![0.0, 1.0]];
        let index = IncrementalDiskANN::<DistL2>::build_default(&data, path).unwrap();

        // Tombstone id 0 and make sure it is filtered out of searches.
        index.delete_vectors(&[0]).unwrap();
        assert!(index.is_deleted(0));

        let hits = index.search(&[0.0, 0.0], 3, 8);
        assert!(!hits.contains(&0), "Deleted vector should not appear in results");

        let _ = fs::remove_file(path);
    }

    #[test]
    fn test_incremental_compact() {
        let path_before = "test_compact_v1.db";
        let path_after = "test_compact_v2.db";
        let _ = fs::remove_file(path_before);
        let _ = fs::remove_file(path_after);

        let data = vec![
            vec![0.0, 0.0],
            vec![1.0, 0.0],
            vec![0.0, 1.0],
            vec![1.0, 1.0],
        ];
        let mut index = IncrementalDiskANN::<DistL2>::build_default(&data, path_before).unwrap();

        index.add_vectors(&[vec![2.0, 2.0], vec![3.0, 3.0]]).unwrap();
        index.delete_vectors(&[0, 1]).unwrap();

        // Pre-compaction: the delta layer and tombstones are still pending.
        let before = index.stats();
        assert_eq!(before.base_vectors, 4);
        assert_eq!(before.delta_vectors, 2);
        assert_eq!(before.tombstones, 2);

        index.compact(path_after).unwrap();

        // Post-compaction: 4 live vectors (4 + 2 - 2) folded into the base,
        // with the delta layer and tombstone set drained.
        let after = index.stats();
        assert_eq!(after.base_vectors, 4);
        assert_eq!(after.delta_vectors, 0);
        assert_eq!(after.tombstones, 0);

        // A formerly-delta vector is still findable after compaction.
        let hits = index.search(&[2.0, 2.0], 1, 8);
        assert!(!hits.is_empty());

        let _ = fs::remove_file(path_before);
        let _ = fs::remove_file(path_after);
    }

    #[test]
    fn test_delta_only() {
        // No base index at all: everything lives in the delta layer.
        let index =
            IncrementalDiskANN::<DistL2>::new_empty(2, DistL2 {}, IncrementalConfig::default());

        let data = vec![
            vec![0.0, 0.0],
            vec![1.0, 0.0],
            vec![0.0, 1.0],
            vec![1.0, 1.0],
            vec![0.5, 0.5],
        ];
        index.add_vectors(&data).unwrap();

        let hits = index.search_with_dists(&[0.5, 0.5], 3, 8);
        assert_eq!(hits.len(), 3);

        // The top hit should be (numerically) the query point itself.
        let best = index.get_vector(hits[0].0).unwrap();
        assert!(euclid(&best, &[0.5, 0.5]) < 0.01);
    }

    #[test]
    fn test_incremental_to_bytes_from_bytes() {
        let path = "test_incr_bytes_rt.db";
        let _ = fs::remove_file(path);

        let data = vec![
            vec![0.0, 0.0],
            vec![1.0, 0.0],
            vec![0.0, 1.0],
            vec![1.0, 1.0],
        ];
        let index = IncrementalDiskANN::<DistL2>::build_default(&data, path).unwrap();
        index.add_vectors(&[vec![0.5, 0.5], vec![2.0, 2.0]]).unwrap();
        index.delete_vectors(&[0]).unwrap();

        // Round-trip through the serialized form.
        let blob = index.to_bytes();
        let restored = IncrementalDiskANN::<DistL2>::from_bytes(
            &blob,
            DistL2 {},
            IncrementalConfig::default(),
        )
        .unwrap();

        // Counts survive the round-trip...
        let stats = restored.stats();
        assert_eq!(stats.base_vectors, 4);
        assert_eq!(stats.delta_vectors, 2);
        assert_eq!(stats.tombstones, 1);

        // ...and so does the tombstone filter.
        let hits = restored.search(&[0.5, 0.5], 3, 8);
        assert!(!hits.contains(&0), "Deleted vector should not appear");

        let _ = fs::remove_file(path);
    }

    #[test]
    fn test_incremental_backward_compat_bytes() {
        let path = "test_incr_compat.db";
        let _ = fs::remove_file(path);

        let data = vec![vec![0.0, 0.0], vec![1.0, 0.0], vec![0.0, 1.0]];

        // Hand-roll a buffer in the legacy (pre-magic) layout: a base index
        // followed by an empty delta layer and no tombstones.
        let base = DiskANN::<DistL2>::build_index_default(&data, DistL2 {}, path).unwrap();
        let base_bytes = base.to_bytes();
        let mut old_bytes = Vec::new();
        old_bytes.push(1u8); // has_base
        old_bytes.extend_from_slice(&(base_bytes.len() as u64).to_le_bytes());
        old_bytes.extend_from_slice(&base_bytes);
        old_bytes.extend_from_slice(&(2u64).to_le_bytes()); // dim
        old_bytes.extend_from_slice(&0u64.to_le_bytes()); // num delta vectors
        old_bytes.extend_from_slice(&0u64.to_le_bytes()); // num graph nodes
        old_bytes.extend_from_slice(&(-1i64).to_le_bytes()); // no entry point
        old_bytes.extend_from_slice(&(32u64).to_le_bytes()); // max_degree
        old_bytes.extend_from_slice(&0u64.to_le_bytes()); // no tombstones

        let loaded = IncrementalDiskANN::<DistL2>::from_bytes(
            &old_bytes,
            DistL2 {},
            IncrementalConfig::default(),
        )
        .unwrap();

        assert_eq!(loaded.stats().base_vectors, 3);
        assert_eq!(loaded.stats().delta_vectors, 0);
        assert!(!loaded.has_labels());
        assert!(!loaded.has_quantizer());

        let hits = loaded.search(&[0.0, 0.0], 2, 8);
        assert_eq!(hits.len(), 2);

        let _ = fs::remove_file(path);
    }
}
1967}