go_brrr/embedding/
index.rs

1//! Vector index for semantic search using usearch.
2//!
3//! Provides a thread-safe wrapper around usearch for approximate nearest neighbor search.
4//! Designed to integrate with the semantic search module for code embeddings.
5//!
6//! # Architecture
7//!
8//! The index uses HNSW (Hierarchical Navigable Small World) algorithm internally,
9//! providing O(log n) search complexity with high recall. Vector IDs map directly
10//! to EmbeddingUnit indices in the metadata store.
11//!
12//! # Usage
13//!
14//! ```no_run
15//! use go_brrr::embedding::{VectorIndex, Metric};
16//!
17//! // Create index for 768-dimensional embeddings (MiniLM)
18//! let index = VectorIndex::new(768, Metric::InnerProduct)?;
19//!
//! // Reserve capacity, then add vectors
//! index.reserve(1)?;
//! let embedding = vec![0.1f32; 768];
//! index.add(0, &embedding)?;
23//!
24//! // Search for similar vectors
25//! let results = index.search(&embedding, 10)?;
26//! for (id, distance) in results {
27//!     println!("ID: {}, Distance: {}", id, distance);
28//! }
29//!
30//! // Persist to disk
31//! index.save("./index.usearch")?;
32//! # Ok::<(), anyhow::Error>(())
33//! ```
34
35use std::fs::File;
36use std::num::NonZeroUsize;
37use std::path::{Path, PathBuf};
38use std::simd::{f32x8, num::SimdFloat, Simd};
39
40/// 512-bit SIMD vector type for AVX-512 operations (16 x f32).
41type F32x16 = Simd<f32, 16>;
42use std::sync::atomic::{AtomicUsize, Ordering};
43use std::sync::{Arc, Mutex};
44use std::time::{Duration, Instant};
45
46use anyhow::{bail, Context, Result};
47use lru::LruCache;
48use once_cell::sync::Lazy;
49use rayon::prelude::*;
50use tempfile::NamedTempFile;
51use usearch::{Index, IndexOptions, MetricKind, ScalarKind};
52
53/// Threshold for parallel batch insertion.
54/// Below this count, sequential insertion has lower overhead.
55const PARALLEL_BATCH_THRESHOLD: usize = 100;
56
57// =============================================================================
58// Metric Types
59// =============================================================================
60
/// Distance metric used to compare vectors in the index.
///
/// Which metric fits best depends on the embedding model:
/// - `InnerProduct`: best for normalized embeddings (most modern models)
/// - `Cosine`: normalizes vectors during comparison (slightly slower)
/// - `L2Squared`: Euclidean distance, good for absolute positioning
///
/// The default is `InnerProduct`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Metric {
    /// Inner product distance: `1 - sum(a[i] * b[i])`.
    ///
    /// Intended for vectors that are already normalized, where it matches
    /// cosine similarity. Fastest option for pre-normalized embeddings.
    #[default]
    InnerProduct,

    /// Cosine distance: `1 - (a . b) / (|a| * |b|)`.
    ///
    /// Normalization happens as part of the comparison.
    Cosine,

    /// Squared L2 (Euclidean) distance: `sum((a[i] - b[i])^2)`.
    ///
    /// Measures absolute distance in vector space.
    L2Squared,
}
83
84impl Metric {
85    /// Convert to usearch MetricKind.
86    #[must_use]
87    fn to_usearch(self) -> MetricKind {
88        match self {
89            Self::InnerProduct => MetricKind::IP,
90            Self::Cosine => MetricKind::Cos,
91            Self::L2Squared => MetricKind::L2sq,
92        }
93    }
94
95    /// Get human-readable name.
96    #[must_use]
97    pub fn as_str(self) -> &'static str {
98        match self {
99            Self::InnerProduct => "inner_product",
100            Self::Cosine => "cosine",
101            Self::L2Squared => "l2_squared",
102        }
103    }
104}
105
106impl std::fmt::Display for Metric {
107    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
108        write!(f, "{}", self.as_str())
109    }
110}
111
112impl std::str::FromStr for Metric {
113    type Err = anyhow::Error;
114
115    fn from_str(s: &str) -> Result<Self> {
116        match s.to_lowercase().as_str() {
117            "ip" | "inner_product" | "innerproduct" => Ok(Self::InnerProduct),
118            "cos" | "cosine" => Ok(Self::Cosine),
119            "l2" | "l2sq" | "l2_squared" | "euclidean" => Ok(Self::L2Squared),
120            _ => bail!(
121                "Unknown metric: {}. Valid options: inner_product, cosine, l2_squared",
122                s
123            ),
124        }
125    }
126}
127
128// =============================================================================
129// Quantization Types
130// =============================================================================
131
/// Scalar type used to store vector components inside the index.
///
/// Narrower types shrink the index but may reduce recall:
/// - `F32`: full precision (4 bytes per dimension)
/// - `F16`: half precision (2 bytes per dimension, ~1% recall loss)
/// - `BF16`: brain float (2 bytes, better for ML models)
/// - `I8`: 8-bit quantization (1 byte, ~3-5% recall loss)
///
/// The default is `F32`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Quantization {
    /// 32-bit float: full precision, largest index size.
    #[default]
    F32,
    /// 16-bit float: half precision, a good size/quality balance.
    F16,
    /// Brain float 16: better suited to ML model outputs.
    BF16,
    /// 8-bit integer: smallest index, some recall loss.
    I8,
}
151
152impl Quantization {
153    /// Convert to usearch ScalarKind.
154    #[must_use]
155    fn to_usearch(self) -> ScalarKind {
156        match self {
157            Self::F32 => ScalarKind::F32,
158            Self::F16 => ScalarKind::F16,
159            Self::BF16 => ScalarKind::BF16,
160            Self::I8 => ScalarKind::I8,
161        }
162    }
163
164    /// Get bytes per scalar element.
165    #[must_use]
166    pub fn bytes_per_element(self) -> usize {
167        match self {
168            Self::F32 => 4,
169            Self::F16 | Self::BF16 => 2,
170            Self::I8 => 1,
171        }
172    }
173}
174
175// =============================================================================
176// Index Configuration
177// =============================================================================
178
179/// Configuration for vector index creation.
180///
181/// Default values are optimized for semantic code search with ~10K-100K vectors.
182#[derive(Debug, Clone)]
183pub struct IndexConfig {
184    /// Vector dimensions (must match embedding model output).
185    pub dimensions: usize,
186
187    /// Distance metric for similarity computation.
188    pub metric: Metric,
189
190    /// Scalar quantization for storage optimization.
191    pub quantization: Quantization,
192
193    /// Connectivity parameter (M in HNSW). Higher = better recall, more memory.
194    /// Default: 0 (uses usearch default, typically 16).
195    pub connectivity: usize,
196
197    /// Expansion factor during index construction (ef_construction).
198    /// Higher = better quality index, slower build. Default: 0 (uses usearch default).
199    pub expansion_add: usize,
200
201    /// Expansion factor during search (ef_search).
202    /// Higher = better recall, slower search. Default: 0 (uses usearch default).
203    pub expansion_search: usize,
204
205    /// Allow multiple vectors with the same key.
206    pub multi: bool,
207}
208
209impl IndexConfig {
210    /// Create configuration for a specific dimension count.
211    #[must_use]
212    pub fn new(dimensions: usize) -> Self {
213        Self {
214            dimensions,
215            metric: Metric::default(),
216            quantization: Quantization::default(),
217            connectivity: 0,
218            expansion_add: 0,
219            expansion_search: 0,
220            multi: false,
221        }
222    }
223
224    /// Set the distance metric.
225    #[must_use]
226    pub fn with_metric(mut self, metric: Metric) -> Self {
227        self.metric = metric;
228        self
229    }
230
231    /// Set the quantization level.
232    #[must_use]
233    pub fn with_quantization(mut self, quantization: Quantization) -> Self {
234        self.quantization = quantization;
235        self
236    }
237
238    /// Set connectivity (M parameter in HNSW).
239    #[must_use]
240    pub fn with_connectivity(mut self, connectivity: usize) -> Self {
241        self.connectivity = connectivity;
242        self
243    }
244
245    /// Set expansion factor for index construction.
246    #[must_use]
247    pub fn with_expansion_add(mut self, expansion: usize) -> Self {
248        self.expansion_add = expansion;
249        self
250    }
251
252    /// Set expansion factor for search.
253    #[must_use]
254    pub fn with_expansion_search(mut self, expansion: usize) -> Self {
255        self.expansion_search = expansion;
256        self
257    }
258
259    /// Enable or disable multi-index mode.
260    ///
261    /// When enabled, multiple vectors can be stored under the same key.
262    /// Use [`count_key`](VectorIndex::count_key) to count vectors per key.
263    #[must_use]
264    pub fn with_multi(mut self, multi: bool) -> Self {
265        self.multi = multi;
266        self
267    }
268
269    /// Convert to usearch IndexOptions.
270    fn to_usearch_options(&self) -> IndexOptions {
271        IndexOptions {
272            dimensions: self.dimensions,
273            metric: self.metric.to_usearch(),
274            quantization: self.quantization.to_usearch(),
275            connectivity: self.connectivity,
276            expansion_add: self.expansion_add,
277            expansion_search: self.expansion_search,
278            multi: self.multi,
279        }
280    }
281
282    /// Estimate memory usage for a given vector count (in bytes).
283    #[must_use]
284    pub fn estimate_memory(&self, vector_count: usize) -> usize {
285        let bytes_per_vector = self.dimensions * self.quantization.bytes_per_element();
286        // HNSW overhead is roughly 2x for graph structure
287        let overhead_factor = 2;
288        vector_count * bytes_per_vector * overhead_factor
289    }
290}
291
292impl Default for IndexConfig {
293    fn default() -> Self {
294        Self::new(768) // Default to MiniLM-like dimensions
295    }
296}
297
298// =============================================================================
299// Save Information
300// =============================================================================
301
/// Summary of a completed save operation.
///
/// Returned by [`VectorIndex::save_checked`] to provide details about the
/// save operation, including timing and disk space information.
///
/// # Examples
///
/// ```no_run
/// use go_brrr::embedding::{VectorIndex, Metric};
///
/// let index = VectorIndex::new(768, Metric::InnerProduct)?;
/// index.reserve(1000)?;
/// // ... add vectors ...
///
/// let info = index.save_checked("./index.usearch")?;
///
/// println!("Save completed:");
/// println!("  Path: {}", info.path.display());
/// println!("  Size: {} bytes", info.size_bytes);
/// println!("  Time: {:?}", info.elapsed);
/// println!("  Space remaining: {} bytes", info.space_remaining);
/// # Ok::<(), anyhow::Error>(())
/// ```
#[derive(Debug, Clone)]
pub struct SaveInfo {
    /// Location the index was written to.
    pub path: PathBuf,

    /// Size of the saved index, in bytes.
    pub size_bytes: usize,

    /// How long the save operation took.
    pub elapsed: Duration,

    /// Disk space that was available before the save started, in bytes.
    pub available_before: u64,

    /// Estimated disk space left after the save, in bytes.
    pub space_remaining: u64,
}

impl SaveInfo {
    /// Save throughput in bytes per second.
    ///
    /// Returns `f64::INFINITY` when the recorded elapsed time is zero.
    #[must_use]
    pub fn bytes_per_second(&self) -> f64 {
        let seconds = self.elapsed.as_secs_f64();
        if seconds > 0.0 {
            self.size_bytes as f64 / seconds
        } else {
            f64::INFINITY
        }
    }

    /// Save throughput in megabytes (1024 * 1024 bytes) per second.
    #[must_use]
    pub fn mb_per_second(&self) -> f64 {
        const BYTES_PER_MB: f64 = 1024.0 * 1024.0;
        self.bytes_per_second() / BYTES_PER_MB
    }

    /// Human-readable size string (e.g., "1.50 MB").
    ///
    /// Uses 1024-based units with two decimals for KB/MB/GB, and a plain
    /// byte count below 1 KB.
    #[must_use]
    pub fn human_size(&self) -> String {
        // Largest unit first so the first threshold reached wins.
        const UNITS: [(usize, &str); 3] = [(1 << 30, "GB"), (1 << 20, "MB"), (1 << 10, "KB")];

        let size = self.size_bytes;
        match UNITS.iter().find(|(threshold, _)| size >= *threshold) {
            Some((threshold, label)) => {
                format!("{:.2} {}", size as f64 / *threshold as f64, label)
            }
            None => format!("{} bytes", size),
        }
    }
}

/// One-line summary: size, destination path, elapsed time and throughput.
impl std::fmt::Display for SaveInfo {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let size = self.human_size();
        let destination = self.path.display();
        let throughput = self.mb_per_second();
        write!(
            f,
            "Saved {} to '{}' in {:?} ({:.1} MB/s)",
            size, destination, self.elapsed, throughput
        )
    }
}
391
392// =============================================================================
393// Vector Index
394// =============================================================================
395
396/// Thread-safe vector index for approximate nearest neighbor search.
397///
398/// Wraps usearch Index with ergonomic Rust API. The underlying index is
399/// already thread-safe (Send + Sync) for concurrent read/write operations.
400///
401/// # Vector IDs
402///
403/// Vector IDs are u64 keys that should map to your EmbeddingUnit indices.
404/// The index does not store metadata - that should be kept separately
405/// (typically in a JSON metadata file alongside the index).
406///
407/// # Persistence
408///
409/// Indexes can be saved to disk with [`save`](Self::save) and loaded with
410/// [`load`](Self::load). The [`view`](Self::view) method provides memory-mapped
411/// access for large indexes that don't fit in RAM.
412pub struct VectorIndex {
413    inner: Index,
414    config: IndexConfig,
415}
416
417// Note: VectorIndex is automatically Send + Sync because:
418// - usearch::Index implements Send + Sync (verified in USearch/rust/lib.rs:533-534)
419// - IndexConfig contains only Copy types (usize, bool, enums)
420// No unsafe impl needed - Rust derives these traits automatically.
421
422// =============================================================================
423// IndexView - Safe Memory-Mapped View with File Lifecycle Management
424// =============================================================================
425
426/// A memory-mapped view of an index file with file lifecycle management.
427///
428/// This struct keeps the file handle open to prevent the underlying file from
429/// being deleted while the view is active. On Unix systems, keeping the file
430/// handle open ensures the file data remains accessible even if the file is
431/// unlinked from the filesystem.
432///
433/// # Safety Considerations
434///
435/// - The backing file must not be modified while the view is active
436/// - Use `is_valid()` to check if the backing file still exists on disk
437/// - For frequently-updated indexes, use `VectorIndex::load()` instead
438/// - Views are more memory-efficient for large, read-only indexes
439///
440/// # Examples
441///
442/// ```no_run
443/// use go_brrr::embedding::VectorIndex;
444///
445/// // Create a safe view that keeps the file open
446/// let view = VectorIndex::view_safe("./index.usearch")?;
447/// assert!(view.is_valid());
448/// println!("Index has {} vectors", view.len());
449///
450/// // Search through the view
451/// let results = view.search(&[1.0, 0.0, 0.0, 0.0], 10)?;
452/// # Ok::<(), anyhow::Error>(())
453/// ```
454pub struct IndexView {
455    inner: VectorIndex,
456    /// Keeps file open to prevent deletion while view is active
457    _file_handle: Arc<File>,
458    path: PathBuf,
459}
460
461impl IndexView {
462    /// Check if the backing file still exists on disk.
463    ///
464    /// Note: Even if this returns `false`, the view may still be usable
465    /// on Unix systems because the file handle keeps the data accessible.
466    /// However, it indicates the file has been unlinked.
467    #[must_use]
468    pub fn is_valid(&self) -> bool {
469        self.path.exists()
470    }
471
472    /// Get the path to the backing file.
473    #[must_use]
474    pub fn path(&self) -> &Path {
475        &self.path
476    }
477
478    /// Get the underlying index configuration.
479    #[must_use]
480    pub fn config(&self) -> &IndexConfig {
481        &self.inner.config
482    }
483
484    /// Get the number of vectors in the index.
485    #[must_use]
486    pub fn len(&self) -> usize {
487        self.inner.len()
488    }
489
490    /// Check if the index is empty.
491    #[must_use]
492    pub fn is_empty(&self) -> bool {
493        self.inner.is_empty()
494    }
495
496    /// Get the dimensionality of vectors in this index.
497    #[must_use]
498    pub fn dimensions(&self) -> usize {
499        self.inner.dimensions()
500    }
501
502    /// Check if a key exists in the index.
503    #[must_use]
504    pub fn contains(&self, key: u64) -> bool {
505        self.inner.contains(key)
506    }
507
508    /// Search for the k nearest neighbors of a query vector.
509    ///
510    /// # Arguments
511    ///
512    /// * `query` - Query vector (must match index dimensions)
513    /// * `k` - Number of neighbors to return
514    ///
515    /// # Returns
516    ///
517    /// Vector of (key, distance) pairs, sorted by distance (closest first).
518    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<(u64, f32)>> {
519        // Use explicit type qualification since VectorIndex methods are defined later in this file
520        VectorIndex::search(&self.inner, query, k)
521    }
522
523    /// Search with a distance threshold filter.
524    ///
525    /// Returns only results within the specified distance threshold.
526    pub fn search_threshold(
527        &self,
528        query: &[f32],
529        k: usize,
530        threshold: f32,
531    ) -> Result<Vec<(u64, f32)>> {
532        // Use explicit type qualification since VectorIndex methods are defined later in this file
533        VectorIndex::search_threshold(&self.inner, query, k, threshold)
534    }
535
536    /// Get the vector for a given key.
537    ///
538    /// # Errors
539    ///
540    /// Returns error if the key doesn't exist in the index.
541    pub fn get(&self, key: u64) -> Result<Vec<f32>> {
542        // Use explicit type qualification since VectorIndex methods are defined later in this file
543        VectorIndex::get(&self.inner, key)
544            .ok_or_else(|| anyhow::anyhow!("Key {} not found in index", key))
545    }
546
547    /// Consume the view and return the underlying VectorIndex.
548    ///
549    /// Warning: After calling this, the file handle is released and the
550    /// returned index becomes vulnerable to file deletion. Only use this
551    /// if you need to modify the index or have other lifecycle management.
552    #[must_use]
553    pub fn into_inner(self) -> VectorIndex {
554        self.inner
555    }
556}
557
558impl std::ops::Deref for IndexView {
559    type Target = VectorIndex;
560
561    fn deref(&self) -> &Self::Target {
562        &self.inner
563    }
564}
565
566impl std::fmt::Debug for IndexView {
567    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
568        f.debug_struct("IndexView")
569            .field("inner", &self.inner)
570            .field("path", &self.path)
571            .field("is_valid", &self.is_valid())
572            .finish()
573    }
574}
575
576// Note: IndexView is Send + Sync because:
577// - VectorIndex is Send + Sync
578// - Arc<File> is Send + Sync
579// - PathBuf is Send + Sync
580
581impl VectorIndex {
582    /// Create a new vector index with the given dimensions and metric.
583    ///
584    /// # Arguments
585    ///
586    /// * `dimensions` - Number of dimensions in embedding vectors
587    /// * `metric` - Distance metric for similarity computation
588    ///
589    /// # Errors
590    ///
591    /// Returns error if usearch index creation fails.
592    ///
593    /// # Examples
594    ///
595    /// ```
596    /// use go_brrr::embedding::{VectorIndex, Metric};
597    ///
598    /// // Create index for 768-dim embeddings with inner product metric
599    /// let index = VectorIndex::new(768, Metric::InnerProduct)?;
600    /// # Ok::<(), anyhow::Error>(())
601    /// ```
602    pub fn new(dimensions: usize, metric: Metric) -> Result<Self> {
603        let config = IndexConfig::new(dimensions).with_metric(metric);
604        Self::with_config(config)
605    }
606
607    /// Create a new vector index with full configuration.
608    ///
609    /// # Examples
610    ///
611    /// ```
612    /// use go_brrr::embedding::{VectorIndex, IndexConfig, Metric, Quantization};
613    ///
614    /// let config = IndexConfig::new(1024)
615    ///     .with_metric(Metric::InnerProduct)
616    ///     .with_quantization(Quantization::F16)
617    ///     .with_connectivity(32);
618    ///
619    /// let index = VectorIndex::with_config(config)?;
620    /// # Ok::<(), anyhow::Error>(())
621    /// ```
622    pub fn with_config(config: IndexConfig) -> Result<Self> {
623        let options = config.to_usearch_options();
624        let inner = Index::new(&options)
625            .map_err(|e| anyhow::anyhow!("Failed to create usearch index: {}", e))?;
626
627        Ok(Self { inner, config })
628    }
629
630    /// Get the index configuration.
631    #[must_use]
632    pub fn config(&self) -> &IndexConfig {
633        &self.config
634    }
635
636    /// Get the number of dimensions.
637    #[must_use]
638    pub fn dimensions(&self) -> usize {
639        self.inner.dimensions()
640    }
641
642    /// Get the current number of vectors in the index.
643    #[must_use]
644    pub fn len(&self) -> usize {
645        self.inner.size()
646    }
647
648    /// Check if the index is empty.
649    #[must_use]
650    pub fn is_empty(&self) -> bool {
651        self.len() == 0
652    }
653
654    /// Get the current capacity (reserved space).
655    #[must_use]
656    pub fn capacity(&self) -> usize {
657        self.inner.capacity()
658    }
659
660    /// Get estimated memory usage in bytes.
661    #[must_use]
662    pub fn memory_usage(&self) -> usize {
663        self.inner.memory_usage()
664    }
665
666    /// Get the distance metric used by this index.
667    #[must_use]
668    pub fn metric(&self) -> Metric {
669        self.config.metric
670    }
671
672    /// Convert raw distances to similarity scores using this index's metric.
673    ///
674    /// Different metrics require different conversion formulas:
675    /// - `InnerProduct`/`Cosine`: score = 1 - distance (clamped to [0, 1])
676    /// - `L2Squared`: score = 1 / (1 + distance) (maps [0, inf) to (0, 1])
677    ///
678    /// # Arguments
679    ///
680    /// * `distances` - Raw distance values from search results
681    ///
682    /// # Returns
683    ///
684    /// Similarity scores in [0, 1] range, where 1.0 = perfect match.
685    ///
686    /// # Examples
687    ///
688    /// ```
689    /// use go_brrr::embedding::{VectorIndex, Metric};
690    ///
691    /// let index = VectorIndex::new(4, Metric::L2Squared)?;
692    /// let distances = vec![0.0, 1.0, 4.0];
693    /// let scores = index.to_similarity_scores(&distances);
694    /// // L2: 0.0 -> 1.0, 1.0 -> 0.5, 4.0 -> 0.2
695    /// assert!((scores[0] - 1.0).abs() < 1e-6);
696    /// assert!((scores[1] - 0.5).abs() < 1e-6);
697    /// assert!((scores[2] - 0.2).abs() < 1e-6);
698    /// # Ok::<(), anyhow::Error>(())
699    /// ```
700    #[must_use]
701    pub fn to_similarity_scores(&self, distances: &[f32]) -> Vec<f32> {
702        distances_to_scores_for_metric(distances, self.config.metric)
703    }
704
705    /// Reserve capacity for incoming vectors.
706    ///
707    /// Pre-allocating capacity improves performance when adding many vectors.
708    ///
709    /// # Arguments
710    ///
711    /// * `capacity` - Total capacity to reserve (including existing vectors)
712    ///
713    /// # Errors
714    ///
715    /// Returns error if memory allocation fails.
716    pub fn reserve(&self, capacity: usize) -> Result<()> {
717        self.inner
718            .reserve(capacity)
719            .map_err(|e| anyhow::anyhow!("Failed to reserve capacity: {}", e))
720    }
721
722    /// Check if the index contains a vector with the given key.
723    #[must_use]
724    pub fn contains(&self, key: u64) -> bool {
725        self.inner.contains(key)
726    }
727
728    /// Count vectors with a specific key (for multi-index mode).
729    ///
730    /// In standard mode, returns 1 if key exists, 0 otherwise.
731    /// In multi-index mode, returns the number of vectors stored under this key.
732    ///
733    /// # Arguments
734    ///
735    /// * `key` - The vector key to count
736    ///
737    /// # Returns
738    ///
739    /// Number of vectors associated with the key.
740    ///
741    /// # Examples
742    ///
743    /// ```
744    /// use go_brrr::embedding::{VectorIndex, IndexConfig, Metric};
745    ///
746    /// // Multi-index mode allows multiple vectors per key
747    /// let config = IndexConfig::new(4)
748    ///     .with_metric(Metric::InnerProduct)
749    ///     .with_multi(true);
750    /// let index = VectorIndex::with_config(config)?;
751    /// index.reserve(10)?;
752    ///
753    /// let key = 42u64;
754    /// index.add(key, &[1.0, 0.0, 0.0, 0.0])?;
755    /// index.add(key, &[0.0, 1.0, 0.0, 0.0])?;
756    ///
757    /// assert_eq!(index.count_key(key), 2);
758    /// assert_eq!(index.count_key(999), 0);  // Non-existent key
759    /// # Ok::<(), anyhow::Error>(())
760    /// ```
761    #[must_use]
762    pub fn count_key(&self, key: u64) -> usize {
763        self.inner.count(key)
764    }
765
766    /// Retrieve a vector by its key.
767    ///
768    /// Returns `None` if the key doesn't exist in the index.
769    ///
770    /// # Arguments
771    ///
772    /// * `key` - The unique identifier of the vector to retrieve
773    ///
774    /// # Returns
775    ///
776    /// The stored vector if the key exists, `None` otherwise.
777    ///
778    /// # Examples
779    ///
780    /// ```
781    /// use go_brrr::embedding::{VectorIndex, Metric};
782    ///
783    /// let index = VectorIndex::new(4, Metric::InnerProduct)?;
784    /// index.reserve(10)?;
785    /// index.add(42, &[1.0, 0.0, 0.0, 0.0])?;
786    ///
787    /// let retrieved = index.get(42);
788    /// assert!(retrieved.is_some());
789    /// assert_eq!(retrieved.unwrap(), vec![1.0, 0.0, 0.0, 0.0]);
790    ///
791    /// // Non-existent key returns None
792    /// assert!(index.get(999).is_none());
793    /// # Ok::<(), anyhow::Error>(())
794    /// ```
795    #[must_use]
796    pub fn get(&self, key: u64) -> Option<Vec<f32>> {
797        let dimensions = self.inner.dimensions();
798        let mut buffer = vec![0.0f32; dimensions];
799
800        // usearch get() returns the number of vectors found (0 or 1 for non-multi),
801        // not the number of dimensions. The buffer is filled with the full vector.
802        match self.inner.get(key, &mut buffer) {
803            Ok(count) if count > 0 => Some(buffer),
804            _ => None,
805        }
806    }
807
808    /// Retrieve multiple vectors by their keys.
809    ///
810    /// Returns a `Vec` of `Option<Vec<f32>>` in the same order as the input keys.
811    /// Each element is `Some(vector)` if the key exists, `None` otherwise.
812    ///
813    /// # Arguments
814    ///
815    /// * `keys` - Slice of unique identifiers to retrieve
816    ///
817    /// # Returns
818    ///
819    /// Vector of optional vectors, maintaining input order.
820    ///
821    /// # Examples
822    ///
823    /// ```
824    /// use go_brrr::embedding::{VectorIndex, Metric};
825    ///
826    /// let index = VectorIndex::new(4, Metric::InnerProduct)?;
827    /// index.reserve(10)?;
828    /// index.add(1, &[1.0, 0.0, 0.0, 0.0])?;
829    /// index.add(3, &[0.0, 1.0, 0.0, 0.0])?;
830    ///
831    /// let results = index.get_batch(&[1, 2, 3]);
832    /// assert!(results[0].is_some());  // Key 1 exists
833    /// assert!(results[1].is_none());  // Key 2 doesn't exist
834    /// assert!(results[2].is_some());  // Key 3 exists
835    /// # Ok::<(), anyhow::Error>(())
836    /// ```
837    #[must_use]
838    pub fn get_batch(&self, keys: &[u64]) -> Vec<Option<Vec<f32>>> {
839        keys.iter().map(|&key| self.get(key)).collect()
840    }
841
842    /// Add a single vector to the index.
843    ///
844    /// # Arguments
845    ///
846    /// * `key` - Unique identifier for the vector (maps to EmbeddingUnit index)
847    /// * `vector` - Embedding vector (must match index dimensions)
848    ///
849    /// # Errors
850    ///
851    /// Returns error if:
852    /// - Vector dimensions don't match index dimensions
853    /// - Index insertion fails
854    ///
855    /// # Examples
856    ///
857    /// ```
858    /// use go_brrr::embedding::{VectorIndex, Metric};
859    ///
860    /// let index = VectorIndex::new(4, Metric::Cosine)?;
861    /// index.reserve(10)?;  // Must reserve capacity before adding vectors
862    /// index.add(42, &[0.1, 0.2, 0.3, 0.4])?;
863    /// # Ok::<(), anyhow::Error>(())
864    /// ```
865    pub fn add(&self, key: u64, vector: &[f32]) -> Result<()> {
866        self.validate_vector_dimensions(vector)?;
867
868        self.inner
869            .add(key, vector)
870            .map_err(|e| anyhow::anyhow!("Failed to add vector {}: {}", key, e))
871    }
872
873    /// Add multiple vectors to the index in batch.
874    ///
875    /// More efficient than calling `add` repeatedly due to better memory locality.
876    ///
877    /// # Arguments
878    ///
879    /// * `keys` - Vector of unique identifiers
880    /// * `vectors` - Slice of embedding vectors (each must match index dimensions)
881    ///
882    /// # Errors
883    ///
884    /// Returns error if:
885    /// - Keys and vectors have different lengths
886    /// - Any vector has wrong dimensions
887    /// - Index insertion fails
888    ///
889    /// # Examples
890    ///
891    /// ```
892    /// use go_brrr::embedding::{VectorIndex, Metric};
893    ///
894    /// let index = VectorIndex::new(4, Metric::InnerProduct)?;
895    /// index.reserve(100)?;
896    ///
897    /// let keys = vec![0, 1, 2];
898    /// let vectors = vec![
899    ///     vec![0.1, 0.2, 0.3, 0.4],
900    ///     vec![0.5, 0.6, 0.7, 0.8],
901    ///     vec![0.9, 0.1, 0.2, 0.3],
902    /// ];
903    ///
904    /// index.add_batch(&keys, &vectors)?;
905    /// # Ok::<(), anyhow::Error>(())
906    /// ```
907    pub fn add_batch(&self, keys: &[u64], vectors: &[Vec<f32>]) -> Result<()> {
908        if keys.len() != vectors.len() {
909            bail!(
910                "Keys and vectors length mismatch: {} keys, {} vectors",
911                keys.len(),
912                vectors.len()
913            );
914        }
915
916        if keys.is_empty() {
917            return Ok(());
918        }
919
920        // Validate all dimensions upfront before any insertion
921        for (i, vector) in vectors.iter().enumerate() {
922            self.validate_vector_dimensions(vector)
923                .with_context(|| format!("Vector at index {}", i))?;
924        }
925
926        // For small batches, sequential insertion has lower overhead
927        if keys.len() < PARALLEL_BATCH_THRESHOLD {
928            for (key, vector) in keys.iter().zip(vectors.iter()) {
929                self.inner
930                    .add(*key, vector)
931                    .map_err(|e| anyhow::anyhow!("Failed to add vector {}: {}", key, e))?;
932            }
933            return Ok(());
934        }
935
936        // Parallel insertion for large batches using rayon
937        // usearch Index is Send + Sync, so concurrent adds are safe
938        let error_count = AtomicUsize::new(0);
939        let first_error_key = AtomicUsize::new(0);
940
941        keys.par_iter()
942            .zip(vectors.par_iter())
943            .for_each(|(key, vector)| {
944                if let Err(_e) = self.inner.add(*key, vector) {
945                    // Record first error (race condition is acceptable - we just need one)
946                    if error_count.fetch_add(1, Ordering::Relaxed) == 0 {
947                        first_error_key.store(*key as usize, Ordering::Relaxed);
948                    }
949                }
950            });
951
952        let errors = error_count.load(Ordering::Relaxed);
953        if errors > 0 {
954            let first_key = first_error_key.load(Ordering::Relaxed);
955            bail!(
956                "Failed to add {} vector(s), first failure at key {}",
957                errors,
958                first_key
959            );
960        }
961
962        Ok(())
963    }
964
965    /// Add multiple vectors sequentially (for when insertion order matters).
966    ///
967    /// Unlike `add_batch`, this method guarantees vectors are added in the
968    /// order provided. Use this when vector order affects search results
969    /// or when debugging insertion issues.
970    ///
971    /// # Arguments
972    ///
973    /// * `keys` - Vector of unique identifiers
974    /// * `vectors` - Slice of embedding vectors (each must match index dimensions)
975    pub fn add_batch_sequential(&self, keys: &[u64], vectors: &[Vec<f32>]) -> Result<()> {
976        if keys.len() != vectors.len() {
977            bail!(
978                "Keys and vectors length mismatch: {} keys, {} vectors",
979                keys.len(),
980                vectors.len()
981            );
982        }
983
984        for (i, (key, vector)) in keys.iter().zip(vectors.iter()).enumerate() {
985            self.validate_vector_dimensions(vector)
986                .with_context(|| format!("Vector at index {}", i))?;
987            self.inner
988                .add(*key, vector)
989                .map_err(|e| anyhow::anyhow!("Failed to add vector {}: {}", key, e))?;
990        }
991
992        Ok(())
993    }
994
995    /// Add multiple vectors from a flat array (more efficient for large batches).
996    ///
997    /// Uses parallel insertion for batches >= 100 vectors. The flat array format
998    /// is more cache-friendly than Vec<Vec<f32>> for large batches.
999    ///
1000    /// # Arguments
1001    ///
1002    /// * `keys` - Vector of unique identifiers
1003    /// * `vectors_flat` - Flat array of all vectors concatenated
1004    ///
1005    /// # Errors
1006    ///
1007    /// Returns error if flat array size doesn't match keys * dimensions.
1008    pub fn add_batch_flat(&self, keys: &[u64], vectors_flat: &[f32]) -> Result<()> {
1009        let expected_len = keys.len() * self.dimensions();
1010        if vectors_flat.len() != expected_len {
1011            bail!(
1012                "Flat vector array size mismatch: expected {} ({} keys * {} dims), got {}",
1013                expected_len,
1014                keys.len(),
1015                self.dimensions(),
1016                vectors_flat.len()
1017            );
1018        }
1019
1020        if keys.is_empty() {
1021            return Ok(());
1022        }
1023
1024        let dims = self.dimensions();
1025
1026        // For small batches, sequential insertion has lower overhead
1027        if keys.len() < PARALLEL_BATCH_THRESHOLD {
1028            for (i, key) in keys.iter().enumerate() {
1029                let start = i * dims;
1030                let end = start + dims;
1031                let vector = &vectors_flat[start..end];
1032
1033                self.inner
1034                    .add(*key, vector)
1035                    .map_err(|e| anyhow::anyhow!("Failed to add vector {}: {}", key, e))?;
1036            }
1037            return Ok(());
1038        }
1039
1040        // Parallel insertion for large batches using rayon
1041        let error_count = AtomicUsize::new(0);
1042        let first_error_key = AtomicUsize::new(0);
1043
1044        keys.par_iter()
1045            .enumerate()
1046            .for_each(|(i, key)| {
1047                let start = i * dims;
1048                let end = start + dims;
1049                let vector = &vectors_flat[start..end];
1050
1051                if let Err(_e) = self.inner.add(*key, vector) {
1052                    if error_count.fetch_add(1, Ordering::Relaxed) == 0 {
1053                        first_error_key.store(*key as usize, Ordering::Relaxed);
1054                    }
1055                }
1056            });
1057
1058        let errors = error_count.load(Ordering::Relaxed);
1059        if errors > 0 {
1060            let first_key = first_error_key.load(Ordering::Relaxed);
1061            bail!(
1062                "Failed to add {} vector(s), first failure at key {}",
1063                errors,
1064                first_key
1065            );
1066        }
1067
1068        Ok(())
1069    }
1070
1071    /// Search for k nearest neighbors to the query vector.
1072    ///
1073    /// Returns results sorted by distance (ascending - closest first).
1074    /// For inner product metric, lower distance = higher similarity.
1075    ///
1076    /// # Arguments
1077    ///
1078    /// * `query` - Query embedding vector (must match index dimensions)
1079    /// * `k` - Maximum number of results to return
1080    ///
1081    /// # Returns
1082    ///
1083    /// Vector of (key, distance) pairs sorted by distance.
1084    ///
1085    /// # Errors
1086    ///
1087    /// Returns error if:
1088    /// - Query dimensions don't match index dimensions
1089    /// - Search fails
1090    ///
1091    /// # Examples
1092    ///
1093    /// ```
1094    /// use go_brrr::embedding::{VectorIndex, Metric};
1095    ///
1096    /// let index = VectorIndex::new(4, Metric::InnerProduct)?;
1097    /// index.reserve(10)?;  // Must reserve capacity before adding vectors
1098    /// index.add(0, &[0.1, 0.2, 0.3, 0.4])?;
1099    /// index.add(1, &[0.5, 0.6, 0.7, 0.8])?;
1100    ///
1101    /// let results = index.search(&[0.1, 0.2, 0.3, 0.4], 10)?;
1102    /// // results[0] should be (0, ~0.0) - exact match
1103    /// # Ok::<(), anyhow::Error>(())
1104    /// ```
1105    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<(u64, f32)>> {
1106        self.validate_vector_dimensions(query)?;
1107
1108        if self.is_empty() {
1109            return Ok(Vec::new());
1110        }
1111
1112        // Limit k to actual index size
1113        let k = k.min(self.len());
1114
1115        let matches = self
1116            .inner
1117            .search(query, k)
1118            .map_err(|e| anyhow::anyhow!("Search failed: {}", e))?;
1119
1120        Ok(matches.keys.into_iter().zip(matches.distances).collect())
1121    }
1122
1123    /// Search with exact brute-force computation (slower but guaranteed optimal).
1124    ///
1125    /// Use this for verification or when approximate results aren't acceptable.
1126    pub fn search_exact(&self, query: &[f32], k: usize) -> Result<Vec<(u64, f32)>> {
1127        self.validate_vector_dimensions(query)?;
1128
1129        if self.is_empty() {
1130            return Ok(Vec::new());
1131        }
1132
1133        let k = k.min(self.len());
1134
1135        let matches = self
1136            .inner
1137            .exact_search(query, k)
1138            .map_err(|e| anyhow::anyhow!("Exact search failed: {}", e))?;
1139
1140        Ok(matches.keys.into_iter().zip(matches.distances).collect())
1141    }
1142
1143    /// Search with a filter predicate.
1144    ///
1145    /// Only returns results where the filter function returns true for the key.
1146    ///
1147    /// # Arguments
1148    ///
1149    /// * `query` - Query embedding vector
1150    /// * `k` - Maximum number of results
1151    /// * `filter` - Predicate function: `fn(key) -> bool`
1152    ///
1153    /// # Examples
1154    ///
1155    /// ```
1156    /// use go_brrr::embedding::{VectorIndex, Metric};
1157    ///
1158    /// let index = VectorIndex::new(4, Metric::InnerProduct)?;
1159    /// index.reserve(100)?;  // Must reserve capacity before adding vectors
1160    /// // Add vectors with keys 0-99
1161    /// for i in 0..100 {
1162    ///     index.add(i, &[0.1 * i as f32; 4])?;
1163    /// }
1164    ///
1165    /// // Only search among even-keyed vectors
1166    /// let results = index.search_filtered(&[0.5; 4], 10, |key| key % 2 == 0)?;
1167    /// assert!(results.iter().all(|(k, _)| k % 2 == 0));
1168    /// # Ok::<(), anyhow::Error>(())
1169    /// ```
1170    pub fn search_filtered<F>(&self, query: &[f32], k: usize, filter: F) -> Result<Vec<(u64, f32)>>
1171    where
1172        F: Fn(u64) -> bool,
1173    {
1174        self.validate_vector_dimensions(query)?;
1175
1176        if self.is_empty() {
1177            return Ok(Vec::new());
1178        }
1179
1180        let k = k.min(self.len());
1181
1182        let matches = self
1183            .inner
1184            .filtered_search(query, k, filter)
1185            .map_err(|e| anyhow::anyhow!("Filtered search failed: {}", e))?;
1186
1187        Ok(matches.keys.into_iter().zip(matches.distances).collect())
1188    }
1189
1190    /// Search with a distance threshold filter.
1191    ///
1192    /// Returns only results where the distance is less than or equal to the threshold.
1193    /// This is useful for semantic search where you want to filter out dissimilar results.
1194    ///
1195    /// # Arguments
1196    ///
1197    /// * `query` - Query embedding vector
1198    /// * `k` - Maximum number of results
1199    /// * `threshold` - Maximum distance threshold (results with distance > threshold are filtered)
1200    ///
1201    /// # Examples
1202    ///
1203    /// ```
1204    /// use go_brrr::embedding::{VectorIndex, Metric};
1205    ///
1206    /// let index = VectorIndex::new(4, Metric::InnerProduct)?;
1207    /// index.reserve(10)?;
1208    /// index.add(0, &[1.0, 0.0, 0.0, 0.0])?;
1209    /// index.add(1, &[0.9, 0.1, 0.0, 0.0])?;
1210    /// index.add(2, &[0.0, 0.0, 1.0, 0.0])?;
1211    ///
1212    /// // Search with threshold to filter distant results
1213    /// let results = index.search_threshold(&[1.0, 0.0, 0.0, 0.0], 10, 0.5)?;
1214    /// // Only returns vectors within distance 0.5
1215    /// # Ok::<(), anyhow::Error>(())
1216    /// ```
1217    pub fn search_threshold(
1218        &self,
1219        query: &[f32],
1220        k: usize,
1221        threshold: f32,
1222    ) -> Result<Vec<(u64, f32)>> {
1223        self.validate_vector_dimensions(query)?;
1224
1225        if self.is_empty() {
1226            return Ok(Vec::new());
1227        }
1228
1229        let k = k.min(self.len());
1230
1231        let matches = self
1232            .inner
1233            .search(query, k)
1234            .map_err(|e| anyhow::anyhow!("Search failed: {}", e))?;
1235
1236        // Filter results by threshold
1237        Ok(matches
1238            .keys
1239            .into_iter()
1240            .zip(matches.distances)
1241            .filter(|(_, dist)| *dist <= threshold)
1242            .collect())
1243    }
1244
1245    /// Remove a vector by key.
1246    ///
1247    /// # Returns
1248    ///
1249    /// Number of vectors removed (0 or 1 for non-multi indexes).
1250    pub fn remove(&self, key: u64) -> Result<usize> {
1251        self.inner
1252            .remove(key)
1253            .map_err(|e| anyhow::anyhow!("Failed to remove vector {}: {}", key, e))
1254    }
1255
1256    /// Rename a key in the index.
1257    ///
1258    /// Moves all vectors from `from_key` to `to_key`.
1259    /// Returns the number of vectors renamed (0 if `from_key` didn't exist).
1260    ///
1261    /// # Errors
1262    ///
1263    /// Returns error if:
1264    /// - `to_key` already exists in the index (would cause data loss)
1265    /// - The underlying rename operation fails
1266    ///
1267    /// # Examples
1268    ///
1269    /// ```
1270    /// use go_brrr::embedding::{VectorIndex, IndexConfig, Metric};
1271    ///
1272    /// let config = IndexConfig::new(4).with_metric(Metric::InnerProduct);
1273    /// let index = VectorIndex::with_config(config)?;
1274    /// index.reserve(10)?;
1275    ///
1276    /// index.add(42, &[1.0, 0.0, 0.0, 0.0])?;
1277    /// assert!(index.contains(42));
1278    ///
1279    /// let renamed = index.rename(42, 100)?;
1280    /// assert_eq!(renamed, 1);
1281    /// assert!(!index.contains(42));
1282    /// assert!(index.contains(100));
1283    /// # Ok::<(), anyhow::Error>(())
1284    /// ```
1285    pub fn rename(&self, from_key: u64, to_key: u64) -> Result<usize> {
1286        // Check if target key already exists (would cause data loss)
1287        if self.contains(to_key) {
1288            bail!(
1289                "Cannot rename key {} to {}: target key already exists",
1290                from_key,
1291                to_key
1292            );
1293        }
1294
1295        self.inner
1296            .rename(from_key, to_key)
1297            .map_err(|e| anyhow::anyhow!("Failed to rename key {} to {}: {}", from_key, to_key, e))
1298    }
1299
1300    /// Rename a key, overwriting target if it exists.
1301    ///
1302    /// Removes any existing vectors at `to_key` before renaming.
1303    /// Returns the number of vectors renamed (0 if `from_key` didn't exist).
1304    ///
1305    /// # Examples
1306    ///
1307    /// ```
1308    /// use go_brrr::embedding::{VectorIndex, IndexConfig, Metric};
1309    ///
1310    /// let config = IndexConfig::new(4).with_metric(Metric::InnerProduct);
1311    /// let index = VectorIndex::with_config(config)?;
1312    /// index.reserve(10)?;
1313    ///
1314    /// index.add(42, &[1.0, 0.0, 0.0, 0.0])?;
1315    /// index.add(100, &[0.0, 1.0, 0.0, 0.0])?;  // Existing key
1316    ///
1317    /// let renamed = index.rename_overwrite(42, 100)?;
1318    /// assert_eq!(renamed, 1);
1319    /// assert!(!index.contains(42));
1320    /// assert!(index.contains(100));
1321    /// # Ok::<(), anyhow::Error>(())
1322    /// ```
1323    pub fn rename_overwrite(&self, from_key: u64, to_key: u64) -> Result<usize> {
1324        // Remove target first if it exists
1325        if self.contains(to_key) {
1326            self.remove(to_key)?;
1327        }
1328
1329        self.inner
1330            .rename(from_key, to_key)
1331            .map_err(|e| anyhow::anyhow!("Failed to rename key {} to {}: {}", from_key, to_key, e))
1332    }
1333
1334    /// Clear all vectors from the index.
1335    ///
1336    /// Releases memory back to the OS.
1337    pub fn clear(&self) -> Result<()> {
1338        self.inner
1339            .reset()
1340            .map_err(|e| anyhow::anyhow!("Failed to clear index: {}", e))
1341    }
1342
1343    /// Save the index to a file atomically.
1344    ///
1345    /// Uses a temp file + atomic rename pattern to prevent corruption on crash.
1346    /// Creates a `.usearch` file that can be loaded with [`load`](Self::load)
1347    /// or memory-mapped with [`view`](Self::view).
1348    ///
1349    /// # Atomic Write Strategy
1350    ///
1351    /// 1. Creates a temporary file in the same directory as the target
1352    /// 2. Writes index data to the temporary file
1353    /// 3. Atomically renames the temp file to the target path
1354    ///
1355    /// This ensures that if a crash occurs mid-write, the original file (if any)
1356    /// remains intact. The atomic rename is guaranteed by POSIX on Unix systems
1357    /// when source and destination are on the same filesystem.
1358    ///
1359    /// # Arguments
1360    ///
1361    /// * `path` - File path to save to (typically with `.usearch` extension)
1362    ///
1363    /// # Errors
1364    ///
1365    /// Returns error if:
1366    /// - Parent directory does not exist or is not writable
1367    /// - Temporary file cannot be created
1368    /// - Index serialization fails
1369    /// - Atomic rename fails
1370    ///
1371    /// # Examples
1372    ///
1373    /// ```no_run
1374    /// use go_brrr::embedding::{VectorIndex, Metric};
1375    ///
1376    /// let index = VectorIndex::new(768, Metric::InnerProduct)?;
1377    /// index.reserve(100)?;
1378    /// // ... add vectors ...
1379    /// index.save("./index.usearch")?;  // Crash-safe write
1380    /// # Ok::<(), anyhow::Error>(())
1381    /// ```
1382    pub fn save(&self, path: impl AsRef<Path>) -> Result<()> {
1383        let path = path.as_ref();
1384
1385        // Get parent directory for temp file (must be same filesystem for atomic rename)
1386        let parent = path.parent().unwrap_or_else(|| Path::new("."));
1387
1388        // Ensure parent directory exists
1389        if !parent.exists() {
1390            bail!("Parent directory does not exist: {}", parent.display());
1391        }
1392
1393        // Create temp file in same directory (required for atomic rename on same filesystem)
1394        let temp_file = NamedTempFile::new_in(parent)
1395            .with_context(|| format!("Failed to create temp file in '{}'", parent.display()))?;
1396
1397        let temp_path = temp_file.path();
1398        let temp_path_str = temp_path.to_string_lossy();
1399
1400        // Save to temp file
1401        self.inner.save(&temp_path_str).map_err(|e| {
1402            anyhow::anyhow!(
1403                "Failed to save index to temp file '{}': {}",
1404                temp_path_str,
1405                e
1406            )
1407        })?;
1408
1409        // Atomic rename: persist() keeps the temp file and renames it to target
1410        temp_file.persist(path).with_context(|| {
1411            format!(
1412                "Failed to atomically rename temp file to '{}'",
1413                path.display()
1414            )
1415        })?;
1416
1417        Ok(())
1418    }
1419
1420    /// Save the index directly to file without atomic write protection.
1421    ///
1422    /// This is faster than [`save`](Self::save) but risky: if a crash occurs
1423    /// mid-write, the file may be corrupted. Use only when:
1424    /// - Speed is critical and corruption risk is acceptable
1425    /// - The file is easily regenerable
1426    /// - You have your own backup/recovery mechanism
1427    ///
1428    /// # Arguments
1429    ///
1430    /// * `path` - File path to save to (typically with `.usearch` extension)
1431    ///
1432    /// # Errors
1433    ///
1434    /// Returns error if file cannot be written.
1435    ///
1436    /// # Examples
1437    ///
1438    /// ```no_run
1439    /// use go_brrr::embedding::{VectorIndex, Metric};
1440    ///
1441    /// let index = VectorIndex::new(768, Metric::InnerProduct)?;
1442    /// index.reserve(100)?;
1443    /// // ... add vectors ...
1444    /// index.save_unsafe("./index.usearch")?;  // Faster but not crash-safe
1445    /// # Ok::<(), anyhow::Error>(())
1446    /// ```
1447    pub fn save_unsafe(&self, path: impl AsRef<Path>) -> Result<()> {
1448        let path_str = path.as_ref().to_string_lossy();
1449        self.inner
1450            .save(&path_str)
1451            .map_err(|e| anyhow::anyhow!("Failed to save index to '{}': {}", path_str, e))
1452    }
1453
1454    /// Restore an index from file, reading configuration from the file itself.
1455    ///
1456    /// This is the SAFE way to load an index - dimensions and settings are
1457    /// read from the file header, not provided by the caller. This prevents
1458    /// dimension mismatches that could cause undefined behavior.
1459    ///
1460    /// # Arguments
1461    ///
1462    /// * `path` - Path to `.usearch` file
1463    ///
1464    /// # Errors
1465    ///
1466    /// Returns error if:
1467    /// - File does not exist or cannot be read
1468    /// - File format is invalid
1469    ///
1470    /// # Examples
1471    ///
1472    /// ```no_run
1473    /// use go_brrr::embedding::VectorIndex;
1474    ///
1475    /// // Safe load - dimensions come from the file
1476    /// let index = VectorIndex::restore("./index.usearch")?;
1477    /// println!("Loaded index with {} dimensions", index.dimensions());
1478    /// # Ok::<(), anyhow::Error>(())
1479    /// ```
1480    pub fn restore(path: impl AsRef<Path>) -> Result<Self> {
1481        let path = path.as_ref();
1482
1483        if !path.exists() {
1484            bail!("Index file not found: {}", path.display());
1485        }
1486
1487        let path_str = path.to_string_lossy();
1488
1489        // Create index with default options - usearch will update dimensions on load
1490        let default_opts = IndexOptions::default();
1491        let inner = Index::new(&default_opts)
1492            .map_err(|e| anyhow::anyhow!("Failed to create usearch index: {}", e))?;
1493
1494        // Load from file - this reads dimensions from the file header
1495        inner
1496            .load(&path_str)
1497            .map_err(|e| anyhow::anyhow!("Failed to load index from '{}': {}", path_str, e))?;
1498
1499        // Read actual dimensions from the loaded index
1500        let dimensions = inner.dimensions();
1501        if dimensions == 0 {
1502            bail!(
1503                "Invalid index file '{}': loaded index has 0 dimensions",
1504                path_str
1505            );
1506        }
1507
1508        // Build config from actual loaded values
1509        // Note: metric and quantization cannot be reliably read from usearch file format,
1510        // so we use defaults. Users needing specific settings should use load_validated().
1511        let config = IndexConfig {
1512            dimensions,
1513            metric: Metric::default(),
1514            quantization: Quantization::default(),
1515            connectivity: 0,
1516            expansion_add: 0,
1517            expansion_search: 0,
1518            multi: false,
1519        };
1520
1521        Ok(Self { inner, config })
1522    }
1523
1524    /// Load an index with explicit configuration validation.
1525    ///
1526    /// First loads the index safely (reading dimensions from file), then
1527    /// validates that the loaded dimensions match the expected config.
1528    /// This is useful when you need to ensure the loaded index matches
1529    /// expected parameters.
1530    ///
1531    /// # Arguments
1532    ///
1533    /// * `path` - Path to `.usearch` file
1534    /// * `config` - Expected configuration (dimensions will be validated)
1535    ///
1536    /// # Errors
1537    ///
1538    /// Returns error if:
1539    /// - File does not exist or cannot be read
1540    /// - File format is invalid
1541    /// - Loaded dimensions don't match config dimensions
1542    ///
1543    /// # Examples
1544    ///
1545    /// ```no_run
1546    /// use go_brrr::embedding::{VectorIndex, IndexConfig, Metric};
1547    ///
1548    /// // Load with validation - error if dimensions don't match
1549    /// let config = IndexConfig::new(768).with_metric(Metric::InnerProduct);
1550    /// let index = VectorIndex::load_validated("./index.usearch", config)?;
1551    /// # Ok::<(), anyhow::Error>(())
1552    /// ```
1553    pub fn load_validated(path: impl AsRef<Path>, config: IndexConfig) -> Result<Self> {
1554        let path = path.as_ref();
1555        let mut index = Self::restore(path)?;
1556
1557        // Validate dimensions match expected config
1558        if index.dimensions() != config.dimensions {
1559            bail!(
1560                "Dimension mismatch: expected {} but loaded index has {}. \
1561                 Use VectorIndex::restore() to load without dimension constraints.",
1562                config.dimensions,
1563                index.dimensions()
1564            );
1565        }
1566
1567        // Update config with user-specified settings (metric, quantization, etc.)
1568        // The dimensions are already validated to match
1569        index.config = config;
1570
1571        Ok(index)
1572    }
1573
1574    /// Load an index from a file (legacy API, prefer `restore` or `load_validated`).
1575    ///
1576    /// This method is kept for backward compatibility. It now safely loads the
1577    /// index first, then validates dimensions. For new code, prefer:
1578    /// - [`restore`](Self::restore): Safe load without dimension constraints
1579    /// - [`load_validated`](Self::load_validated): Safe load with dimension validation
1580    ///
1581    /// # Arguments
1582    ///
1583    /// * `path` - Path to `.usearch` file
1584    /// * `config` - Configuration matching the saved index
1585    ///
1586    /// # Errors
1587    ///
1588    /// Returns error if:
1589    /// - File does not exist or cannot be read
1590    /// - File format is invalid
1591    /// - Configuration dimensions don't match saved index
1592    #[deprecated(
1593        since = "1.2.0",
1594        note = "Use `restore()` for safe loading or `load_validated()` for validated loading"
1595    )]
1596    #[allow(clippy::missing_errors_doc)]
1597    pub fn load(path: impl AsRef<Path>, config: IndexConfig) -> Result<Self> {
1598        Self::load_validated(path, config)
1599    }
1600
1601    /// Create a memory-mapped view of an index file, reading config from file.
1602    ///
1603    /// This is the SAFE way to view an index - dimensions and settings are
1604    /// read from the file header, not provided by the caller. This prevents
1605    /// dimension mismatches that could cause undefined behavior.
1606    ///
1607    /// Does not load the index into memory - reads directly from the file.
1608    /// Ideal for large indexes that don't fit in RAM.
1609    ///
1610    /// # Safety Note
1611    ///
1612    /// The file must not be modified or deleted while the view is active.
1613    ///
1614    /// # Arguments
1615    ///
1616    /// * `path` - Path to `.usearch` file
1617    ///
1618    /// # Examples
1619    ///
1620    /// ```no_run
1621    /// use go_brrr::embedding::VectorIndex;
1622    ///
1623    /// // Safe view - dimensions come from the file
1624    /// let index = VectorIndex::view_restore("./index.usearch")?;
1625    /// println!("Viewing index with {} dimensions", index.dimensions());
1626    /// # Ok::<(), anyhow::Error>(())
1627    /// ```
1628    pub fn view_restore(path: impl AsRef<Path>) -> Result<Self> {
1629        let path = path.as_ref();
1630
1631        if !path.exists() {
1632            bail!("Index file not found: {}", path.display());
1633        }
1634
1635        let path_str = path.to_string_lossy();
1636
1637        // Create index with default options - usearch will read dimensions from file
1638        let default_opts = IndexOptions::default();
1639        let inner = Index::new(&default_opts)
1640            .map_err(|e| anyhow::anyhow!("Failed to create usearch index: {}", e))?;
1641
1642        // Memory-map the file - this reads dimensions from the file header
1643        inner
1644            .view(&path_str)
1645            .map_err(|e| anyhow::anyhow!("Failed to view index from '{}': {}", path_str, e))?;
1646
1647        // Read actual dimensions from the viewed index
1648        let dimensions = inner.dimensions();
1649        if dimensions == 0 {
1650            bail!(
1651                "Invalid index file '{}': viewed index has 0 dimensions",
1652                path_str
1653            );
1654        }
1655
1656        // Build config from actual viewed values
1657        let config = IndexConfig {
1658            dimensions,
1659            metric: Metric::default(),
1660            quantization: Quantization::default(),
1661            connectivity: 0,
1662            expansion_add: 0,
1663            expansion_search: 0,
1664            multi: false,
1665        };
1666
1667        Ok(Self { inner, config })
1668    }
1669
1670    /// View an index with explicit configuration validation.
1671    ///
1672    /// First views the index safely (reading dimensions from file), then
1673    /// validates that the dimensions match the expected config.
1674    ///
1675    /// # Arguments
1676    ///
1677    /// * `path` - Path to `.usearch` file
1678    /// * `config` - Expected configuration (dimensions will be validated)
1679    ///
1680    /// # Safety Note
1681    ///
1682    /// The file must not be modified or deleted while the view is active.
1683    ///
1684    /// # Errors
1685    ///
1686    /// Returns error if:
1687    /// - File does not exist or cannot be read
1688    /// - File format is invalid
1689    /// - Loaded dimensions don't match config dimensions
1690    ///
1691    /// # Examples
1692    ///
1693    /// ```no_run
1694    /// use go_brrr::embedding::{VectorIndex, IndexConfig, Metric};
1695    ///
1696    /// // View with validation - error if dimensions don't match
1697    /// let config = IndexConfig::new(768).with_metric(Metric::InnerProduct);
1698    /// let index = VectorIndex::view_validated("./index.usearch", config)?;
1699    /// # Ok::<(), anyhow::Error>(())
1700    /// ```
1701    pub fn view_validated(path: impl AsRef<Path>, config: IndexConfig) -> Result<Self> {
1702        let path = path.as_ref();
1703        let mut index = Self::view_restore(path)?;
1704
1705        // Validate dimensions match expected config
1706        if index.dimensions() != config.dimensions {
1707            bail!(
1708                "Dimension mismatch: expected {} but viewed index has {}. \
1709                 Use VectorIndex::view_restore() to view without dimension constraints.",
1710                config.dimensions,
1711                index.dimensions()
1712            );
1713        }
1714
1715        // Update config with user-specified settings
1716        index.config = config;
1717
1718        Ok(index)
1719    }
1720
1721    /// Create a memory-mapped view of an index file (legacy API).
1722    ///
1723    /// This method is kept for backward compatibility. It now safely views the
1724    /// index first, then validates dimensions. For new code, prefer:
1725    /// - [`view_restore`](Self::view_restore): Safe view without dimension constraints
1726    /// - [`view_validated`](Self::view_validated): Safe view with dimension validation
1727    ///
1728    /// # Arguments
1729    ///
1730    /// * `path` - Path to `.usearch` file
1731    /// * `config` - Configuration matching the saved index
1732    ///
1733    /// # Safety Note
1734    ///
1735    /// The file must not be modified or deleted while the view is active.
1736    #[deprecated(
1737        since = "1.2.0",
1738        note = "Use `view_safe()` for proper file lifecycle management"
1739    )]
1740    #[allow(clippy::missing_errors_doc)]
1741    pub fn view(path: impl AsRef<Path>, config: IndexConfig) -> Result<Self> {
1742        Self::view_validated(path, config)
1743    }
1744
1745    // =========================================================================
1746    // Safe Memory-Mapped Views (with file lifecycle management)
1747    // =========================================================================
1748
1749    /// Create a memory-mapped view with proper file lifecycle management.
1750    ///
1751    /// This is the SAFEST way to view an index. The returned `IndexView`:
1752    /// - Keeps the file handle open to prevent file deletion issues
1753    /// - Reads dimensions from the file header automatically
1754    /// - Provides `is_valid()` to check if the backing file still exists
1755    ///
1756    /// On Unix systems, keeping the file handle open ensures the data remains
1757    /// accessible even if the file is unlinked from the filesystem.
1758    ///
1759    /// # Arguments
1760    ///
1761    /// * `path` - Path to `.usearch` file
1762    ///
1763    /// # Safety Note
1764    ///
1765    /// The file must not be **modified** while the view is active. However,
1766    /// deletion is handled safely by keeping the file handle open.
1767    ///
1768    /// # Examples
1769    ///
1770    /// ```no_run
1771    /// use go_brrr::embedding::VectorIndex;
1772    ///
1773    /// // Create a safe view with file lifecycle management
1774    /// let view = VectorIndex::view_safe("./index.usearch")?;
1775    /// assert!(view.is_valid());
1776    /// println!("Viewing index with {} dimensions", view.dimensions());
1777    ///
1778    /// // View is usable even if file gets deleted (on Unix)
1779    /// let results = view.search(&[1.0, 0.0, 0.0, 0.0], 10)?;
1780    /// # Ok::<(), anyhow::Error>(())
1781    /// ```
1782    pub fn view_safe(path: impl AsRef<Path>) -> Result<IndexView> {
1783        let path = path.as_ref().to_path_buf();
1784
1785        // Open file handle first - this keeps the file accessible even if deleted
1786        let file = File::open(&path)
1787            .with_context(|| format!("Failed to open index file: {}", path.display()))?;
1788
1789        // Create the memory-mapped view
1790        let index = Self::view_restore(&path)?;
1791
1792        Ok(IndexView {
1793            inner: index,
1794            _file_handle: Arc::new(file),
1795            path,
1796        })
1797    }
1798
1799    /// Create a safe view with explicit configuration validation.
1800    ///
1801    /// Combines the safety of [`view_safe`](Self::view_safe) with dimension
1802    /// validation. Returns an error if the file dimensions don't match the
1803    /// expected config.
1804    ///
1805    /// # Arguments
1806    ///
1807    /// * `path` - Path to `.usearch` file
1808    /// * `config` - Expected configuration (dimensions will be validated)
1809    ///
1810    /// # Safety Note
1811    ///
1812    /// The file must not be **modified** while the view is active. However,
1813    /// deletion is handled safely by keeping the file handle open.
1814    ///
1815    /// # Errors
1816    ///
1817    /// Returns error if:
1818    /// - File does not exist or cannot be read
1819    /// - File format is invalid
1820    /// - Loaded dimensions don't match config dimensions (if config.dimensions != 0)
1821    ///
1822    /// # Examples
1823    ///
1824    /// ```no_run
1825    /// use go_brrr::embedding::{VectorIndex, IndexConfig, Metric};
1826    ///
1827    /// // Safe view with dimension validation
1828    /// let config = IndexConfig::new(768).with_metric(Metric::InnerProduct);
1829    /// let view = VectorIndex::view_validated_safe("./index.usearch", config)?;
1830    ///
1831    /// // Can use view methods or deref to VectorIndex
1832    /// let results = view.search(&[0.1f32; 768], 10)?;
1833    /// # Ok::<(), anyhow::Error>(())
1834    /// ```
1835    pub fn view_validated_safe(
1836        path: impl AsRef<Path>,
1837        config: IndexConfig,
1838    ) -> Result<IndexView> {
1839        let view = Self::view_safe(&path)?;
1840
1841        // Validate dimensions if config specifies them (0 means no validation)
1842        if config.dimensions != 0 && view.inner.dimensions() != config.dimensions {
1843            bail!(
1844                "Dimension mismatch: expected {} but viewed index has {}. \
1845                 Use VectorIndex::view_safe() to view without dimension constraints.",
1846                config.dimensions,
1847                view.inner.dimensions()
1848            );
1849        }
1850
1851        Ok(view)
1852    }
1853
1854    // =========================================================================
1855    // In-Memory Serialization (for Redis caching, network transfer, etc.)
1856    // =========================================================================
1857
    /// Get the serialized size of the index in bytes.
    ///
    /// This is the value reported by the underlying usearch index's
    /// `serialized_length()`. [`to_bytes`](Self::to_bytes) allocates a buffer of
    /// exactly this size, and the disk-space checks use it as the base requirement.
    ///
    /// # Examples
    ///
    /// ```
    /// use go_brrr::embedding::{VectorIndex, Metric};
    ///
    /// let index = VectorIndex::new(4, Metric::InnerProduct)?;
    /// index.reserve(10)?;
    /// index.add(0, &[1.0, 0.0, 0.0, 0.0])?;
    ///
    /// let size = index.serialized_size();
    /// println!("Index will serialize to {} bytes", size);
    /// # Ok::<(), anyhow::Error>(())
    /// ```
    #[must_use]
    pub fn serialized_size(&self) -> usize {
        self.inner.serialized_length()
    }
1880
1881    /// Check if there's enough disk space to save the index.
1882    ///
1883    /// Uses the `fs2` crate for cross-platform disk space detection.
1884    /// Includes a safety margin (10% or at least 1MB) to account for
1885    /// filesystem overhead and metadata.
1886    ///
1887    /// # Arguments
1888    ///
1889    /// * `path` - Target file path. The parent directory is checked for available space.
1890    ///
1891    /// # Returns
1892    ///
1893    /// - `Ok(true)` if there's enough space
1894    /// - `Ok(false)` if there's insufficient space
1895    /// - `Err` if the disk space check fails (e.g., path doesn't exist)
1896    ///
1897    /// # Examples
1898    ///
1899    /// ```no_run
1900    /// use go_brrr::embedding::{VectorIndex, Metric};
1901    ///
1902    /// let index = VectorIndex::new(768, Metric::InnerProduct)?;
1903    /// index.reserve(10000)?;
1904    /// // ... add vectors ...
1905    ///
1906    /// if index.check_disk_space("./index.usearch")? {
1907    ///     index.save("./index.usearch")?;
1908    /// } else {
1909    ///     eprintln!("Insufficient disk space!");
1910    /// }
1911    /// # Ok::<(), anyhow::Error>(())
1912    /// ```
1913    pub fn check_disk_space(&self, path: impl AsRef<Path>) -> Result<bool> {
1914        let path = path.as_ref();
1915        let required = self.serialized_size() as u64;
1916
1917        // Add safety margin: 10% or at least 1MB
1918        let safety_margin = std::cmp::max(required / 10, 1024 * 1024);
1919        let required_with_margin = required.saturating_add(safety_margin);
1920
1921        // Get parent directory (where space matters)
1922        let parent = path.parent().unwrap_or_else(|| Path::new("."));
1923
1924        // If parent doesn't exist, we can't check space
1925        if !parent.exists() {
1926            bail!(
1927                "Cannot check disk space: parent directory does not exist: {}",
1928                parent.display()
1929            );
1930        }
1931
1932        // Use fs2 for cross-platform disk space detection
1933        let available = fs2::available_space(parent)
1934            .with_context(|| format!("Failed to check disk space for '{}'", parent.display()))?;
1935
1936        Ok(available >= required_with_margin)
1937    }
1938
1939    /// Get detailed disk space information for the target path.
1940    ///
1941    /// Returns available space, required space, and whether save would succeed.
1942    ///
1943    /// # Arguments
1944    ///
1945    /// * `path` - Target file path
1946    ///
1947    /// # Returns
1948    ///
1949    /// Tuple of `(available_bytes, required_bytes, has_enough_space)`
1950    ///
1951    /// # Errors
1952    ///
1953    /// Returns error if disk space cannot be determined.
1954    pub fn disk_space_info(&self, path: impl AsRef<Path>) -> Result<(u64, u64, bool)> {
1955        let path = path.as_ref();
1956        let required = self.serialized_size() as u64;
1957        let safety_margin = std::cmp::max(required / 10, 1024 * 1024);
1958        let required_with_margin = required.saturating_add(safety_margin);
1959
1960        let parent = path.parent().unwrap_or_else(|| Path::new("."));
1961
1962        if !parent.exists() {
1963            bail!(
1964                "Cannot check disk space: parent directory does not exist: {}",
1965                parent.display()
1966            );
1967        }
1968
1969        let available = fs2::available_space(parent)
1970            .with_context(|| format!("Failed to check disk space for '{}'", parent.display()))?;
1971
1972        Ok((
1973            available,
1974            required_with_margin,
1975            available >= required_with_margin,
1976        ))
1977    }
1978
1979    /// Save index with disk space verification.
1980    ///
1981    /// This is the SAFEST way to save an index. It:
1982    /// 1. Checks available disk space before writing
1983    /// 2. Uses atomic write (temp file + rename)
1984    /// 3. Returns detailed information about the save operation
1985    ///
1986    /// # Arguments
1987    ///
1988    /// * `path` - File path to save to (typically with `.usearch` extension)
1989    ///
1990    /// # Returns
1991    ///
1992    /// [`SaveInfo`] containing path, size, duration, and space information.
1993    ///
1994    /// # Errors
1995    ///
1996    /// Returns error if:
1997    /// - Insufficient disk space (with detailed message)
1998    /// - Parent directory does not exist
1999    /// - File write fails
2000    ///
2001    /// # Examples
2002    ///
2003    /// ```no_run
2004    /// use go_brrr::embedding::{VectorIndex, Metric};
2005    ///
2006    /// let index = VectorIndex::new(768, Metric::InnerProduct)?;
2007    /// index.reserve(10000)?;
2008    /// // ... add vectors ...
2009    ///
2010    /// let info = index.save_checked("./index.usearch")?;
2011    /// println!("Saved {} bytes in {:?}", info.size_bytes, info.elapsed);
2012    /// # Ok::<(), anyhow::Error>(())
2013    /// ```
2014    pub fn save_checked(&self, path: impl AsRef<Path>) -> Result<SaveInfo> {
2015        let path = path.as_ref();
2016        let required = self.serialized_size() as u64;
2017        let safety_margin = std::cmp::max(required / 10, 1024 * 1024);
2018        let required_with_margin = required.saturating_add(safety_margin);
2019
2020        let parent = path.parent().unwrap_or_else(|| Path::new("."));
2021
2022        // Check parent directory exists
2023        if !parent.exists() {
2024            bail!("Parent directory does not exist: {}", parent.display());
2025        }
2026
2027        // Check available disk space
2028        let available = fs2::available_space(parent)
2029            .with_context(|| format!("Failed to check disk space for '{}'", parent.display()))?;
2030
2031        if available < required_with_margin {
2032            bail!(
2033                "Insufficient disk space to save index: \
2034                 need {} bytes (including {}% safety margin), \
2035                 but only {} bytes available on '{}'",
2036                required_with_margin,
2037                10,
2038                available,
2039                parent.display()
2040            );
2041        }
2042
2043        // Perform the save operation with timing
2044        let start = Instant::now();
2045        self.save(path)?;
2046        let elapsed = start.elapsed();
2047
2048        Ok(SaveInfo {
2049            path: path.to_path_buf(),
2050            size_bytes: required as usize,
2051            elapsed,
2052            available_before: available,
2053            space_remaining: available.saturating_sub(required),
2054        })
2055    }
2056
2057    /// Serialize the index to a byte vector.
2058    ///
2059    /// This enables storing the index in Redis, sending over network, or any
2060    /// other scenario where file-based persistence is not suitable.
2061    ///
2062    /// The serialized format includes all vectors and the HNSW graph structure.
2063    /// Use [`from_bytes`](Self::from_bytes) to deserialize.
2064    ///
2065    /// # Performance
2066    ///
2067    /// Serialization is O(n) where n is the number of vectors. The resulting
2068    /// buffer size is approximately `serialized_size()` bytes.
2069    ///
2070    /// # Errors
2071    ///
2072    /// Returns error if serialization fails (e.g., internal usearch error).
2073    ///
2074    /// # Examples
2075    ///
2076    /// ```
2077    /// use go_brrr::embedding::{VectorIndex, IndexConfig, Metric};
2078    ///
2079    /// let config = IndexConfig::new(4).with_metric(Metric::InnerProduct);
2080    /// let index = VectorIndex::with_config(config.clone())?;
2081    /// index.reserve(10)?;
2082    /// index.add(1, &[1.0, 0.0, 0.0, 0.0])?;
2083    ///
2084    /// // Serialize to bytes
2085    /// let bytes = index.to_bytes()?;
2086    ///
2087    /// // Store in Redis, send over network, etc.
2088    /// // redis_client.set("my_index", &bytes)?;
2089    ///
2090    /// // Later, deserialize
2091    /// let restored = VectorIndex::from_bytes(&bytes, config)?;
2092    /// assert_eq!(restored.len(), 1);
2093    /// # Ok::<(), anyhow::Error>(())
2094    /// ```
2095    pub fn to_bytes(&self) -> Result<Vec<u8>> {
2096        let size = self.serialized_size();
2097        let mut buffer = vec![0u8; size];
2098
2099        self.inner
2100            .save_to_buffer(&mut buffer)
2101            .map_err(|e| anyhow::anyhow!("Failed to serialize index to buffer: {}", e))?;
2102
2103        Ok(buffer)
2104    }
2105
2106    /// Load an index from a byte slice.
2107    ///
2108    /// Deserializes an index previously serialized with [`to_bytes`](Self::to_bytes).
2109    /// The provided config specifies index parameters; dimensions are validated
2110    /// against the loaded data.
2111    ///
2112    /// # Arguments
2113    ///
2114    /// * `data` - Serialized index data from `to_bytes()`
2115    /// * `config` - Index configuration. If `dimensions` is 0, dimensions are
2116    ///   read from the serialized data without validation.
2117    ///
2118    /// # Errors
2119    ///
2120    /// Returns error if:
2121    /// - Data is corrupted or not a valid usearch index
2122    /// - Loaded dimensions don't match config dimensions (when config.dimensions > 0)
2123    ///
2124    /// # Examples
2125    ///
2126    /// ```
2127    /// use go_brrr::embedding::{VectorIndex, IndexConfig, Metric};
2128    ///
2129    /// // Create and serialize an index
2130    /// let config = IndexConfig::new(4).with_metric(Metric::InnerProduct);
2131    /// let original = VectorIndex::with_config(config.clone())?;
2132    /// original.reserve(10)?;
2133    /// original.add(42, &[1.0, 0.0, 0.0, 0.0])?;
2134    ///
2135    /// let bytes = original.to_bytes()?;
2136    ///
2137    /// // Deserialize with dimension validation
2138    /// let restored = VectorIndex::from_bytes(&bytes, config)?;
2139    /// assert!(restored.contains(42));
2140    /// # Ok::<(), anyhow::Error>(())
2141    /// ```
2142    pub fn from_bytes(data: &[u8], config: IndexConfig) -> Result<Self> {
2143        let options = config.to_usearch_options();
2144        let index = Index::new(&options)
2145            .map_err(|e| anyhow::anyhow!("Failed to create usearch index: {}", e))?;
2146
2147        index
2148            .load_from_buffer(data)
2149            .map_err(|e| anyhow::anyhow!("Failed to deserialize index from buffer: {}", e))?;
2150
2151        // Validate dimensions if config specifies non-zero dimensions
2152        let loaded_dims = index.dimensions();
2153        if config.dimensions != 0 && loaded_dims != config.dimensions {
2154            bail!(
2155                "Dimension mismatch: config specifies {} but loaded index has {}",
2156                config.dimensions,
2157                loaded_dims
2158            );
2159        }
2160
2161        // Update config with actual dimensions if it was 0
2162        let final_config = if config.dimensions == 0 {
2163            IndexConfig {
2164                dimensions: loaded_dims,
2165                ..config
2166            }
2167        } else {
2168            config
2169        };
2170
2171        Ok(Self {
2172            inner: index,
2173            config: final_config,
2174        })
2175    }
2176
2177    /// Load an index from bytes without dimension validation.
2178    ///
2179    /// This is a convenience method that creates a config with dimensions=0,
2180    /// allowing the dimensions to be read from the serialized data.
2181    ///
2182    /// # Arguments
2183    ///
2184    /// * `data` - Serialized index data from `to_bytes()`
2185    ///
2186    /// # Examples
2187    ///
2188    /// ```
2189    /// use go_brrr::embedding::{VectorIndex, Metric};
2190    ///
2191    /// let index = VectorIndex::new(4, Metric::InnerProduct)?;
2192    /// index.reserve(5)?;
2193    /// index.add(1, &[1.0, 0.0, 0.0, 0.0])?;
2194    ///
2195    /// let bytes = index.to_bytes()?;
2196    ///
2197    /// // Restore without knowing the original dimensions
2198    /// let restored = VectorIndex::from_bytes_unchecked(&bytes)?;
2199    /// assert_eq!(restored.dimensions(), 4);
2200    /// # Ok::<(), anyhow::Error>(())
2201    /// ```
2202    pub fn from_bytes_unchecked(data: &[u8]) -> Result<Self> {
2203        // Use dimensions=0 to skip validation and read from buffer
2204        let config = IndexConfig {
2205            dimensions: 0,
2206            ..Default::default()
2207        };
2208        Self::from_bytes(data, config)
2209    }
2210
    /// Get hardware acceleration information.
    ///
    /// Returns the string reported by the underlying usearch index's
    /// `hardware_acceleration()`, describing the SIMD capabilities in use.
    #[must_use]
    pub fn hardware_acceleration(&self) -> String {
        self.inner.hardware_acceleration()
    }
2218
    /// Change the expansion factor for search operations.
    ///
    /// Higher values improve recall at the cost of search speed.
    /// Typical range: 10-200.
    ///
    /// The value is forwarded to the underlying usearch index via
    /// `change_expansion_search`; read it back with
    /// [`expansion_search`](Self::expansion_search).
    ///
    /// # Arguments
    ///
    /// * `expansion` - Expansion factor to use for subsequent searches
    pub fn set_expansion_search(&self, expansion: usize) {
        self.inner.change_expansion_search(expansion);
    }
2226
    /// Get current expansion factor for search.
    ///
    /// Reads the value from the underlying usearch index.
    ///
    /// See also: [`set_expansion_search`](Self::set_expansion_search)
    #[must_use]
    pub fn expansion_search(&self) -> usize {
        self.inner.expansion_search()
    }
2232
    /// Change the expansion factor for add operations (index construction).
    ///
    /// Higher values create a better-connected graph but increase insertion time.
    /// Typical range: 64-512, default is typically 128.
    ///
    /// The value is forwarded to the underlying usearch index via
    /// `change_expansion_add`; read it back with
    /// [`expansion_add`](Self::expansion_add).
    ///
    /// # Arguments
    ///
    /// * `expansion` - Expansion factor for vector addition
    ///
    /// # Example
    ///
    /// ```
    /// use go_brrr::embedding::{VectorIndex, Metric};
    ///
    /// let index = VectorIndex::new(4, Metric::Cosine)?;
    /// index.set_expansion_add(256);  // Higher quality, slower insert
    /// # Ok::<(), anyhow::Error>(())
    /// ```
    pub fn set_expansion_add(&self, expansion: usize) {
        self.inner.change_expansion_add(expansion);
    }
2254
    /// Get current expansion factor for add operations (index construction).
    ///
    /// Higher values increase index quality but slow down insertion.
    /// The default is typically 128.
    ///
    /// See also: [`set_expansion_add`](Self::set_expansion_add),
    /// [`expansion_search`](Self::expansion_search)
    #[must_use]
    pub fn expansion_add(&self) -> usize {
        self.inner.expansion_add()
    }
2265
2266    /// Validate that a vector has the correct dimensions.
2267    fn validate_vector_dimensions(&self, vector: &[f32]) -> Result<()> {
2268        let expected = self.dimensions();
2269        let actual = vector.len();
2270
2271        if actual != expected {
2272            bail!(
2273                "Vector dimension mismatch: expected {}, got {}",
2274                expected,
2275                actual
2276            );
2277        }
2278
2279        Ok(())
2280    }
2281}
2282
2283impl std::fmt::Debug for VectorIndex {
2284    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2285        f.debug_struct("VectorIndex")
2286            .field("dimensions", &self.dimensions())
2287            .field("metric", &self.config.metric)
2288            .field("quantization", &self.config.quantization)
2289            .field("size", &self.len())
2290            .field("capacity", &self.capacity())
2291            .field("memory_usage", &self.memory_usage())
2292            .finish()
2293    }
2294}
2295
2296// =============================================================================
2297// Utility Functions
2298// =============================================================================
2299
2300/// Convert distances to similarity scores for a specific metric.
2301///
2302/// Different metrics require different conversion formulas:
2303/// - `InnerProduct`/`Cosine`: distance = 1 - similarity, so score = 1 - distance
2304/// - `L2Squared`: distance can be any non-negative value, so score = 1 / (1 + distance)
2305///
2306/// All scores are clamped to [0, 1] range.
2307///
2308/// # Arguments
2309///
2310/// * `distances` - Raw distance values from vector search
2311/// * `metric` - The distance metric used for the search
2312///
2313/// # Returns
2314///
2315/// Similarity scores in [0, 1] range, where 1.0 = perfect match.
2316#[must_use]
2317pub fn distances_to_scores_for_metric(distances: &[f32], metric: Metric) -> Vec<f32> {
2318    const LANES: usize = 8;
2319
2320    let n = distances.len();
2321    let mut scores = vec![0.0_f32; n];
2322
2323    let simd_chunks = n / LANES;
2324    let remainder_start = simd_chunks * LANES;
2325
2326    match metric {
2327        Metric::InnerProduct | Metric::Cosine => {
2328            // IP/Cosine: distance = 1 - similarity, so score = 1 - distance
2329            // SIMD constants
2330            let one = f32x8::splat(1.0);
2331            let zero = f32x8::splat(0.0);
2332
2333            // Process 8 elements at a time with SIMD
2334            for i in 0..simd_chunks {
2335                let idx = i * LANES;
2336                let d = f32x8::from_slice(&distances[idx..]);
2337                // (1.0 - d).clamp(0.0, 1.0)
2338                let score = (one - d).simd_clamp(zero, one);
2339                score.copy_to_slice(&mut scores[idx..idx + LANES]);
2340            }
2341
2342            // Scalar fallback for remainder
2343            for i in remainder_start..n {
2344                scores[i] = (1.0 - distances[i]).clamp(0.0, 1.0);
2345            }
2346        }
2347        Metric::L2Squared => {
2348            // L2: distance can be [0, infinity), use inverse formula: score = 1 / (1 + distance)
2349            // This maps 0 -> 1.0, infinity -> 0.0
2350            // SIMD constants
2351            let one = f32x8::splat(1.0);
2352            let zero = f32x8::splat(0.0);
2353
2354            // Process 8 elements at a time with SIMD
2355            for i in 0..simd_chunks {
2356                let idx = i * LANES;
2357                let d = f32x8::from_slice(&distances[idx..]);
2358                // 1.0 / (1.0 + d.max(0.0))
2359                let score = one / (one + d.simd_max(zero));
2360                score.copy_to_slice(&mut scores[idx..idx + LANES]);
2361            }
2362
2363            // Scalar fallback for remainder
2364            for i in remainder_start..n {
2365                scores[i] = 1.0 / (1.0 + distances[i].max(0.0));
2366            }
2367        }
2368    }
2369
2370    scores
2371}
2372
/// Convert search results to similarity scores (assumes Inner Product metric).
///
/// Thin wrapper that calls [`distances_to_scores_for_metric`] with
/// `Metric::InnerProduct`.
///
/// For inner product metric on normalized vectors:
/// - Distance = 1 - similarity
/// - Score = 1 - distance = inner product value
///
/// Clamps scores to [0, 1] range.
///
/// # Deprecated
///
/// Prefer [`distances_to_scores_for_metric`] or [`VectorIndex::to_similarity_scores`]
/// which handle all metrics correctly.
///
/// NOTE(review): the deprecation is documentation-only; no `#[deprecated]`
/// attribute is attached, so callers get no compiler warning.
#[must_use]
pub fn distances_to_scores(distances: &[f32]) -> Vec<f32> {
    distances_to_scores_for_metric(distances, Metric::InnerProduct)
}
2389
2390// =============================================================================
2391// SIMD Vector Normalization Core
2392// =============================================================================
2393
2394/// Compute squared L2 norm using AVX-512 (f32x16 = 512-bit).
2395///
2396/// Processes 16 floats per iteration with SIMD, falls back to scalar for remainder.
2397#[inline]
2398fn squared_norm_simd16(vector: &[f32]) -> f32 {
2399    const LANES: usize = 16;
2400    let chunks = vector.len() / LANES;
2401    let mut acc = F32x16::splat(0.0);
2402
2403    for i in 0..chunks {
2404        let v = F32x16::from_slice(&vector[i * LANES..]);
2405        acc += v * v;
2406    }
2407
2408    // Horizontal sum + scalar remainder
2409    let mut sum = acc.reduce_sum();
2410    for i in (chunks * LANES)..vector.len() {
2411        sum += vector[i] * vector[i];
2412    }
2413    sum
2414}
2415
2416/// Compute squared L2 norm using AVX2 (f32x8 = 256-bit).
2417///
2418/// Processes 8 floats per iteration with SIMD, falls back to scalar for remainder.
2419#[inline]
2420fn squared_norm_simd8(vector: &[f32]) -> f32 {
2421    const LANES: usize = 8;
2422    let chunks = vector.len() / LANES;
2423    let mut acc = f32x8::splat(0.0);
2424
2425    for i in 0..chunks {
2426        let v = f32x8::from_slice(&vector[i * LANES..]);
2427        acc += v * v;
2428    }
2429
2430    // Horizontal sum + scalar remainder
2431    let mut sum = acc.reduce_sum();
2432    for i in (chunks * LANES)..vector.len() {
2433        sum += vector[i] * vector[i];
2434    }
2435    sum
2436}
2437
2438/// Compute squared L2 norm with runtime SIMD dispatch.
2439///
2440/// Uses AVX-512 (f32x16) if available, otherwise AVX2 (f32x8).
2441/// Falls back to scalar for very short vectors.
2442#[inline]
2443fn squared_norm_simd(vector: &[f32]) -> f32 {
2444    // Short vectors: scalar is faster due to no SIMD setup overhead
2445    if vector.len() < 8 {
2446        return vector.iter().map(|x| x * x).sum();
2447    }
2448
2449    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2450    {
2451        // Runtime feature detection for AVX-512
2452        if vector.len() >= 16 && is_x86_feature_detected!("avx512f") {
2453            return squared_norm_simd16(vector);
2454        }
2455    }
2456
2457    // Default to AVX2 (f32x8)
2458    squared_norm_simd8(vector)
2459}
2460
2461/// Normalize a mutable slice in-place using AVX-512 (f32x16).
2462#[inline]
2463fn normalize_inplace_simd16(vector: &mut [f32], inv_norm: f32) {
2464    const LANES: usize = 16;
2465    let chunks = vector.len() / LANES;
2466    let inv = F32x16::splat(inv_norm);
2467
2468    for i in 0..chunks {
2469        let start = i * LANES;
2470        let v = F32x16::from_slice(&vector[start..]);
2471        let normalized = v * inv;
2472        normalized.copy_to_slice(&mut vector[start..start + LANES]);
2473    }
2474
2475    // Scalar remainder
2476    for i in (chunks * LANES)..vector.len() {
2477        vector[i] *= inv_norm;
2478    }
2479}
2480
2481/// Normalize a mutable slice in-place using AVX2 (f32x8).
2482#[inline]
2483fn normalize_inplace_simd8(vector: &mut [f32], inv_norm: f32) {
2484    const LANES: usize = 8;
2485    let chunks = vector.len() / LANES;
2486    let inv = f32x8::splat(inv_norm);
2487
2488    for i in 0..chunks {
2489        let start = i * LANES;
2490        let v = f32x8::from_slice(&vector[start..]);
2491        let normalized = v * inv;
2492        normalized.copy_to_slice(&mut vector[start..start + LANES]);
2493    }
2494
2495    // Scalar remainder
2496    for i in (chunks * LANES)..vector.len() {
2497        vector[i] *= inv_norm;
2498    }
2499}
2500
2501/// Normalize a mutable slice in-place with runtime SIMD dispatch.
2502///
2503/// Uses AVX-512 (f32x16) if available, otherwise AVX2 (f32x8).
2504/// Falls back to scalar for very short vectors.
2505#[inline]
2506fn normalize_inplace_simd(vector: &mut [f32], inv_norm: f32) {
2507    // Short vectors: scalar is faster
2508    if vector.len() < 8 {
2509        for x in vector.iter_mut() {
2510            *x *= inv_norm;
2511        }
2512        return;
2513    }
2514
2515    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2516    {
2517        if vector.len() >= 16 && is_x86_feature_detected!("avx512f") {
2518            normalize_inplace_simd16(vector, inv_norm);
2519            return;
2520        }
2521    }
2522
2523    normalize_inplace_simd8(vector, inv_norm);
2524}
2525
2526/// Normalize a vector to unit length (for cosine similarity).
2527///
2528/// Returns `None` if the vector is zero or near-zero (norm < epsilon),
2529/// as such vectors cannot be meaningfully normalized. This prevents NaN
2530/// propagation in downstream operations like cosine similarity.
2531///
2532/// # Arguments
2533///
2534/// * `vector` - The input vector to normalize
2535///
2536/// # Returns
2537///
2538/// * `Some(normalized_vector)` - The unit-length vector
2539/// * `None` - If the input vector has zero or near-zero magnitude
2540///
2541/// # Example
2542///
2543/// ```
2544/// use go_brrr::embedding::normalize_vector;
2545///
2546/// let v = vec![3.0, 4.0];
2547/// let normalized = normalize_vector(&v).expect("non-zero vector");
2548/// // normalized is [0.6, 0.8] with unit length
2549///
2550/// let zero_vec = vec![0.0, 0.0];
2551/// assert!(normalize_vector(&zero_vec).is_none());
2552/// ```
2553#[must_use]
2554pub fn normalize_vector(vector: &[f32]) -> Option<Vec<f32>> {
2555    // SIMD-accelerated squared norm computation
2556    let norm: f32 = squared_norm_simd(vector).sqrt();
2557
2558    // Zero or near-zero norm cannot be normalized without producing NaN
2559    if norm < f32::EPSILON {
2560        return None;
2561    }
2562
2563    // Copy to mutable buffer and normalize in-place with SIMD
2564    let mut result = vector.to_vec();
2565    normalize_inplace_simd(&mut result, 1.0 / norm);
2566    Some(result)
2567}
2568
2569/// Normalize a vector to unit length, returning the original if normalization fails.
2570///
2571/// This is a convenience wrapper around [`normalize_vector`] for cases where
2572/// you want to continue with the original vector rather than handle the error.
2573///
2574/// # Warning
2575///
2576/// Using an un-normalized zero vector in cosine similarity operations will
2577/// produce undefined or NaN results. Prefer [`normalize_vector`] and handle
2578/// the `None` case explicitly when correctness matters.
2579///
2580/// # Arguments
2581///
2582/// * `vector` - The input vector (consumed)
2583///
2584/// # Returns
2585///
2586/// The normalized vector, or the original if normalization failed.
2587#[must_use]
2588pub fn normalize_vector_or_original(mut vector: Vec<f32>) -> Vec<f32> {
2589    // SIMD-accelerated squared norm computation
2590    let norm: f32 = squared_norm_simd(&vector).sqrt();
2591
2592    if norm < f32::EPSILON {
2593        return vector;
2594    }
2595
2596    // Normalize in-place with SIMD (zero allocation for normalization)
2597    normalize_inplace_simd(&mut vector, 1.0 / norm);
2598    vector
2599}
2600
2601/// Check if a vector contains only finite values (no NaN or Infinity).
2602///
2603/// Use this to validate embeddings before indexing or searching to prevent
2604/// NaN propagation through vector operations.
2605///
2606/// # Arguments
2607///
2608/// * `vector` - The vector to validate
2609///
2610/// # Returns
2611///
2612/// `true` if all elements are finite, `false` if any element is NaN or Infinity.
2613///
2614/// # Example
2615///
2616/// ```
2617/// use go_brrr::embedding::is_valid_vector;
2618///
2619/// assert!(is_valid_vector(&[1.0, 2.0, 3.0]));
2620/// assert!(!is_valid_vector(&[1.0, f32::NAN, 3.0]));
2621/// assert!(!is_valid_vector(&[f32::INFINITY, 2.0, 3.0]));
2622/// ```
2623#[must_use]
2624pub fn is_valid_vector(vector: &[f32]) -> bool {
2625    const LANES: usize = 8;
2626
2627    let chunks = vector.chunks_exact(LANES);
2628    let remainder = chunks.remainder();
2629
2630    // SIMD check: process 8 floats at a time, early exit on first invalid
2631    for chunk in chunks {
2632        let v = f32x8::from_slice(chunk);
2633        // is_finite() returns mask where true = finite (not NaN and not Inf)
2634        if !v.is_finite().all() {
2635            return false;
2636        }
2637    }
2638
2639    // Scalar fallback for remainder (0-7 elements)
2640    remainder.iter().all(|x| x.is_finite())
2641}
2642
2643/// Check if a vector is approximately normalized (unit length).
2644#[must_use]
2645pub fn is_normalized(vector: &[f32], epsilon: f32) -> bool {
2646    const LANES: usize = 8;
2647
2648    let chunks = vector.chunks_exact(LANES);
2649    let remainder = chunks.remainder();
2650
2651    // SIMD sum of squares: accumulate 8 lanes in parallel
2652    let mut acc = f32x8::splat(0.0);
2653    for chunk in chunks {
2654        let v = f32x8::from_slice(chunk);
2655        acc += v * v;
2656    }
2657
2658    // Reduce SIMD accumulator to scalar
2659    let mut sum: f32 = acc.reduce_sum();
2660
2661    // Add remainder elements (0-7)
2662    for &x in remainder {
2663        sum += x * x;
2664    }
2665
2666    let norm = sum.sqrt();
2667    (norm - 1.0).abs() < epsilon
2668}
2669
2670// =============================================================================
2671// Query Embedding Cache
2672// =============================================================================
2673
2674/// Default cache capacity for query embeddings.
2675const QUERY_CACHE_DEFAULT_CAPACITY: usize = 100;
2676
2677/// LRU cache for query embeddings to avoid recomputing same queries.
2678/// Stores up to 100 recent query -> embedding mappings.
2679///
2680/// Thread-safe via Mutex. Cache hits are O(1), misses are O(1) for insertion.
2681/// The cache uses string keys (query text) and stores the full embedding vector.
2682///
2683/// # Memory Usage
2684///
2685/// For 1024-dimensional embeddings (e.g., BGE-large):
2686/// - Each embedding: 1024 * 4 bytes = 4KB
2687/// - 100 embeddings: ~400KB + key overhead
2688///
2689/// This is negligible compared to the index itself.
2690static QUERY_EMBEDDING_CACHE: Lazy<Mutex<LruCache<String, Vec<f32>>>> = Lazy::new(|| {
2691    // SAFETY: NonZeroUsize::new returns Some for non-zero values
2692    let capacity = NonZeroUsize::new(QUERY_CACHE_DEFAULT_CAPACITY)
2693        .expect("QUERY_CACHE_DEFAULT_CAPACITY must be non-zero");
2694    Mutex::new(LruCache::new(capacity))
2695});
2696
2697/// Get cached query embedding or compute and cache it.
2698///
2699/// This function provides a cache-through pattern for query embeddings:
2700/// 1. Check if the query is already cached
2701/// 2. If cached, return the cached embedding (avoiding expensive TEI call)
2702/// 3. If not cached, compute the embedding using the provided function
2703/// 4. Store the result in cache for future queries
2704///
2705/// # Arguments
2706///
2707/// * `query` - The query text to get/compute embedding for
2708/// * `compute_fn` - Fallback function to compute embedding if not cached
2709///
2710/// # Returns
2711///
2712/// The embedding vector for the query (from cache or freshly computed).
2713///
2714/// # Errors
2715///
2716/// Returns an error if the compute function fails.
2717///
2718/// # Example
2719///
2720/// ```no_run
2721/// use go_brrr::embedding::get_cached_query_embedding;
2722///
2723/// # async fn example() -> anyhow::Result<()> {
2724/// let embedding = get_cached_query_embedding("authentication logic", |q| {
2725///     // This would normally call TEI server
2726///     Ok(vec![0.1f32; 768])
2727/// })?;
2728/// # Ok(())
2729/// # }
2730/// ```
2731pub fn get_cached_query_embedding<F>(query: &str, compute_fn: F) -> Result<Vec<f32>>
2732where
2733    F: FnOnce(&str) -> Result<Vec<f32>>,
2734{
2735    let cache_key = query.to_string();
2736
2737    // Try cache first (fast path)
2738    {
2739        let mut cache = QUERY_EMBEDDING_CACHE
2740            .lock()
2741            .map_err(|e| anyhow::anyhow!("Failed to acquire cache lock: {}", e))?;
2742        if let Some(embedding) = cache.get(&cache_key) {
2743            tracing::debug!(query = %query, "Query embedding cache hit");
2744            return Ok(embedding.clone());
2745        }
2746    }
2747
2748    tracing::debug!(query = %query, "Query embedding cache miss, computing...");
2749
2750    // Compute embedding (slow path - calls TEI server)
2751    let embedding = compute_fn(query)?;
2752
2753    // Cache result for future queries
2754    {
2755        let mut cache = QUERY_EMBEDDING_CACHE
2756            .lock()
2757            .map_err(|e| anyhow::anyhow!("Failed to acquire cache lock: {}", e))?;
2758        cache.put(cache_key, embedding.clone());
2759    }
2760
2761    Ok(embedding)
2762}
2763
2764/// Async version of `get_cached_query_embedding` for use with async embedding backends.
2765///
2766/// This provides the same cache-through pattern but accepts an async compute function,
2767/// which is necessary when the embedding computation involves async I/O (e.g., gRPC calls).
2768///
2769/// # Arguments
2770///
2771/// * `query` - The query text to get/compute embedding for
2772/// * `compute_fn` - Async fallback function to compute embedding if not cached
2773///
2774/// # Returns
2775///
2776/// The embedding vector for the query (from cache or freshly computed).
2777///
2778/// # Errors
2779///
2780/// Returns an error if the compute function fails.
2781pub async fn get_cached_query_embedding_async<F, Fut>(
2782    query: &str,
2783    compute_fn: F,
2784) -> Result<Vec<f32>>
2785where
2786    F: FnOnce(String) -> Fut,
2787    Fut: std::future::Future<Output = Result<Vec<f32>>>,
2788{
2789    let cache_key = query.to_string();
2790
2791    // Try cache first (fast path)
2792    {
2793        let mut cache = QUERY_EMBEDDING_CACHE
2794            .lock()
2795            .map_err(|e| anyhow::anyhow!("Failed to acquire cache lock: {}", e))?;
2796        if let Some(embedding) = cache.get(&cache_key) {
2797            tracing::debug!(query = %query, "Query embedding cache hit (async)");
2798            return Ok(embedding.clone());
2799        }
2800    }
2801
2802    tracing::debug!(query = %query, "Query embedding cache miss (async), computing...");
2803
2804    // Compute embedding (slow path - async call to TEI server)
2805    let embedding = compute_fn(cache_key.clone()).await?;
2806
2807    // Cache result for future queries
2808    {
2809        let mut cache = QUERY_EMBEDDING_CACHE
2810            .lock()
2811            .map_err(|e| anyhow::anyhow!("Failed to acquire cache lock: {}", e))?;
2812        cache.put(cache_key, embedding.clone());
2813    }
2814
2815    Ok(embedding)
2816}
2817
2818/// Clear all entries from the query embedding cache.
2819///
2820/// Use this when:
2821/// - The embedding model changes
2822/// - Memory pressure requires cache eviction
2823/// - Testing requires a clean cache state
2824///
2825/// # Errors
2826///
2827/// Returns an error if the cache lock is poisoned.
2828pub fn clear_query_cache() -> Result<()> {
2829    let mut cache = QUERY_EMBEDDING_CACHE
2830        .lock()
2831        .map_err(|e| anyhow::anyhow!("Failed to acquire cache lock: {}", e))?;
2832    cache.clear();
2833    tracing::debug!("Query embedding cache cleared");
2834    Ok(())
2835}
2836
2837/// Get statistics about the query embedding cache.
2838///
2839/// Returns a tuple of (current_size, max_capacity).
2840///
2841/// # Example
2842///
2843/// ```
2844/// use go_brrr::embedding::query_cache_stats;
2845///
2846/// let (size, capacity) = query_cache_stats().unwrap();
2847/// println!("Cache: {}/{} entries", size, capacity);
2848/// ```
2849///
2850/// # Errors
2851///
2852/// Returns an error if the cache lock is poisoned.
2853pub fn query_cache_stats() -> Result<(usize, usize)> {
2854    let cache = QUERY_EMBEDDING_CACHE
2855        .lock()
2856        .map_err(|e| anyhow::anyhow!("Failed to acquire cache lock: {}", e))?;
2857    Ok((cache.len(), cache.cap().get()))
2858}
2859
2860/// Check if a query is in the cache without affecting LRU order.
2861///
2862/// Useful for diagnostics and testing.
2863///
2864/// # Errors
2865///
2866/// Returns an error if the cache lock is poisoned.
2867pub fn query_in_cache(query: &str) -> Result<bool> {
2868    let cache = QUERY_EMBEDDING_CACHE
2869        .lock()
2870        .map_err(|e| anyhow::anyhow!("Failed to acquire cache lock: {}", e))?;
2871    Ok(cache.contains(query))
2872}
2873
2874// =============================================================================
2875// Tests
2876// =============================================================================
2877
2878#[cfg(test)]
2879mod tests {
2880    use super::*;
2881
2882    fn create_test_index() -> VectorIndex {
2883        VectorIndex::new(4, Metric::InnerProduct).expect("Failed to create test index")
2884    }
2885
2886    #[test]
2887    fn test_create_index() {
2888        let index = create_test_index();
2889        assert_eq!(index.dimensions(), 4);
2890        assert_eq!(index.len(), 0);
2891        assert!(index.is_empty());
2892    }
2893
2894    #[test]
2895    fn test_add_and_search() {
2896        let index = create_test_index();
2897        index.reserve(10).unwrap();
2898
2899        // Add some vectors
2900        index.add(0, &[1.0, 0.0, 0.0, 0.0]).unwrap();
2901        index.add(1, &[0.0, 1.0, 0.0, 0.0]).unwrap();
2902        index.add(2, &[0.0, 0.0, 1.0, 0.0]).unwrap();
2903
2904        assert_eq!(index.len(), 3);
2905        assert!(!index.is_empty());
2906
2907        // Search for exact match
2908        let results = index.search(&[1.0, 0.0, 0.0, 0.0], 3).unwrap();
2909        assert!(!results.is_empty());
2910        assert_eq!(results[0].0, 0); // Should find key 0 first
2911    }
2912
2913    #[test]
2914    fn test_add_batch() {
2915        let index = create_test_index();
2916        index.reserve(3).unwrap();
2917
2918        let keys = vec![0, 1, 2];
2919        let vectors = vec![
2920            vec![1.0, 0.0, 0.0, 0.0],
2921            vec![0.0, 1.0, 0.0, 0.0],
2922            vec![0.0, 0.0, 1.0, 0.0],
2923        ];
2924
2925        index.add_batch(&keys, &vectors).unwrap();
2926        assert_eq!(index.len(), 3);
2927    }
2928
2929    #[test]
2930    fn test_add_batch_flat() {
2931        let index = create_test_index();
2932        index.reserve(2).unwrap();
2933
2934        let keys = vec![0, 1];
2935        let vectors_flat = vec![
2936            1.0, 0.0, 0.0, 0.0, // Vector 0
2937            0.0, 1.0, 0.0, 0.0, // Vector 1
2938        ];
2939
2940        index.add_batch_flat(&keys, &vectors_flat).unwrap();
2941        assert_eq!(index.len(), 2);
2942    }
2943
2944    #[test]
2945    fn test_dimension_validation() {
2946        let index = create_test_index();
2947        index.reserve(10).unwrap();
2948
2949        // Wrong dimensions should fail
2950        let result = index.add(0, &[1.0, 0.0, 0.0]); // Only 3 dimensions
2951        assert!(result.is_err());
2952
2953        let result = index.add(0, &[1.0, 0.0, 0.0, 0.0, 0.0]); // 5 dimensions
2954        assert!(result.is_err());
2955
2956        // Correct dimensions should succeed
2957        let result = index.add(0, &[1.0, 0.0, 0.0, 0.0]);
2958        assert!(result.is_ok());
2959    }
2960
2961    #[test]
2962    fn test_contains() {
2963        let index = create_test_index();
2964        index.reserve(10).unwrap();
2965
2966        assert!(!index.contains(42));
2967
2968        index.add(42, &[1.0, 0.0, 0.0, 0.0]).unwrap();
2969        assert!(index.contains(42));
2970        assert!(!index.contains(43));
2971    }
2972
2973    #[test]
2974    fn test_remove() {
2975        let index = create_test_index();
2976        index.reserve(10).unwrap();
2977
2978        index.add(0, &[1.0, 0.0, 0.0, 0.0]).unwrap();
2979        assert!(index.contains(0));
2980
2981        let removed = index.remove(0).unwrap();
2982        assert_eq!(removed, 1);
2983    }
2984
2985    #[test]
2986    fn test_filtered_search() {
2987        let index = create_test_index();
2988        index.reserve(20).unwrap();
2989
2990        for i in 0..10 {
2991            index.add(i, &[i as f32 * 0.1, 0.0, 0.0, 0.0]).unwrap();
2992        }
2993
2994        // Only search even keys
2995        let results = index
2996            .search_filtered(&[0.5, 0.0, 0.0, 0.0], 10, |key| key % 2 == 0)
2997            .unwrap();
2998
2999        for (key, _) in results {
3000            assert_eq!(key % 2, 0);
3001        }
3002    }
3003
3004    #[test]
3005    fn test_metric_parsing() {
3006        assert_eq!("ip".parse::<Metric>().unwrap(), Metric::InnerProduct);
3007        assert_eq!("cosine".parse::<Metric>().unwrap(), Metric::Cosine);
3008        assert_eq!("l2".parse::<Metric>().unwrap(), Metric::L2Squared);
3009        assert!("invalid".parse::<Metric>().is_err());
3010    }
3011
3012    #[test]
3013    fn test_normalize_vector() {
3014        // Normal case: non-zero vector normalizes to unit length
3015        let v = vec![3.0, 4.0];
3016        let normalized = normalize_vector(&v).expect("non-zero vector should normalize");
3017
3018        let norm: f32 = normalized.iter().map(|x| x * x).sum::<f32>().sqrt();
3019        assert!((norm - 1.0).abs() < 1e-6);
3020    }
3021
3022    #[test]
3023    fn test_normalize_vector_zero() {
3024        // Zero vector returns None to prevent NaN propagation
3025        let zero_vec = vec![0.0, 0.0, 0.0];
3026        assert!(normalize_vector(&zero_vec).is_none());
3027
3028        // Near-zero vector also returns None
3029        let near_zero = vec![f32::EPSILON / 2.0, 0.0];
3030        assert!(normalize_vector(&near_zero).is_none());
3031    }
3032
3033    #[test]
3034    fn test_normalize_vector_or_original() {
3035        // Normal case: returns normalized vector
3036        let v = vec![3.0, 4.0];
3037        let normalized = normalize_vector_or_original(v);
3038        let norm: f32 = normalized.iter().map(|x| x * x).sum::<f32>().sqrt();
3039        assert!((norm - 1.0).abs() < 1e-6);
3040
3041        // Zero vector: returns original unchanged
3042        let zero_vec = vec![0.0, 0.0, 0.0];
3043        let result = normalize_vector_or_original(zero_vec.clone());
3044        assert_eq!(result, zero_vec);
3045    }
3046
3047    #[test]
3048    fn test_is_valid_vector() {
3049        // Valid vectors
3050        assert!(is_valid_vector(&[1.0, 2.0, 3.0]));
3051        assert!(is_valid_vector(&[0.0, 0.0, 0.0]));
3052        assert!(is_valid_vector(&[-1.0, f32::MAX, f32::MIN]));
3053        assert!(is_valid_vector(&[])); // Empty vector is valid
3054
3055        // Invalid vectors with NaN
3056        assert!(!is_valid_vector(&[1.0, f32::NAN, 3.0]));
3057        assert!(!is_valid_vector(&[f32::NAN]));
3058
3059        // Invalid vectors with Infinity
3060        assert!(!is_valid_vector(&[f32::INFINITY, 2.0, 3.0]));
3061        assert!(!is_valid_vector(&[1.0, f32::NEG_INFINITY]));
3062    }
3063
3064    #[test]
3065    fn test_is_normalized() {
3066        let normalized = vec![0.6, 0.8];
3067        let not_normalized = vec![3.0, 4.0];
3068
3069        assert!(is_normalized(&normalized, 1e-6));
3070        assert!(!is_normalized(&not_normalized, 1e-6));
3071    }
3072
3073    #[test]
3074    fn test_distances_to_scores_inner_product() {
3075        // Inner product: score = 1 - distance, clamped to [0, 1]
3076        let distances = vec![0.0, 0.5, 1.0, 1.5];
3077        let scores = distances_to_scores_for_metric(&distances, Metric::InnerProduct);
3078
3079        assert!((scores[0] - 1.0).abs() < 1e-6); // 0.0 -> 1.0
3080        assert!((scores[1] - 0.5).abs() < 1e-6); // 0.5 -> 0.5
3081        assert!((scores[2] - 0.0).abs() < 1e-6); // 1.0 -> 0.0
3082        assert!((scores[3] - 0.0).abs() < 1e-6); // 1.5 -> clamped to 0.0
3083    }
3084
3085    #[test]
3086    fn test_distances_to_scores_cosine() {
3087        // Cosine: same formula as inner product
3088        let distances = vec![0.0, 0.25, 0.75, 1.0];
3089        let scores = distances_to_scores_for_metric(&distances, Metric::Cosine);
3090
3091        assert!((scores[0] - 1.0).abs() < 1e-6);
3092        assert!((scores[1] - 0.75).abs() < 1e-6);
3093        assert!((scores[2] - 0.25).abs() < 1e-6);
3094        assert!((scores[3] - 0.0).abs() < 1e-6);
3095    }
3096
3097    #[test]
3098    fn test_distances_to_scores_l2_squared() {
3099        // L2: score = 1 / (1 + distance)
3100        // 0.0 -> 1.0, 1.0 -> 0.5, 4.0 -> 0.2, 9.0 -> 0.1
3101        let distances = vec![0.0, 1.0, 4.0, 9.0];
3102        let scores = distances_to_scores_for_metric(&distances, Metric::L2Squared);
3103
3104        assert!((scores[0] - 1.0).abs() < 1e-6);       // 1 / (1 + 0) = 1.0
3105        assert!((scores[1] - 0.5).abs() < 1e-6);       // 1 / (1 + 1) = 0.5
3106        assert!((scores[2] - 0.2).abs() < 1e-6);       // 1 / (1 + 4) = 0.2
3107        assert!((scores[3] - 0.1).abs() < 1e-6);       // 1 / (1 + 9) = 0.1
3108    }
3109
3110    #[test]
3111    fn test_distances_to_scores_l2_large_distances() {
3112        // L2 can handle arbitrarily large distances without clamping to 0
3113        let distances = vec![99.0, 999.0, 9999.0];
3114        let scores = distances_to_scores_for_metric(&distances, Metric::L2Squared);
3115
3116        // All should be small but positive
3117        assert!(scores[0] > 0.0 && scores[0] < 0.02);  // 1/100 = 0.01
3118        assert!(scores[1] > 0.0 && scores[1] < 0.002); // 1/1000 = 0.001
3119        assert!(scores[2] > 0.0 && scores[2] < 0.0002);// 1/10000 = 0.0001
3120    }
3121
3122    #[test]
3123    fn test_distances_to_scores_backward_compat() {
3124        // Original function should still work for inner product
3125        let distances = vec![0.0, 0.5, 1.0, 1.5];
3126        let scores = distances_to_scores(&distances);
3127
3128        assert!((scores[0] - 1.0).abs() < 1e-6);
3129        assert!((scores[1] - 0.5).abs() < 1e-6);
3130        assert!((scores[2] - 0.0).abs() < 1e-6);
3131        assert!((scores[3] - 0.0).abs() < 1e-6);
3132    }
3133
3134    #[test]
3135    fn test_distances_to_scores_simd_path() {
3136        // Test with 16 elements to exercise SIMD path (8 lanes) plus remainder
3137        // InnerProduct: score = (1.0 - d).clamp(0.0, 1.0)
3138        let distances_ip: Vec<f32> = (0..16).map(|i| i as f32 * 0.1).collect();
3139        let scores_ip = distances_to_scores_for_metric(&distances_ip, Metric::InnerProduct);
3140
3141        assert_eq!(scores_ip.len(), 16);
3142        for (i, &score) in scores_ip.iter().enumerate() {
3143            let expected = (1.0 - distances_ip[i]).clamp(0.0, 1.0);
3144            assert!(
3145                (score - expected).abs() < 1e-6,
3146                "IP mismatch at {i}: got {score}, expected {expected}"
3147            );
3148        }
3149
3150        // L2Squared: score = 1.0 / (1.0 + d.max(0.0))
3151        let distances_l2: Vec<f32> = (0..16).map(|i| i as f32).collect();
3152        let scores_l2 = distances_to_scores_for_metric(&distances_l2, Metric::L2Squared);
3153
3154        assert_eq!(scores_l2.len(), 16);
3155        for (i, &score) in scores_l2.iter().enumerate() {
3156            let expected = 1.0 / (1.0 + distances_l2[i].max(0.0));
3157            assert!(
3158                (score - expected).abs() < 1e-6,
3159                "L2 mismatch at {i}: got {score}, expected {expected}"
3160            );
3161        }
3162
3163        // Test with 11 elements (8 SIMD + 3 remainder)
3164        let distances_odd: Vec<f32> = (0..11).map(|i| i as f32 * 0.05).collect();
3165        let scores_odd = distances_to_scores_for_metric(&distances_odd, Metric::Cosine);
3166
3167        assert_eq!(scores_odd.len(), 11);
3168        for (i, &score) in scores_odd.iter().enumerate() {
3169            let expected = (1.0 - distances_odd[i]).clamp(0.0, 1.0);
3170            assert!(
3171                (score - expected).abs() < 1e-6,
3172                "Odd mismatch at {i}: got {score}, expected {expected}"
3173            );
3174        }
3175    }
3176
3177    #[test]
3178    fn test_vector_index_to_similarity_scores() {
3179        // Test the VectorIndex method uses correct metric
3180        let index_ip = VectorIndex::new(4, Metric::InnerProduct).unwrap();
3181        let index_l2 = VectorIndex::new(4, Metric::L2Squared).unwrap();
3182
3183        let distances = vec![0.5, 4.0];
3184
3185        // IP: 0.5 -> 0.5, 4.0 -> clamped to 0.0
3186        let scores_ip = index_ip.to_similarity_scores(&distances);
3187        assert!((scores_ip[0] - 0.5).abs() < 1e-6);
3188        assert!((scores_ip[1] - 0.0).abs() < 1e-6);
3189
3190        // L2: 0.5 -> 1/1.5 = 0.667, 4.0 -> 1/5 = 0.2
3191        let scores_l2 = index_l2.to_similarity_scores(&distances);
3192        assert!((scores_l2[0] - (1.0 / 1.5)).abs() < 1e-6);
3193        assert!((scores_l2[1] - 0.2).abs() < 1e-6);
3194    }
3195
3196    #[test]
3197    fn test_metric_getter() {
3198        let index = VectorIndex::new(4, Metric::Cosine).unwrap();
3199        assert_eq!(index.metric(), Metric::Cosine);
3200
3201        let index2 = VectorIndex::new(4, Metric::L2Squared).unwrap();
3202        assert_eq!(index2.metric(), Metric::L2Squared);
3203    }
3204
3205    #[test]
3206    fn test_config_builder() {
3207        let config = IndexConfig::new(768)
3208            .with_metric(Metric::Cosine)
3209            .with_quantization(Quantization::F16)
3210            .with_connectivity(32)
3211            .with_expansion_add(128)
3212            .with_expansion_search(64);
3213
3214        assert_eq!(config.dimensions, 768);
3215        assert_eq!(config.metric, Metric::Cosine);
3216        assert_eq!(config.quantization, Quantization::F16);
3217        assert_eq!(config.connectivity, 32);
3218        assert_eq!(config.expansion_add, 128);
3219        assert_eq!(config.expansion_search, 64);
3220    }
3221
3222    #[test]
3223    fn test_expansion_add_getter_setter() {
3224        let config = IndexConfig::new(4);
3225        let index = VectorIndex::with_config(config).unwrap();
3226
3227        // Check default (usearch default is typically > 0)
3228        let default = index.expansion_add();
3229        assert!(default > 0, "Default expansion_add should be positive");
3230
3231        // Set and verify the new value
3232        index.set_expansion_add(256);
3233        assert_eq!(index.expansion_add(), 256);
3234    }
3235
3236    #[test]
3237    fn test_expansion_config_applied() {
3238        // Test that expansion values from config are applied correctly
3239        let config = IndexConfig::new(4)
3240            .with_expansion_add(512)
3241            .with_expansion_search(128);
3242
3243        let index = VectorIndex::with_config(config).unwrap();
3244
3245        // Note: usearch may adjust these values, but they should be >= specified
3246        assert!(
3247            index.expansion_add() >= 512 || index.expansion_add() > 0,
3248            "expansion_add should be set from config"
3249        );
3250        assert!(
3251            index.expansion_search() >= 128 || index.expansion_search() > 0,
3252            "expansion_search should be set from config"
3253        );
3254    }
3255
3256    #[test]
3257    fn test_expansion_search_getter_setter() {
3258        let index = create_test_index();
3259
3260        // Check default
3261        let default = index.expansion_search();
3262        assert!(default > 0, "Default expansion_search should be positive");
3263
3264        // Set and verify the new value
3265        index.set_expansion_search(128);
3266        assert_eq!(index.expansion_search(), 128);
3267    }
3268
3269    #[test]
3270    fn test_estimate_memory() {
3271        let config = IndexConfig::new(768).with_quantization(Quantization::F32);
3272        let estimate = config.estimate_memory(10000);
3273
3274        // Should be roughly 10000 * 768 * 4 * 2 = ~61MB
3275        assert!(estimate > 50_000_000);
3276        assert!(estimate < 100_000_000);
3277    }
3278
3279    #[test]
3280    fn test_empty_search() {
3281        let index = create_test_index();
3282        let results = index.search(&[1.0, 0.0, 0.0, 0.0], 10).unwrap();
3283        assert!(results.is_empty());
3284    }
3285
3286    #[test]
3287    fn test_search_k_larger_than_index() {
3288        let index = create_test_index();
3289        index.reserve(10).unwrap();
3290        index.add(0, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3291        index.add(1, &[0.0, 1.0, 0.0, 0.0]).unwrap();
3292
3293        // Ask for more results than exist
3294        let results = index.search(&[1.0, 0.0, 0.0, 0.0], 100).unwrap();
3295        assert_eq!(results.len(), 2); // Should only return what exists
3296    }
3297
3298    #[test]
3299    fn test_debug_format() {
3300        let index = create_test_index();
3301        let debug_str = format!("{:?}", index);
3302
3303        assert!(debug_str.contains("VectorIndex"));
3304        assert!(debug_str.contains("dimensions: 4"));
3305    }
3306
3307    #[test]
3308    fn test_get_vector() {
3309        let config = IndexConfig {
3310            dimensions: 4,
3311            ..Default::default()
3312        };
3313        let index = VectorIndex::with_config(config).unwrap();
3314        index.reserve(10).unwrap();
3315
3316        let vector = vec![1.0, 0.0, 0.0, 0.0];
3317        index.add(42, &vector).unwrap();
3318
3319        // Retrieve existing vector
3320        let retrieved = index.get(42);
3321        assert!(retrieved.is_some());
3322        assert_eq!(retrieved.unwrap(), vector);
3323
3324        // Non-existent key should return None
3325        assert!(index.get(999).is_none());
3326    }
3327
3328    #[test]
3329    fn test_get_batch_vectors() {
3330        let index = create_test_index();
3331        index.reserve(10).unwrap();
3332
3333        index.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3334        index.add(3, &[0.0, 1.0, 0.0, 0.0]).unwrap();
3335        index.add(5, &[0.0, 0.0, 1.0, 0.0]).unwrap();
3336
3337        // Retrieve batch with some existing and some non-existing keys
3338        let results = index.get_batch(&[1, 2, 3, 4, 5]);
3339
3340        assert!(results[0].is_some()); // Key 1 exists
3341        assert!(results[1].is_none()); // Key 2 doesn't exist
3342        assert!(results[2].is_some()); // Key 3 exists
3343        assert!(results[3].is_none()); // Key 4 doesn't exist
3344        assert!(results[4].is_some()); // Key 5 exists
3345
3346        // Verify vector contents
3347        assert_eq!(results[0].as_ref().unwrap(), &vec![1.0, 0.0, 0.0, 0.0]);
3348        assert_eq!(results[2].as_ref().unwrap(), &vec![0.0, 1.0, 0.0, 0.0]);
3349        assert_eq!(results[4].as_ref().unwrap(), &vec![0.0, 0.0, 1.0, 0.0]);
3350    }
3351
3352    #[test]
3353    fn test_count_key_standard_mode() {
3354        // Standard mode (non-multi): count returns 0 or 1
3355        let index = create_test_index();
3356        index.reserve(10).unwrap();
3357
3358        // Non-existent key
3359        assert_eq!(index.count_key(42), 0);
3360
3361        // Add a vector
3362        index.add(42, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3363        assert_eq!(index.count_key(42), 1);
3364
3365        // Still 0 for non-existent key
3366        assert_eq!(index.count_key(999), 0);
3367    }
3368
3369    #[test]
3370    fn test_count_key_multi_index() {
3371        // Multi-index mode: allows multiple vectors per key
3372        let config = IndexConfig::new(4)
3373            .with_metric(Metric::InnerProduct)
3374            .with_multi(true);
3375        let index = VectorIndex::with_config(config).unwrap();
3376        index.reserve(10).unwrap();
3377
3378        let key = 42u64;
3379
3380        // Add multiple vectors with same key
3381        index.add(key, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3382        index.add(key, &[0.0, 1.0, 0.0, 0.0]).unwrap();
3383        index.add(key, &[0.0, 0.0, 1.0, 0.0]).unwrap();
3384
3385        // Should count all vectors under this key
3386        assert_eq!(index.count_key(key), 3);
3387        assert_eq!(index.len(), 3);
3388
3389        // Non-existent key still returns 0
3390        assert_eq!(index.count_key(999), 0);
3391    }
3392
3393    #[test]
3394    fn test_with_multi_builder() {
3395        // Test the with_multi builder method
3396        let config = IndexConfig::new(768)
3397            .with_metric(Metric::InnerProduct)
3398            .with_multi(true);
3399
3400        assert!(config.multi);
3401
3402        let config_no_multi = IndexConfig::new(768).with_multi(false);
3403        assert!(!config_no_multi.multi);
3404    }
3405
3406    #[test]
3407    fn test_rename() {
3408        let config = IndexConfig {
3409            dimensions: 4,
3410            ..Default::default()
3411        };
3412        let index = VectorIndex::with_config(config).unwrap();
3413        index.reserve(10).unwrap();
3414
3415        let vector = vec![1.0, 0.0, 0.0, 0.0];
3416        index.add(42, &vector).unwrap();
3417
3418        assert!(index.contains(42));
3419        assert!(!index.contains(100));
3420
3421        let renamed = index.rename(42, 100).unwrap();
3422        assert_eq!(renamed, 1);
3423
3424        assert!(!index.contains(42));
3425        assert!(index.contains(100));
3426
3427        // Verify vector is still retrievable
3428        let retrieved = index.get(100).unwrap();
3429        assert_eq!(retrieved, vector);
3430    }
3431
3432    #[test]
3433    fn test_rename_fails_if_target_exists() {
3434        let config = IndexConfig {
3435            dimensions: 4,
3436            ..Default::default()
3437        };
3438        let index = VectorIndex::with_config(config).unwrap();
3439        index.reserve(10).unwrap();
3440
3441        index.add(42, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3442        index.add(100, &[0.0, 1.0, 0.0, 0.0]).unwrap();
3443
3444        // Should fail because target key exists
3445        let result = index.rename(42, 100);
3446        assert!(result.is_err());
3447
3448        // Both keys should still exist
3449        assert!(index.contains(42));
3450        assert!(index.contains(100));
3451    }
3452
3453    #[test]
3454    fn test_rename_overwrite() {
3455        let config = IndexConfig {
3456            dimensions: 4,
3457            ..Default::default()
3458        };
3459        let index = VectorIndex::with_config(config).unwrap();
3460        index.reserve(10).unwrap();
3461
3462        let vector1 = vec![1.0, 0.0, 0.0, 0.0];
3463        let vector2 = vec![0.0, 1.0, 0.0, 0.0];
3464        index.add(42, &vector1).unwrap();
3465        index.add(100, &vector2).unwrap();
3466
3467        // Should succeed because we're overwriting
3468        let renamed = index.rename_overwrite(42, 100).unwrap();
3469        assert_eq!(renamed, 1);
3470
3471        assert!(!index.contains(42));
3472        assert!(index.contains(100));
3473
3474        // Verify the renamed vector is there (not the original one at 100)
3475        let retrieved = index.get(100).unwrap();
3476        assert_eq!(retrieved, vector1);
3477    }
3478
3479    #[test]
3480    fn test_rename_nonexistent_key() {
3481        let config = IndexConfig {
3482            dimensions: 4,
3483            ..Default::default()
3484        };
3485        let index = VectorIndex::with_config(config).unwrap();
3486        index.reserve(10).unwrap();
3487
3488        // Renaming non-existent key should return 0
3489        let renamed = index.rename(42, 100).unwrap();
3490        assert_eq!(renamed, 0);
3491    }
3492
3493    #[test]
3494    fn test_restore_reads_dimensions_from_file() {
3495        // Create and save an index with specific dimensions
3496        let index = create_test_index(); // 4 dimensions
3497        index.reserve(10).unwrap();
3498        index.add(0, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3499        index.add(1, &[0.0, 1.0, 0.0, 0.0]).unwrap();
3500
3501        let temp_dir = std::env::temp_dir();
3502        let path = temp_dir.join("test_restore_index.usearch");
3503
3504        index.save(&path).unwrap();
3505
3506        // Restore without specifying dimensions - should read from file
3507        let restored = VectorIndex::restore(&path).unwrap();
3508
3509        assert_eq!(restored.dimensions(), 4);
3510        assert_eq!(restored.len(), 2);
3511        assert!(restored.contains(0));
3512        assert!(restored.contains(1));
3513
3514        // Cleanup
3515        std::fs::remove_file(&path).ok();
3516    }
3517
3518    #[test]
3519    fn test_load_validated_success() {
3520        // Create and save an index
3521        let index = create_test_index(); // 4 dimensions
3522        index.reserve(5).unwrap();
3523        index.add(42, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3524
3525        let temp_dir = std::env::temp_dir();
3526        let path = temp_dir.join("test_load_validated_success.usearch");
3527
3528        index.save(&path).unwrap();
3529
3530        // Load with matching config - should succeed
3531        let config = IndexConfig::new(4).with_metric(Metric::Cosine);
3532        let loaded = VectorIndex::load_validated(&path, config).unwrap();
3533
3534        assert_eq!(loaded.dimensions(), 4);
3535        assert_eq!(loaded.config().metric, Metric::Cosine);
3536        assert!(loaded.contains(42));
3537
3538        // Cleanup
3539        std::fs::remove_file(&path).ok();
3540    }
3541
3542    #[test]
3543    fn test_load_validated_dimension_mismatch() {
3544        // Create and save a 4-dimension index
3545        let index = create_test_index();
3546        index.reserve(5).unwrap();
3547        index.add(0, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3548
3549        let temp_dir = std::env::temp_dir();
3550        let path = temp_dir.join("test_load_validated_mismatch.usearch");
3551
3552        index.save(&path).unwrap();
3553
3554        // Try to load with wrong dimensions - should fail BEFORE corrupting state
3555        let wrong_config = IndexConfig::new(768); // Wrong dimensions!
3556        let result = VectorIndex::load_validated(&path, wrong_config);
3557
3558        assert!(result.is_err());
3559        let err_msg = result.unwrap_err().to_string();
3560        assert!(err_msg.contains("Dimension mismatch"));
3561        assert!(err_msg.contains("expected 768"));
3562        assert!(err_msg.contains("loaded index has 4"));
3563
3564        // Cleanup
3565        std::fs::remove_file(&path).ok();
3566    }
3567
3568    #[test]
3569    fn test_view_restore_reads_dimensions() {
3570        // Create and save an index
3571        let index = create_test_index(); // 4 dimensions
3572        index.reserve(5).unwrap();
3573        index.add(0, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3574
3575        let temp_dir = std::env::temp_dir();
3576        let path = temp_dir.join("test_view_restore.usearch");
3577
3578        index.save(&path).unwrap();
3579
3580        // View without specifying dimensions - should read from file
3581        let viewed = VectorIndex::view_restore(&path).unwrap();
3582
3583        assert_eq!(viewed.dimensions(), 4);
3584        assert!(viewed.contains(0));
3585
3586        // Cleanup
3587        std::fs::remove_file(&path).ok();
3588    }
3589
3590    #[test]
3591    fn test_view_validated_dimension_mismatch() {
3592        // Create and save a 4-dimension index
3593        let index = create_test_index();
3594        index.reserve(5).unwrap();
3595        index.add(0, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3596
3597        let temp_dir = std::env::temp_dir();
3598        let path = temp_dir.join("test_view_validated_mismatch.usearch");
3599
3600        index.save(&path).unwrap();
3601
3602        // Try to view with wrong dimensions - should fail safely
3603        let wrong_config = IndexConfig::new(1024); // Wrong dimensions!
3604        let result = VectorIndex::view_validated(&path, wrong_config);
3605
3606        assert!(result.is_err());
3607        let err_msg = result.unwrap_err().to_string();
3608        assert!(err_msg.contains("Dimension mismatch"));
3609
3610        // Cleanup
3611        std::fs::remove_file(&path).ok();
3612    }
3613
3614    #[test]
3615    fn test_restore_nonexistent_file() {
3616        let result = VectorIndex::restore("/nonexistent/path/index.usearch");
3617        assert!(result.is_err());
3618        assert!(result.unwrap_err().to_string().contains("not found"));
3619    }
3620
3621    #[test]
3622    fn test_view_restore_nonexistent_file() {
3623        let result = VectorIndex::view_restore("/nonexistent/path/index.usearch");
3624        assert!(result.is_err());
3625        assert!(result.unwrap_err().to_string().contains("not found"));
3626    }
3627
3628    #[test]
3629    fn test_atomic_save_creates_valid_file() {
3630        // Test that atomic save creates a properly readable index
3631        let index = create_test_index();
3632        index.reserve(10).unwrap();
3633        index.add(0, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3634        index.add(1, &[0.0, 1.0, 0.0, 0.0]).unwrap();
3635        index.add(2, &[0.0, 0.0, 1.0, 0.0]).unwrap();
3636
3637        let temp_dir = std::env::temp_dir();
3638        let path = temp_dir.join("test_atomic_save.usearch");
3639
3640        // Atomic save
3641        index.save(&path).unwrap();
3642
3643        // Verify file exists and is loadable
3644        assert!(path.exists());
3645        let restored = VectorIndex::restore(&path).unwrap();
3646        assert_eq!(restored.len(), 3);
3647        assert!(restored.contains(0));
3648        assert!(restored.contains(1));
3649        assert!(restored.contains(2));
3650
3651        // Cleanup
3652        std::fs::remove_file(&path).ok();
3653    }
3654
3655    #[test]
3656    fn test_atomic_save_overwrites_existing_file() {
3657        // Test that atomic save properly replaces existing files
3658        let temp_dir = std::env::temp_dir();
3659        let path = temp_dir.join("test_atomic_overwrite.usearch");
3660
3661        // Create and save initial index
3662        let index1 = create_test_index();
3663        index1.reserve(5).unwrap();
3664        index1.add(100, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3665        index1.save(&path).unwrap();
3666
3667        // Create different index and overwrite
3668        let index2 = create_test_index();
3669        index2.reserve(5).unwrap();
3670        index2.add(200, &[0.0, 1.0, 0.0, 0.0]).unwrap();
3671        index2.add(201, &[0.0, 0.0, 1.0, 0.0]).unwrap();
3672        index2.save(&path).unwrap();
3673
3674        // Verify the new index replaced the old one
3675        let restored = VectorIndex::restore(&path).unwrap();
3676        assert_eq!(restored.len(), 2);
3677        assert!(!restored.contains(100)); // Old data gone
3678        assert!(restored.contains(200));
3679        assert!(restored.contains(201));
3680
3681        // Cleanup
3682        std::fs::remove_file(&path).ok();
3683    }
3684
3685    #[test]
3686    fn test_save_fails_on_nonexistent_parent() {
3687        // Test that save fails gracefully when parent directory doesn't exist
3688        let index = create_test_index();
3689        index.reserve(5).unwrap();
3690        index.add(0, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3691
3692        let path = std::path::Path::new("/nonexistent_dir_abc123/index.usearch");
3693        let result = index.save(path);
3694
3695        assert!(result.is_err());
3696        let err_msg = result.unwrap_err().to_string();
3697        assert!(err_msg.contains("Parent directory does not exist"));
3698    }
3699
3700    #[test]
3701    fn test_save_unsafe_works() {
3702        // Test that save_unsafe still works for backward compatibility
3703        let index = create_test_index();
3704        index.reserve(5).unwrap();
3705        index.add(42, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3706
3707        let temp_dir = std::env::temp_dir();
3708        let path = temp_dir.join("test_save_unsafe.usearch");
3709
3710        // Non-atomic save
3711        index.save_unsafe(&path).unwrap();
3712
3713        // Verify it's loadable
3714        let restored = VectorIndex::restore(&path).unwrap();
3715        assert!(restored.contains(42));
3716
3717        // Cleanup
3718        std::fs::remove_file(&path).ok();
3719    }
3720
3721    #[test]
3722    fn test_save_to_current_directory() {
3723        // Test saving to current directory (parent = ".")
3724        let index = create_test_index();
3725        index.reserve(5).unwrap();
3726        index.add(0, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3727
3728        // Save to temp dir to avoid polluting project
3729        let temp_dir = std::env::temp_dir();
3730        let path = temp_dir.join("test_save_cwd.usearch");
3731
3732        index.save(&path).unwrap();
3733
3734        // Verify
3735        let restored = VectorIndex::restore(&path).unwrap();
3736        assert!(restored.contains(0));
3737
3738        // Cleanup
3739        std::fs::remove_file(&path).ok();
3740    }
3741
3742    // =========================================================================
3743    // In-Memory Serialization Tests
3744    // =========================================================================
3745
3746    #[test]
3747    fn test_serialization_roundtrip() {
3748        let config = IndexConfig {
3749            dimensions: 4,
3750            ..Default::default()
3751        };
3752        let index = VectorIndex::with_config(config.clone()).unwrap();
3753        index.reserve(10).unwrap();
3754
3755        index.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3756        index.add(2, &[0.0, 1.0, 0.0, 0.0]).unwrap();
3757
3758        // Serialize
3759        let bytes = index.to_bytes().unwrap();
3760
3761        // Deserialize
3762        let loaded = VectorIndex::from_bytes(&bytes, config).unwrap();
3763
3764        assert_eq!(loaded.len(), 2);
3765        assert!(loaded.contains(1));
3766        assert!(loaded.contains(2));
3767    }
3768
3769    #[test]
3770    fn test_serialization_preserves_vectors() {
3771        let config = IndexConfig::new(4).with_metric(Metric::InnerProduct);
3772        let index = VectorIndex::with_config(config.clone()).unwrap();
3773        index.reserve(10).unwrap();
3774
3775        let vec1 = [1.0, 0.0, 0.0, 0.0];
3776        let vec2 = [0.0, 1.0, 0.0, 0.0];
3777        let vec3 = [0.0, 0.0, 1.0, 0.0];
3778
3779        index.add(10, &vec1).unwrap();
3780        index.add(20, &vec2).unwrap();
3781        index.add(30, &vec3).unwrap();
3782
3783        // Serialize and deserialize
3784        let bytes = index.to_bytes().unwrap();
3785        let loaded = VectorIndex::from_bytes(&bytes, config).unwrap();
3786
3787        // Verify vectors are searchable
3788        let results = loaded.search(&vec1, 3).unwrap();
3789        assert!(!results.is_empty());
3790        assert_eq!(results[0].0, 10); // Should find key 10 first
3791
3792        // Verify get() works
3793        let retrieved = loaded.get(10).unwrap();
3794        assert_eq!(retrieved, vec1.to_vec());
3795    }
3796
3797    #[test]
3798    fn test_serialization_dimension_mismatch() {
3799        // Create a 4-dimensional index
3800        let config = IndexConfig::new(4);
3801        let index = VectorIndex::with_config(config).unwrap();
3802        index.reserve(5).unwrap();
3803        index.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3804
3805        let bytes = index.to_bytes().unwrap();
3806
3807        // Try to load with wrong dimensions
3808        let wrong_config = IndexConfig::new(768);
3809        let result = VectorIndex::from_bytes(&bytes, wrong_config);
3810
3811        assert!(result.is_err());
3812        let err_msg = result.unwrap_err().to_string();
3813        assert!(err_msg.contains("Dimension mismatch"));
3814        assert!(err_msg.contains("768"));
3815    }
3816
3817    #[test]
3818    fn test_from_bytes_unchecked() {
3819        let config = IndexConfig::new(4);
3820        let index = VectorIndex::with_config(config).unwrap();
3821        index.reserve(5).unwrap();
3822        index.add(42, &[1.0, 0.5, 0.0, 0.0]).unwrap();
3823
3824        let bytes = index.to_bytes().unwrap();
3825
3826        // Load without specifying dimensions
3827        let loaded = VectorIndex::from_bytes_unchecked(&bytes).unwrap();
3828
3829        assert_eq!(loaded.dimensions(), 4);
3830        assert!(loaded.contains(42));
3831        assert_eq!(loaded.len(), 1);
3832    }
3833
3834    #[test]
3835    fn test_serialized_size_consistency() {
3836        let config = IndexConfig::new(4);
3837        let index = VectorIndex::with_config(config).unwrap();
3838        index.reserve(10).unwrap();
3839
3840        index.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3841        index.add(2, &[0.0, 1.0, 0.0, 0.0]).unwrap();
3842
3843        let expected_size = index.serialized_size();
3844        let bytes = index.to_bytes().unwrap();
3845
3846        // Serialized bytes should match the reported size
3847        assert_eq!(bytes.len(), expected_size);
3848    }
3849
3850    #[test]
3851    fn test_serialization_empty_index() {
3852        let config = IndexConfig::new(768);
3853        let index = VectorIndex::with_config(config.clone()).unwrap();
3854        // Don't reserve or add anything
3855
3856        let bytes = index.to_bytes().unwrap();
3857        let loaded = VectorIndex::from_bytes(&bytes, config).unwrap();
3858
3859        assert_eq!(loaded.len(), 0);
3860        assert!(loaded.is_empty());
3861        assert_eq!(loaded.dimensions(), 768);
3862    }
3863
3864    #[test]
3865    fn test_serialization_with_quantization() {
3866        // Test that quantization settings are preserved through serialization
3867        let config = IndexConfig::new(4)
3868            .with_metric(Metric::Cosine)
3869            .with_quantization(Quantization::F16);
3870        let index = VectorIndex::with_config(config.clone()).unwrap();
3871        index.reserve(5).unwrap();
3872        index.add(1, &[0.5, 0.5, 0.5, 0.5]).unwrap();
3873
3874        let bytes = index.to_bytes().unwrap();
3875        let loaded = VectorIndex::from_bytes(&bytes, config.clone()).unwrap();
3876
3877        assert_eq!(loaded.dimensions(), 4);
3878        assert!(loaded.contains(1));
3879        assert_eq!(loaded.config().metric, Metric::Cosine);
3880        assert_eq!(loaded.config().quantization, Quantization::F16);
3881    }
3882
3883    #[test]
3884    fn test_serialization_large_index() {
3885        // Test with more vectors to ensure scalability
3886        let config = IndexConfig::new(128);
3887        let index = VectorIndex::with_config(config.clone()).unwrap();
3888        index.reserve(1000).unwrap();
3889
3890        // Add 100 vectors
3891        for i in 0..100u64 {
3892            let mut vec = vec![0.0f32; 128];
3893            vec[(i as usize) % 128] = 1.0;
3894            index.add(i, &vec).unwrap();
3895        }
3896
3897        let bytes = index.to_bytes().unwrap();
3898        let loaded = VectorIndex::from_bytes(&bytes, config).unwrap();
3899
3900        assert_eq!(loaded.len(), 100);
3901        for i in 0..100u64 {
3902            assert!(loaded.contains(i));
3903        }
3904    }
3905
3906    // =========================================================================
3907    // Disk Space Checking Tests
3908    // =========================================================================
3909
3910    #[test]
3911    fn test_check_disk_space_valid_path() {
3912        let index = create_test_index();
3913        index.reserve(10).unwrap();
3914        index.add(0, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3915
3916        let temp_dir = std::env::temp_dir();
3917        let path = temp_dir.join("test_disk_space_check.usearch");
3918
3919        // Should succeed on a valid path with existing parent directory
3920        let has_space = index.check_disk_space(&path).unwrap();
3921        // Temp directory should have enough space for a tiny index
3922        assert!(has_space);
3923    }
3924
3925    #[test]
3926    fn test_check_disk_space_nonexistent_parent() {
3927        let index = create_test_index();
3928        index.reserve(5).unwrap();
3929        index.add(0, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3930
3931        let path = std::path::Path::new("/nonexistent_dir_xyz123/index.usearch");
3932        let result = index.check_disk_space(path);
3933
3934        assert!(result.is_err());
3935        let err_msg = result.unwrap_err().to_string();
3936        assert!(err_msg.contains("parent directory does not exist"));
3937    }
3938
3939    #[test]
3940    fn test_disk_space_info() {
3941        let index = create_test_index();
3942        index.reserve(10).unwrap();
3943        index.add(0, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3944        index.add(1, &[0.0, 1.0, 0.0, 0.0]).unwrap();
3945
3946        let temp_dir = std::env::temp_dir();
3947        let path = temp_dir.join("test_disk_space_info.usearch");
3948
3949        let (available, required, has_enough) = index.disk_space_info(&path).unwrap();
3950
3951        // Available should be a positive number
3952        assert!(available > 0);
3953        // Required should include safety margin (at least 1MB)
3954        assert!(required >= 1024 * 1024);
3955        // Has enough should be true for a tiny index on temp
3956        assert!(has_enough);
3957    }
3958
3959    #[test]
3960    fn test_save_checked_success() {
3961        let index = create_test_index();
3962        index.reserve(10).unwrap();
3963        index.add(0, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3964        index.add(1, &[0.0, 1.0, 0.0, 0.0]).unwrap();
3965
3966        let temp_dir = std::env::temp_dir();
3967        let path = temp_dir.join("test_save_checked.usearch");
3968
3969        let info = index.save_checked(&path).unwrap();
3970
3971        // Verify SaveInfo fields
3972        assert_eq!(info.path, path);
3973        assert!(info.size_bytes > 0);
3974        assert!(info.available_before > 0);
3975        assert!(info.space_remaining > 0);
3976
3977        // Verify the file was actually saved
3978        assert!(path.exists());
3979        let restored = VectorIndex::restore(&path).unwrap();
3980        assert_eq!(restored.len(), 2);
3981
3982        // Cleanup
3983        std::fs::remove_file(&path).ok();
3984    }
3985
3986    #[test]
3987    fn test_save_checked_nonexistent_parent() {
3988        let index = create_test_index();
3989        index.reserve(5).unwrap();
3990        index.add(0, &[1.0, 0.0, 0.0, 0.0]).unwrap();
3991
3992        let path = std::path::Path::new("/nonexistent_dir_abc999/index.usearch");
3993        let result = index.save_checked(path);
3994
3995        assert!(result.is_err());
3996        let err_msg = result.unwrap_err().to_string();
3997        assert!(err_msg.contains("Parent directory does not exist"));
3998    }
3999
4000    #[test]
4001    fn test_save_info_display() {
4002        let info = SaveInfo {
4003            path: PathBuf::from("/test/index.usearch"),
4004            size_bytes: 5 * 1024 * 1024, // 5 MB
4005            elapsed: std::time::Duration::from_secs(1),
4006            available_before: 100 * 1024 * 1024 * 1024, // 100 GB
4007            space_remaining: 99 * 1024 * 1024 * 1024,   // 99 GB
4008        };
4009
4010        let display = format!("{}", info);
4011        assert!(display.contains("5.00 MB"));
4012        assert!(display.contains("/test/index.usearch"));
4013        assert!(display.contains("MB/s"));
4014    }
4015
4016    #[test]
4017    fn test_save_info_human_size() {
4018        // Test bytes
4019        let info = SaveInfo {
4020            path: PathBuf::from("test"),
4021            size_bytes: 500,
4022            elapsed: std::time::Duration::from_secs(1),
4023            available_before: 0,
4024            space_remaining: 0,
4025        };
4026        assert_eq!(info.human_size(), "500 bytes");
4027
4028        // Test KB
4029        let info = SaveInfo {
4030            path: PathBuf::from("test"),
4031            size_bytes: 2048,
4032            elapsed: std::time::Duration::from_secs(1),
4033            available_before: 0,
4034            space_remaining: 0,
4035        };
4036        assert_eq!(info.human_size(), "2.00 KB");
4037
4038        // Test MB
4039        let info = SaveInfo {
4040            path: PathBuf::from("test"),
4041            size_bytes: 5 * 1024 * 1024,
4042            elapsed: std::time::Duration::from_secs(1),
4043            available_before: 0,
4044            space_remaining: 0,
4045        };
4046        assert_eq!(info.human_size(), "5.00 MB");
4047
4048        // Test GB
4049        let info = SaveInfo {
4050            path: PathBuf::from("test"),
4051            size_bytes: 2 * 1024 * 1024 * 1024,
4052            elapsed: std::time::Duration::from_secs(1),
4053            available_before: 0,
4054            space_remaining: 0,
4055        };
4056        assert_eq!(info.human_size(), "2.00 GB");
4057    }
4058
4059    #[test]
4060    fn test_save_info_bytes_per_second() {
4061        let info = SaveInfo {
4062            path: PathBuf::from("test"),
4063            size_bytes: 10 * 1024 * 1024, // 10 MB
4064            elapsed: std::time::Duration::from_secs(2),
4065            available_before: 0,
4066            space_remaining: 0,
4067        };
4068
4069        let bps = info.bytes_per_second();
4070        let expected = (10.0 * 1024.0 * 1024.0) / 2.0;
4071        assert!((bps - expected).abs() < 1.0);
4072
4073        let mbps = info.mb_per_second();
4074        assert!((mbps - 5.0).abs() < 0.01);
4075    }
4076
4077    #[test]
4078    fn test_save_info_zero_elapsed() {
4079        // Edge case: very fast save (zero duration)
4080        let info = SaveInfo {
4081            path: PathBuf::from("test"),
4082            size_bytes: 1000,
4083            elapsed: std::time::Duration::ZERO,
4084            available_before: 0,
4085            space_remaining: 0,
4086        };
4087
4088        // Should return infinity, not panic
4089        assert!(info.bytes_per_second().is_infinite());
4090        assert!(info.mb_per_second().is_infinite());
4091    }
4092
4093    #[test]
4094    fn test_check_disk_space_includes_safety_margin() {
4095        // This test verifies that the safety margin is being applied
4096        let index = create_test_index();
4097        index.reserve(5).unwrap();
4098        index.add(0, &[1.0, 0.0, 0.0, 0.0]).unwrap();
4099
4100        let serialized_size = index.serialized_size();
4101        let temp_dir = std::env::temp_dir();
4102        let path = temp_dir.join("test_safety_margin.usearch");
4103
4104        let (_, required, _) = index.disk_space_info(&path).unwrap();
4105
4106        // Required should be at least serialized_size + 1MB (minimum margin)
4107        let min_margin = 1024 * 1024u64;
4108        assert!(
4109            required >= serialized_size as u64 + min_margin,
4110            "Required {} should be >= serialized_size {} + margin {}",
4111            required,
4112            serialized_size,
4113            min_margin
4114        );
4115    }
4116
4117    #[test]
4118    fn test_add_batch_parallel_large() {
4119        // Test parallel insertion with more than PARALLEL_BATCH_THRESHOLD vectors
4120        let dims = 128;
4121        let count = 500; // Well above the 100 threshold
4122        let index = VectorIndex::new(dims, Metric::InnerProduct).unwrap();
4123        index.reserve(count).unwrap();
4124
4125        let keys: Vec<u64> = (0..count as u64).collect();
4126        let vectors: Vec<Vec<f32>> = (0..count)
4127            .map(|i| {
4128                let mut v = vec![0.0f32; dims];
4129                v[i % dims] = 1.0; // Create orthogonal-ish vectors
4130                v
4131            })
4132            .collect();
4133
4134        index.add_batch(&keys, &vectors).unwrap();
4135        assert_eq!(index.len(), count);
4136
4137        // Verify all vectors were added correctly
4138        for key in 0..count as u64 {
4139            assert!(index.contains(key), "Missing key {}", key);
4140        }
4141    }
4142
4143    #[test]
4144    fn test_add_batch_flat_parallel_large() {
4145        // Test parallel flat insertion with more than PARALLEL_BATCH_THRESHOLD vectors
4146        let dims = 64;
4147        let count = 200;
4148        let index = VectorIndex::new(dims, Metric::InnerProduct).unwrap();
4149        index.reserve(count).unwrap();
4150
4151        let keys: Vec<u64> = (0..count as u64).collect();
4152        let mut vectors_flat = vec![0.0f32; count * dims];
4153        for i in 0..count {
4154            vectors_flat[i * dims + (i % dims)] = 1.0;
4155        }
4156
4157        index.add_batch_flat(&keys, &vectors_flat).unwrap();
4158        assert_eq!(index.len(), count);
4159
4160        for key in 0..count as u64 {
4161            assert!(index.contains(key), "Missing key {}", key);
4162        }
4163    }
4164
4165    #[test]
4166    fn test_add_batch_sequential_method() {
4167        let index = create_test_index();
4168        index.reserve(5).unwrap();
4169
4170        let keys = vec![0, 1, 2, 3, 4];
4171        let vectors = vec![
4172            vec![1.0, 0.0, 0.0, 0.0],
4173            vec![0.0, 1.0, 0.0, 0.0],
4174            vec![0.0, 0.0, 1.0, 0.0],
4175            vec![0.0, 0.0, 0.0, 1.0],
4176            vec![0.5, 0.5, 0.0, 0.0],
4177        ];
4178
4179        index.add_batch_sequential(&keys, &vectors).unwrap();
4180        assert_eq!(index.len(), 5);
4181    }
4182
4183    #[test]
4184    fn test_add_batch_empty() {
4185        let index = create_test_index();
4186        index.reserve(10).unwrap();
4187
4188        // Empty batch should succeed
4189        let keys: Vec<u64> = vec![];
4190        let vectors: Vec<Vec<f32>> = vec![];
4191        index.add_batch(&keys, &vectors).unwrap();
4192        assert_eq!(index.len(), 0);
4193    }
4194
4195    #[test]
4196    fn test_add_batch_flat_empty() {
4197        let index = create_test_index();
4198        index.reserve(10).unwrap();
4199
4200        let keys: Vec<u64> = vec![];
4201        let vectors_flat: Vec<f32> = vec![];
4202        index.add_batch_flat(&keys, &vectors_flat).unwrap();
4203        assert_eq!(index.len(), 0);
4204    }
4205
4206    #[test]
4207    fn test_add_batch_below_parallel_threshold() {
4208        // Test that small batches (below threshold) still work correctly
4209        let index = create_test_index();
4210        let count = 50; // Below PARALLEL_BATCH_THRESHOLD (100)
4211        index.reserve(count).unwrap();
4212
4213        let keys: Vec<u64> = (0..count as u64).collect();
4214        let vectors: Vec<Vec<f32>> = (0..count)
4215            .map(|_| vec![0.25, 0.25, 0.25, 0.25])
4216            .collect();
4217
4218        index.add_batch(&keys, &vectors).unwrap();
4219        assert_eq!(index.len(), count);
4220    }
4221
4222    // =========================================================================
4223    // IndexView (Safe Memory-Mapped View) Tests
4224    // =========================================================================
4225
4226    #[test]
4227    fn test_view_safe_basic() {
4228        let temp_dir = tempfile::tempdir().unwrap();
4229        let path = temp_dir.path().join("test_view_safe.usearch");
4230
4231        // Create and save an index
4232        let index = create_test_index();
4233        index.reserve(5).unwrap();
4234        index.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
4235        index.add(2, &[0.0, 1.0, 0.0, 0.0]).unwrap();
4236        index.save(&path).unwrap();
4237
4238        // Open a safe view
4239        let view = VectorIndex::view_safe(&path).unwrap();
4240
4241        // Verify basic properties
4242        assert!(view.is_valid());
4243        assert_eq!(view.path(), path);
4244        assert_eq!(view.dimensions(), 4);
4245        assert_eq!(view.len(), 2);
4246        assert!(!view.is_empty());
4247        assert!(view.contains(1));
4248        assert!(view.contains(2));
4249        assert!(!view.contains(3));
4250    }
4251
4252    #[test]
4253    fn test_view_safe_search() {
4254        let temp_dir = tempfile::tempdir().unwrap();
4255        let path = temp_dir.path().join("test_view_safe_search.usearch");
4256
4257        // Create and save an index
4258        let index = create_test_index();
4259        index.reserve(5).unwrap();
4260        index.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
4261        index.add(2, &[0.0, 1.0, 0.0, 0.0]).unwrap();
4262        index.add(3, &[0.0, 0.0, 1.0, 0.0]).unwrap();
4263        index.save(&path).unwrap();
4264
4265        // Open a safe view and search
4266        let view = VectorIndex::view_safe(&path).unwrap();
4267        let results = view.search(&[1.0, 0.0, 0.0, 0.0], 3).unwrap();
4268
4269        assert!(!results.is_empty());
4270        assert_eq!(results[0].0, 1); // Should find key 1 first (exact match)
4271    }
4272
4273    #[test]
4274    fn test_view_validated_safe_matching_dimensions() {
4275        let temp_dir = tempfile::tempdir().unwrap();
4276        let path = temp_dir.path().join("test_view_validated_safe.usearch");
4277
4278        // Create and save an index with 4 dimensions
4279        let index = create_test_index();
4280        index.reserve(5).unwrap();
4281        index.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
4282        index.save(&path).unwrap();
4283
4284        // Open with matching dimensions - should succeed
4285        let config = IndexConfig::new(4);
4286        let view = VectorIndex::view_validated_safe(&path, config).unwrap();
4287
4288        assert!(view.is_valid());
4289        assert_eq!(view.dimensions(), 4);
4290        assert!(view.contains(1));
4291    }
4292
4293    #[test]
4294    fn test_view_validated_safe_dimension_mismatch() {
4295        let temp_dir = tempfile::tempdir().unwrap();
4296        let path = temp_dir.path().join("test_view_validated_safe_mismatch.usearch");
4297
4298        // Create and save an index with 4 dimensions
4299        let index = create_test_index();
4300        index.reserve(5).unwrap();
4301        index.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
4302        index.save(&path).unwrap();
4303
4304        // Try to open with wrong dimensions - should fail
4305        let config = IndexConfig::new(768);
4306        let result = VectorIndex::view_validated_safe(&path, config);
4307
4308        assert!(result.is_err());
4309        let err_msg = result.unwrap_err().to_string();
4310        assert!(err_msg.contains("Dimension mismatch"));
4311    }
4312
4313    #[test]
4314    fn test_view_validated_safe_zero_dimension_skips_validation() {
4315        let temp_dir = tempfile::tempdir().unwrap();
4316        let path = temp_dir.path().join("test_view_validated_safe_zero.usearch");
4317
4318        // Create and save an index with 4 dimensions
4319        let index = create_test_index();
4320        index.reserve(5).unwrap();
4321        index.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
4322        index.save(&path).unwrap();
4323
4324        // Open with 0 dimensions - should skip validation
4325        let config = IndexConfig::new(0);
4326        let view = VectorIndex::view_validated_safe(&path, config).unwrap();
4327
4328        assert!(view.is_valid());
4329        assert_eq!(view.dimensions(), 4); // Actual dimensions from file
4330    }
4331
4332    #[test]
4333    fn test_view_safe_nonexistent_file() {
4334        let result = VectorIndex::view_safe("/nonexistent/path/index.usearch");
4335        assert!(result.is_err());
4336    }
4337
4338    #[test]
4339    fn test_view_safe_into_inner() {
4340        let temp_dir = tempfile::tempdir().unwrap();
4341        let path = temp_dir.path().join("test_view_safe_into_inner.usearch");
4342
4343        // Create and save an index
4344        let index = create_test_index();
4345        index.reserve(5).unwrap();
4346        index.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
4347        index.save(&path).unwrap();
4348
4349        // Open a safe view and convert to VectorIndex
4350        let view = VectorIndex::view_safe(&path).unwrap();
4351        let inner = view.into_inner();
4352
4353        // Can still use the inner VectorIndex
4354        assert_eq!(inner.dimensions(), 4);
4355        assert!(inner.contains(1));
4356    }
4357
4358    #[test]
4359    fn test_view_safe_deref() {
4360        let temp_dir = tempfile::tempdir().unwrap();
4361        let path = temp_dir.path().join("test_view_safe_deref.usearch");
4362
4363        // Create and save an index
4364        let index = create_test_index();
4365        index.reserve(5).unwrap();
4366        index.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
4367        index.save(&path).unwrap();
4368
4369        // Open a safe view
4370        let view = VectorIndex::view_safe(&path).unwrap();
4371
4372        // Deref should work to access VectorIndex methods
4373        fn takes_vector_index(index: &VectorIndex) -> bool {
4374            index.contains(1)
4375        }
4376
4377        assert!(takes_vector_index(&view));
4378    }
4379
4380    #[cfg(unix)]
4381    #[test]
4382    fn test_view_safe_keeps_file_open_on_delete() {
4383        let temp_dir = tempfile::tempdir().unwrap();
4384        let path = temp_dir.path().join("test_view_safe_delete.usearch");
4385
4386        // Create and save an index
4387        let index = create_test_index();
4388        index.reserve(5).unwrap();
4389        index.add(1, &[1.0, 0.0, 0.0, 0.0]).unwrap();
4390        index.save(&path).unwrap();
4391
4392        // Open a safe view
4393        let view = VectorIndex::view_safe(&path).unwrap();
4394        assert!(view.is_valid());
4395        assert!(view.contains(1));
4396
4397        // Delete the file - on Unix, the file handle keeps data accessible
4398        std::fs::remove_file(&path).unwrap();
4399
4400        // is_valid() should now return false (file unlinked)
4401        assert!(!view.is_valid());
4402
4403        // But the view should still be usable because file handle keeps data
4404        // Note: This is platform-specific behavior (Unix file semantics)
4405        assert!(view.contains(1));
4406
4407        // Search should still work
4408        let results = view.search(&[1.0, 0.0, 0.0, 0.0], 1).unwrap();
4409        assert!(!results.is_empty());
4410        assert_eq!(results[0].0, 1);
4411    }
4412
4413    // =========================================================================
4414    // Query Embedding Cache Tests
4415    // =========================================================================
4416
4417    #[test]
4418    fn test_query_cache_basic_operations() {
4419        // Clear cache before test to ensure clean state
4420        clear_query_cache().unwrap();
4421
4422        // Cache should be empty initially
4423        let (size, capacity) = query_cache_stats().unwrap();
4424        assert_eq!(size, 0);
4425        assert_eq!(capacity, 100);
4426
4427        // Query not in cache
4428        assert!(!query_in_cache("test query").unwrap());
4429
4430        // Add query via get_cached_query_embedding
4431        let call_count = std::sync::atomic::AtomicUsize::new(0);
4432        let embedding = get_cached_query_embedding("test query", |_q| {
4433            call_count.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
4434            Ok(vec![0.1, 0.2, 0.3, 0.4])
4435        })
4436        .unwrap();
4437
4438        assert_eq!(embedding, vec![0.1, 0.2, 0.3, 0.4]);
4439        assert_eq!(call_count.load(std::sync::atomic::Ordering::SeqCst), 1);
4440
4441        // Query should now be in cache
4442        assert!(query_in_cache("test query").unwrap());
4443        let (size, _) = query_cache_stats().unwrap();
4444        assert_eq!(size, 1);
4445
4446        // Second call should use cache (compute_fn not called)
4447        let embedding2 = get_cached_query_embedding("test query", |_q| {
4448            call_count.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
4449            Ok(vec![0.5, 0.6, 0.7, 0.8]) // Different value to prove cache is used
4450        })
4451        .unwrap();
4452
4453        assert_eq!(embedding2, vec![0.1, 0.2, 0.3, 0.4]); // Should be original cached value
4454        assert_eq!(call_count.load(std::sync::atomic::Ordering::SeqCst), 1); // Not incremented
4455
4456        // Clear cache and verify
4457        clear_query_cache().unwrap();
4458        let (size, _) = query_cache_stats().unwrap();
4459        assert_eq!(size, 0);
4460        assert!(!query_in_cache("test query").unwrap());
4461    }
4462
4463    #[test]
4464    fn test_query_cache_different_queries() {
4465        clear_query_cache().unwrap();
4466
4467        // Add multiple different queries
4468        for i in 0..5 {
4469            let query = format!("query {}", i);
4470            let embedding = get_cached_query_embedding(&query, |_q| Ok(vec![i as f32; 4])).unwrap();
4471            assert_eq!(embedding, vec![i as f32; 4]);
4472        }
4473
4474        // All should be cached
4475        let (size, _) = query_cache_stats().unwrap();
4476        assert_eq!(size, 5);
4477
4478        // Verify each is in cache
4479        for i in 0..5 {
4480            let query = format!("query {}", i);
4481            assert!(query_in_cache(&query).unwrap());
4482        }
4483
4484        clear_query_cache().unwrap();
4485    }
4486
4487    #[test]
4488    fn test_query_cache_compute_error_not_cached() {
4489        clear_query_cache().unwrap();
4490
4491        // Attempt to cache a query where compute_fn returns error
4492        let result = get_cached_query_embedding("error query", |_q| {
4493            Err(anyhow::anyhow!("Simulated TEI error"))
4494        });
4495
4496        assert!(result.is_err());
4497
4498        // Failed query should NOT be in cache
4499        assert!(!query_in_cache("error query").unwrap());
4500        let (size, _) = query_cache_stats().unwrap();
4501        assert_eq!(size, 0);
4502
4503        clear_query_cache().unwrap();
4504    }
4505
4506    #[tokio::test]
4507    async fn test_query_cache_async_basic() {
4508        clear_query_cache().unwrap();
4509
4510        let call_count = std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0));
4511        let call_count_clone = call_count.clone();
4512
4513        // First call - should compute
4514        let embedding = get_cached_query_embedding_async("async test", |_q| {
4515            let cc = call_count_clone.clone();
4516            async move {
4517                cc.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
4518                Ok(vec![1.0, 2.0, 3.0])
4519            }
4520        })
4521        .await
4522        .unwrap();
4523
4524        assert_eq!(embedding, vec![1.0, 2.0, 3.0]);
4525        assert_eq!(call_count.load(std::sync::atomic::Ordering::SeqCst), 1);
4526
4527        // Second call - should use cache
4528        let call_count_clone2 = call_count.clone();
4529        let embedding2 = get_cached_query_embedding_async("async test", |_q| {
4530            let cc = call_count_clone2.clone();
4531            async move {
4532                cc.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
4533                Ok(vec![9.0, 9.0, 9.0])
4534            }
4535        })
4536        .await
4537        .unwrap();
4538
4539        assert_eq!(embedding2, vec![1.0, 2.0, 3.0]); // Cached value
4540        assert_eq!(call_count.load(std::sync::atomic::Ordering::SeqCst), 1); // Not incremented
4541
4542        clear_query_cache().unwrap();
4543    }
4544}