// oxirs_vec/storage_optimizations.rs

1//! Storage optimizations for vector indices
2//!
3//! This module provides optimized storage formats for efficient vector serialization:
4//! - Binary formats for fast I/O
5//! - Compression with multiple algorithms
6//! - Streaming I/O for large datasets
7//! - Memory-mapped file support
8
9use crate::Vector;
10use anyhow::{anyhow, Result};
11use oxicode::{Decode, Encode};
12use serde::{Deserialize, Serialize};
13use std::fs::File;
14use std::io::{BufReader, BufWriter, Read, Seek, SeekFrom, Write};
15use std::path::Path;
16
17/// Returns an oxicode configuration with fixed-int encoding.
18/// This ensures header sizes are consistent regardless of the values stored.
19fn bincode_config() -> oxicode::config::Configuration<
20    oxicode::config::LittleEndian,
21    oxicode::config::Fixint,
22    oxicode::config::NoLimit,
23> {
24    oxicode::config::standard().with_fixed_int_encoding()
25}
26
/// Compression methods for vector storage
///
/// NOTE(review): every variant other than `None` is currently a
/// pass-through placeholder — `VectorWriter::compress_data` returns the
/// input unchanged for all of them.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Encode, Decode)]
pub enum CompressionType {
    /// No compression (raw little-endian f32 data)
    None,
    /// LZ4 fast compression (speed-oriented)
    Lz4,
    /// Zstandard balanced compression (speed/ratio trade-off)
    Zstd,
    /// Brotli high compression (ratio-oriented)
    Brotli,
    /// Gzip standard compression (widely compatible)
    Gzip,
    /// Custom vector quantization compression (vector-aware, lossy)
    VectorQuantization,
}
43
/// Binary storage format configuration
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct StorageConfig {
    /// Compression type to use for block data
    pub compression: CompressionType,
    /// Compression level (0-9, algorithm dependent)
    pub compression_level: u8,
    /// Buffer size for streaming operations
    /// NOTE(review): not currently read by the writer/reader in this module.
    pub buffer_size: usize,
    /// Enable memory mapping for large files
    /// NOTE(review): not currently read; `MmapVectorFile` always maps.
    pub enable_mmap: bool,
    /// Block size in bytes; the writer flushes its buffered vectors once
    /// their estimated raw size reaches this threshold
    pub block_size: usize,
    /// Enable checksums for data integrity
    /// NOTE(review): not currently read; checksums are always computed.
    pub enable_checksums: bool,
    /// Metadata format version
    /// NOTE(review): not copied into `VectorFileHeader.version`, which
    /// comes from that type's `Default` impl — confirm intended.
    pub format_version: u32,
}
62
63impl Default for StorageConfig {
64    fn default() -> Self {
65        Self {
66            compression: CompressionType::Zstd,
67            compression_level: 3,
68            buffer_size: 1024 * 1024, // 1MB
69            enable_mmap: true,
70            block_size: 64 * 1024, // 64KB
71            enable_checksums: true,
72            format_version: 1,
73        }
74    }
75}
76
/// Binary file header for vector storage
///
/// Serialized with fixed-int encoding so its size is constant, which is
/// what allows `VectorWriter::finalize` to overwrite the placeholder
/// header in place. On disk it is preceded by a 4-byte little-endian
/// length prefix.
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct VectorFileHeader {
    /// Magic number for file format identification (always `b"OXIRSVEC"`)
    pub magic: [u8; 8],
    /// Format version
    pub version: u32,
    /// Number of vectors in file
    pub vector_count: u64,
    /// Vector dimensions (f32 elements per vector)
    pub dimensions: usize,
    /// Compression type used for the data section
    pub compression: CompressionType,
    /// Compression level
    pub compression_level: u8,
    /// Block size for chunked data
    pub block_size: usize,
    /// Checksum of header (covers only a subset of fields; see
    /// `calculate_checksum`)
    pub header_checksum: u32,
    /// Byte offset from the start of the file to the vector data
    pub data_offset: u64,
    /// Total data size (compressed)
    /// NOTE(review): never populated by `VectorWriter`; stays 0.
    pub data_size: u64,
    /// Original data size (uncompressed)
    /// NOTE(review): never populated by `VectorWriter`; stays 0.
    pub original_size: u64,
    /// Reserved bytes for future format extensions
    pub reserved: [u8; 32],
}
105
106impl Default for VectorFileHeader {
107    fn default() -> Self {
108        Self {
109            magic: *b"OXIRSVEC",
110            version: 1,
111            vector_count: 0,
112            dimensions: 0,
113            compression: CompressionType::None,
114            compression_level: 0,
115            block_size: 64 * 1024,
116            header_checksum: 0,
117            data_offset: 0,
118            data_size: 0,
119            original_size: 0,
120            reserved: [0; 32],
121        }
122    }
123}
124
125impl VectorFileHeader {
126    /// Calculate and set header checksum
127    pub fn calculate_checksum(&mut self) {
128        // Simple CRC32-like checksum
129        let mut checksum = 0u32;
130        checksum ^=
131            u32::from_le_bytes([self.magic[0], self.magic[1], self.magic[2], self.magic[3]]);
132        checksum ^=
133            u32::from_le_bytes([self.magic[4], self.magic[5], self.magic[6], self.magic[7]]);
134        checksum ^= self.version;
135        checksum ^= self.vector_count as u32;
136        checksum ^= self.dimensions as u32;
137        checksum ^= self.compression as u8 as u32;
138        checksum ^= self.compression_level as u32;
139        self.header_checksum = checksum;
140    }
141
142    /// Verify header checksum
143    pub fn verify_checksum(&self) -> bool {
144        let mut temp_header = self.clone();
145        temp_header.header_checksum = 0;
146        temp_header.calculate_checksum();
147        temp_header.header_checksum == self.header_checksum
148    }
149}
150
/// Vector block for chunked storage
///
/// In-memory representation of one compressed chunk. `VectorWriter::write_block`
/// serializes it as five little-endian u32 fields (block id, vector
/// count, original size, checksum, payload length) followed by the
/// payload bytes.
#[derive(Debug, Clone)]
pub struct VectorBlock {
    /// Sequential block index within the file
    pub block_id: u32,
    /// Number of vectors in this block
    pub vector_count: u32,
    /// Compressed data payload
    pub data: Vec<u8>,
    /// Original size in bytes before compression
    pub original_size: u32,
    /// Checksum of the compressed payload (wrapping byte sum; see
    /// `VectorWriter::calculate_data_checksum`)
    pub checksum: u32,
}
165
/// Streaming vector writer for large datasets
///
/// Buffers incoming vectors and writes them block-wise. Call `finalize`
/// to flush remaining vectors and rewrite the header with the real
/// counts; dropping the writer without finalizing leaves a placeholder
/// header with a zero vector count.
pub struct VectorWriter {
    /// Buffered handle to the output file
    writer: BufWriter<File>,
    /// Storage settings (compression, block size, ...)
    config: StorageConfig,
    /// Header being accumulated; written back to disk in `finalize`
    header: VectorFileHeader,
    /// Vectors buffered for the block currently being built
    current_block: Vec<Vector>,
    /// Number of framed blocks written so far (compressed mode only)
    blocks_written: u32,
    /// Running total of vectors accepted
    total_vectors: u64,
}
175
impl VectorWriter {
    /// Create a new vector writer
    ///
    /// Creates (truncating) the file at `path` and immediately writes a
    /// placeholder header to reserve space. `finalize` later seeks back
    /// and overwrites it with the real counts; that in-place rewrite is
    /// only safe because `bincode_config` uses fixed-int encoding, so
    /// the serialized header size does not change with field values.
    pub fn new<P: AsRef<Path>>(path: P, config: StorageConfig) -> Result<Self> {
        let file = File::create(path)?;
        let mut writer = BufWriter::new(file);

        // Seed the header from the config; dimensions and counts are
        // filled in as vectors arrive.
        let header = VectorFileHeader {
            compression: config.compression,
            compression_level: config.compression_level,
            block_size: config.block_size,
            ..Default::default()
        };

        // Write a placeholder header first to reserve space
        let placeholder_header_bytes = oxicode::serde::encode_to_vec(&header, bincode_config())
            .map_err(|e| anyhow!("Failed to serialize placeholder header: {}", e))?;
        let _header_size = (4 + placeholder_header_bytes.len()) as u64;

        // On-disk layout: 4-byte little-endian header length, then the
        // serialized header bytes.
        writer.write_all(&(placeholder_header_bytes.len() as u32).to_le_bytes())?;
        writer.write_all(&placeholder_header_bytes)?;
        writer.flush()?;

        Ok(Self {
            writer,
            config,
            header,
            current_block: Vec::new(),
            blocks_written: 0,
            total_vectors: 0,
        })
    }

    /// Write a vector to the file
    ///
    /// The first vector fixes the file's dimensionality; every later
    /// vector must match it. Vectors are buffered in memory and flushed
    /// block-wise once their estimated raw size reaches
    /// `config.block_size`.
    ///
    /// # Errors
    /// Returns an error on dimension mismatch, or if flushing a full
    /// block fails.
    pub fn write_vector(&mut self, vector: Vector) -> Result<()> {
        // Set dimensions from first vector
        if self.header.dimensions == 0 {
            self.header.dimensions = vector.dimensions;
        } else if self.header.dimensions != vector.dimensions {
            return Err(anyhow!(
                "Vector dimension mismatch: expected {}, got {}",
                self.header.dimensions,
                vector.dimensions
            ));
        }

        self.current_block.push(vector);
        self.total_vectors += 1;

        // Check if we need to flush the current block
        let block_size_estimate = self.current_block.len() * self.header.dimensions * 4; // 4 bytes per f32
        if block_size_estimate >= self.config.block_size {
            self.flush_block()?;
        }

        Ok(())
    }

    /// Write multiple vectors
    ///
    /// Convenience wrapper over `write_vector`; clones each input.
    pub fn write_vectors(&mut self, vectors: &[Vector]) -> Result<()> {
        for vector in vectors {
            self.write_vector(vector.clone())?;
        }
        Ok(())
    }

    /// Flush current block to disk
    ///
    /// With `CompressionType::None` the vectors are written as raw
    /// little-endian f32 values with no per-block framing — the only
    /// layout `VectorReader` understands. All other modes frame the
    /// (currently pass-through) compressed payload via `write_block`.
    /// NOTE(review): `VectorReader` does not parse block framing, so
    /// files written with compression enabled cannot be read back by
    /// this module — confirm before enabling compression.
    fn flush_block(&mut self) -> Result<()> {
        if self.current_block.is_empty() {
            return Ok(());
        }

        // For uncompressed mode, write raw vector data
        if self.config.compression == CompressionType::None {
            for vector in &self.current_block {
                let vector_bytes = vector.as_f32();
                for value in vector_bytes {
                    self.writer.write_all(&value.to_le_bytes())?;
                }
            }
            self.current_block.clear();
            return Ok(());
        }

        // Serialize vectors to binary (little-endian f32 stream)
        let mut block_data = Vec::new();
        for vector in &self.current_block {
            let vector_bytes = vector.as_f32();
            for value in vector_bytes {
                block_data.extend_from_slice(&value.to_le_bytes());
            }
        }

        // Compress block data
        let compressed_data = self.compress_data(&block_data)?;

        // Create block header
        let block = VectorBlock {
            block_id: self.blocks_written,
            vector_count: self.current_block.len() as u32,
            original_size: block_data.len() as u32,
            checksum: self.calculate_data_checksum(&compressed_data),
            data: compressed_data,
        };

        // Write block to file
        self.write_block(&block)?;

        self.current_block.clear();
        self.blocks_written += 1;

        Ok(())
    }

    /// Compress data using configured algorithm
    ///
    /// NOTE(review): every branch is currently a pass-through
    /// placeholder; no actual compression is performed.
    fn compress_data(&self, data: &[u8]) -> Result<Vec<u8>> {
        match self.config.compression {
            CompressionType::None => Ok(data.to_vec()),
            CompressionType::Lz4 => {
                // Placeholder for LZ4 compression
                // In real implementation, use lz4_flex or similar crate
                Ok(data.to_vec())
            }
            CompressionType::Zstd => {
                // Placeholder for Zstandard compression
                // In real implementation, use zstd crate
                Ok(data.to_vec())
            }
            CompressionType::Brotli => {
                // Placeholder for Brotli compression
                // In real implementation, use brotli crate
                Ok(data.to_vec())
            }
            CompressionType::Gzip => {
                // Placeholder for Gzip compression
                // In real implementation, use flate2 crate
                Ok(data.to_vec())
            }
            CompressionType::VectorQuantization => {
                // Placeholder for vector quantization
                // In real implementation, use PQ or similar
                Ok(data.to_vec())
            }
        }
    }

    /// Calculate checksum for data
    ///
    /// Wrapping byte sum — order-insensitive and weak; a cheap
    /// integrity hint only.
    fn calculate_data_checksum(&self, data: &[u8]) -> u32 {
        // Simple checksum - in production use CRC32 or similar
        data.iter().fold(0u32, |acc, &b| acc.wrapping_add(b as u32))
    }

    /// Write block to file
    ///
    /// Frame layout: five little-endian u32s (block id, vector count,
    /// original size, checksum, payload length) followed by the payload.
    fn write_block(&mut self, block: &VectorBlock) -> Result<()> {
        // Write block header
        self.writer.write_all(&block.block_id.to_le_bytes())?;
        self.writer.write_all(&block.vector_count.to_le_bytes())?;
        self.writer.write_all(&block.original_size.to_le_bytes())?;
        self.writer.write_all(&block.checksum.to_le_bytes())?;
        self.writer
            .write_all(&(block.data.len() as u32).to_le_bytes())?;

        // Write block data
        self.writer.write_all(&block.data)?;

        Ok(())
    }

    /// Finalize and close the file
    ///
    /// Flushes any buffered vectors, then rewrites the header at the
    /// start of the file with the final vector count, data offset, and
    /// checksum. Consumes the writer, so the file is closed on return.
    /// NOTE(review): `data_size` and `original_size` are never filled
    /// in and remain 0 in the stored header.
    pub fn finalize(mut self) -> Result<()> {
        // Flush any remaining vectors
        self.flush_block()?;

        // Update header with final counts
        self.header.vector_count = self.total_vectors;

        // The data offset is fixed at the position after the placeholder header
        // We need to calculate this the same way it was calculated in new()
        // First, serialize the header with correct vector_count to get actual size
        let mut temp_header = self.header.clone();
        temp_header.calculate_checksum();

        let temp_header_bytes = oxicode::serde::encode_to_vec(&temp_header, bincode_config())
            .map_err(|e| anyhow!("Failed to serialize header for size calculation: {}", e))?;

        // Calculate actual data_offset based on real header size
        self.header.data_offset = 4 + temp_header_bytes.len() as u64;
        self.header.calculate_checksum();

        // Flush before seeking to ensure all data is written
        self.writer.flush()?;

        // Seek to beginning and write header. Seeking the underlying
        // file via get_mut() is safe here only because the BufWriter
        // was just flushed and its buffer is empty.
        self.writer.get_mut().seek(SeekFrom::Start(0))?;

        let header_bytes = oxicode::serde::encode_to_vec(&self.header, bincode_config())
            .map_err(|e| anyhow!("Failed to serialize header: {}", e))?;

        // Write header size first, then header data
        let header_size = header_bytes.len() as u32;
        self.writer.write_all(&header_size.to_le_bytes())?;
        self.writer.write_all(&header_bytes)?;

        // Flush to ensure data is written to file
        self.writer.flush()?;

        // Explicitly drop the writer to close the file
        drop(self.writer);

        Ok(())
    }
}
388
/// Streaming vector reader for large datasets
///
/// Sequentially decodes raw little-endian f32 vectors from a file
/// produced by `VectorWriter` with `CompressionType::None`.
pub struct VectorReader {
    /// Buffered handle to the input file
    reader: BufReader<File>,
    /// Validated file header
    header: VectorFileHeader,
    /// Tracked byte offset within the file
    current_position: u64,
    /// Number of vectors consumed so far
    vectors_read: u64,
}
396
397impl VectorReader {
398    /// Open a vector file for reading
399    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
400        let file = File::open(path)?;
401        let mut reader = BufReader::new(file);
402
403        // Read and verify header
404        let header = Self::read_header(&mut reader)?;
405        let data_offset = header.data_offset;
406
407        // Seek to data offset to position for reading vectors
408        reader.get_mut().seek(SeekFrom::Start(data_offset))?;
409
410        Ok(Self {
411            reader,
412            header,
413            current_position: data_offset,
414            vectors_read: 0,
415        })
416    }
417
418    /// Read file header
419    fn read_header(reader: &mut BufReader<File>) -> Result<VectorFileHeader> {
420        // Read header size first
421        let mut size_bytes = [0u8; 4];
422        reader.read_exact(&mut size_bytes)?;
423        let header_size = u32::from_le_bytes(size_bytes) as usize;
424
425        // Read header data of exact size
426        let mut header_data = vec![0u8; header_size];
427        reader.read_exact(&mut header_data)?;
428
429        let (header, _): (VectorFileHeader, _) =
430            oxicode::serde::decode_from_slice(&header_data, bincode_config())
431                .map_err(|e| anyhow!("Failed to deserialize header: {}", e))?;
432
433        // Verify magic number
434        if &header.magic != b"OXIRSVEC" {
435            return Err(anyhow!("Invalid file format: magic number mismatch"));
436        }
437
438        // Verify checksum
439        if !header.verify_checksum() {
440            return Err(anyhow!("Header checksum verification failed"));
441        }
442
443        Ok(header)
444    }
445
446    /// Get file metadata
447    pub fn metadata(&self) -> &VectorFileHeader {
448        &self.header
449    }
450
451    /// Read next vector from file
452    pub fn read_vector(&mut self) -> Result<Option<Vector>> {
453        if self.vectors_read >= self.header.vector_count {
454            return Ok(None);
455        }
456
457        // For simplicity, reading one vector at a time
458        // In production, implement block-wise reading for efficiency
459        let mut vector_data = vec![0f32; self.header.dimensions];
460
461        for vector_item in vector_data.iter_mut().take(self.header.dimensions) {
462            let mut bytes = [0u8; 4];
463            self.reader.read_exact(&mut bytes)?;
464            *vector_item = f32::from_le_bytes(bytes);
465        }
466
467        self.vectors_read += 1;
468        self.current_position += (self.header.dimensions * 4) as u64;
469        Ok(Some(Vector::new(vector_data)))
470    }
471
472    /// Read multiple vectors
473    pub fn read_vectors(&mut self, count: usize) -> Result<Vec<Vector>> {
474        let mut vectors = Vec::with_capacity(count);
475
476        for _ in 0..count {
477            if let Some(vector) = self.read_vector()? {
478                vectors.push(vector);
479            } else {
480                break;
481            }
482        }
483
484        Ok(vectors)
485    }
486
487    /// Read all remaining vectors
488    pub fn read_all(&mut self) -> Result<Vec<Vector>> {
489        let remaining = (self.header.vector_count - self.vectors_read) as usize;
490        self.read_vectors(remaining)
491    }
492
493    /// Skip to specific vector index
494    pub fn seek_to_vector(&mut self, index: u64) -> Result<()> {
495        if index >= self.header.vector_count {
496            return Err(anyhow!("Vector index {} out of bounds", index));
497        }
498
499        let byte_offset = self.header.data_offset + (index * self.header.dimensions as u64 * 4);
500        self.reader.get_mut().seek(SeekFrom::Start(byte_offset))?;
501        self.vectors_read = index;
502
503        Ok(())
504    }
505}
506
/// Memory-mapped vector file for efficient random access
///
/// Maps the whole file and decodes vectors on demand by index.
pub struct MmapVectorFile {
    /// Keeps the file handle alive for the lifetime of the mapping
    _file: File,
    /// Read-only memory map of the entire file
    mmap: memmap2::Mmap,
    /// Validated file header
    header: VectorFileHeader,
}
513
514impl MmapVectorFile {
515    /// Open file with memory mapping
516    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
517        let file = File::open(path)?;
518        let mmap = unsafe { memmap2::Mmap::map(&file)? };
519
520        // Read header size (first 4 bytes) and then header from memory map
521        if mmap.len() < 4 {
522            return Err(anyhow!("File too small to contain header"));
523        }
524        let header_size = u32::from_le_bytes([mmap[0], mmap[1], mmap[2], mmap[3]]) as usize;
525        if mmap.len() < 4 + header_size {
526            return Err(anyhow!("File too small to contain full header"));
527        }
528        let header_bytes = &mmap[4..4 + header_size];
529        let (header, _): (VectorFileHeader, _) =
530            oxicode::serde::decode_from_slice(header_bytes, bincode_config())
531                .map_err(|e| anyhow!("Failed to deserialize header: {}", e))?;
532
533        // Verify header
534        if &header.magic != b"OXIRSVEC" {
535            return Err(anyhow!("Invalid file format"));
536        }
537
538        if !header.verify_checksum() {
539            return Err(anyhow!("Header checksum verification failed"));
540        }
541
542        Ok(Self {
543            _file: file,
544            mmap,
545            header,
546        })
547    }
548
549    /// Get vector by index (zero-copy)
550    pub fn get_vector(&self, index: u64) -> Result<Vector> {
551        if index >= self.header.vector_count {
552            return Err(anyhow!("Vector index out of bounds"));
553        }
554
555        let offset =
556            self.header.data_offset as usize + (index as usize * self.header.dimensions * 4);
557        let end_offset = offset + (self.header.dimensions * 4);
558
559        if end_offset > self.mmap.len() {
560            return Err(anyhow!("Vector data extends beyond file"));
561        }
562
563        let vector_bytes = &self.mmap[offset..end_offset];
564        let mut vector_data = vec![0f32; self.header.dimensions];
565
566        for (i, chunk) in vector_bytes.chunks_exact(4).enumerate() {
567            vector_data[i] = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
568        }
569
570        Ok(Vector::new(vector_data))
571    }
572
573    /// Get slice of vectors
574    pub fn get_vectors(&self, start: u64, count: usize) -> Result<Vec<Vector>> {
575        let mut vectors = Vec::with_capacity(count);
576
577        for i in 0..count {
578            let index = start + i as u64;
579            if index >= self.header.vector_count {
580                break;
581            }
582            vectors.push(self.get_vector(index)?);
583        }
584
585        Ok(vectors)
586    }
587
588    /// Get total vector count
589    pub fn vector_count(&self) -> u64 {
590        self.header.vector_count
591    }
592
593    /// Get vector dimensions
594    pub fn dimensions(&self) -> usize {
595        self.header.dimensions
596    }
597}
598
/// Utility functions for storage operations
///
/// Stateless helpers grouped under a unit struct; all methods are
/// associated functions.
pub struct StorageUtils;
601
602impl StorageUtils {
603    /// Convert vectors to binary format
604    pub fn vectors_to_binary(vectors: &[Vector]) -> Result<Vec<u8>> {
605        let mut data = Vec::new();
606
607        for vector in vectors {
608            let vector_f32 = vector.as_f32();
609            for value in vector_f32 {
610                data.extend_from_slice(&value.to_le_bytes());
611            }
612        }
613
614        Ok(data)
615    }
616
617    /// Convert binary data to vectors
618    pub fn binary_to_vectors(data: &[u8], dimensions: usize) -> Result<Vec<Vector>> {
619        if data.len() % (dimensions * 4) != 0 {
620            return Err(anyhow!("Invalid binary data length for given dimensions"));
621        }
622
623        let vector_count = data.len() / (dimensions * 4);
624        let mut vectors = Vec::with_capacity(vector_count);
625
626        for i in 0..vector_count {
627            let start = i * dimensions * 4;
628            let end = start + dimensions * 4;
629            let vector_bytes = &data[start..end];
630
631            let mut vector_data = vec![0f32; dimensions];
632            for (j, chunk) in vector_bytes.chunks_exact(4).enumerate() {
633                vector_data[j] = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
634            }
635
636            vectors.push(Vector::new(vector_data));
637        }
638
639        Ok(vectors)
640    }
641
642    /// Estimate storage size for vectors
643    pub fn estimate_storage_size(
644        vector_count: usize,
645        dimensions: usize,
646        compression: CompressionType,
647    ) -> usize {
648        let raw_size = vector_count * dimensions * 4; // 4 bytes per f32
649        let header_size = std::mem::size_of::<VectorFileHeader>();
650
651        let compressed_size = match compression {
652            CompressionType::None => raw_size,
653            CompressionType::Lz4 => (raw_size as f64 * 0.6) as usize, // ~40% compression
654            CompressionType::Zstd => (raw_size as f64 * 0.5) as usize, // ~50% compression
655            CompressionType::Brotli => (raw_size as f64 * 0.4) as usize, // ~60% compression
656            CompressionType::Gzip => (raw_size as f64 * 0.5) as usize, // ~50% compression
657            CompressionType::VectorQuantization => (raw_size as f64 * 0.25) as usize, // ~75% compression
658        };
659
660        header_size + compressed_size
661    }
662
663    /// Benchmark compression algorithms
664    pub fn benchmark_compression(vectors: &[Vector]) -> Result<Vec<(CompressionType, f64, usize)>> {
665        let binary_data = Self::vectors_to_binary(vectors)?;
666        let original_size = binary_data.len();
667
668        let algorithms = [
669            CompressionType::None,
670            CompressionType::Lz4,
671            CompressionType::Zstd,
672            CompressionType::Brotli,
673            CompressionType::Gzip,
674        ];
675
676        let mut results = Vec::new();
677
678        for &algorithm in &algorithms {
679            let start_time = std::time::Instant::now();
680
681            // Simulate compression (in real implementation, use actual compression)
682            let compressed_size = match algorithm {
683                CompressionType::None => original_size,
684                CompressionType::Lz4 => (original_size as f64 * 0.6) as usize,
685                CompressionType::Zstd => (original_size as f64 * 0.5) as usize,
686                CompressionType::Brotli => (original_size as f64 * 0.4) as usize,
687                CompressionType::Gzip => (original_size as f64 * 0.5) as usize,
688                CompressionType::VectorQuantization => (original_size as f64 * 0.25) as usize,
689            };
690
691            let duration = start_time.elapsed().as_secs_f64();
692            results.push((algorithm, duration, compressed_size));
693        }
694
695        Ok(results)
696    }
697}
698
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::NamedTempFile;

    /// Checksum round-trips, and mutating a covered field invalidates it.
    #[test]
    fn test_vector_file_header() {
        let mut hdr = VectorFileHeader {
            vector_count: 1000,
            dimensions: 128,
            ..Default::default()
        };
        hdr.calculate_checksum();
        assert!(hdr.verify_checksum());

        // Changing a checksummed field must break verification.
        hdr.vector_count = 2000;
        assert!(!hdr.verify_checksum());
    }

    /// Binary round-trip preserves every vector exactly.
    #[test]
    fn test_storage_utils() {
        let originals = vec![
            Vector::new(vec![1.0, 2.0, 3.0]),
            Vector::new(vec![4.0, 5.0, 6.0]),
        ];

        let encoded = StorageUtils::vectors_to_binary(&originals).unwrap();
        let decoded = StorageUtils::binary_to_vectors(&encoded, 3).unwrap();

        assert_eq!(originals.len(), decoded.len());
        for (original, restored) in originals.iter().zip(decoded.iter()) {
            assert_eq!(original.as_f32(), restored.as_f32());
        }
    }

    /// Uncompressed write-then-read round-trip through a temp file.
    #[test]
    fn test_vector_writer_reader() -> Result<()> {
        let tmp = NamedTempFile::new()?;
        let path = tmp.path();

        // Write phase — scoped so the writer is finalized and dropped.
        {
            let cfg = StorageConfig {
                compression: CompressionType::None,
                ..Default::default()
            };
            let mut writer = VectorWriter::new(path, cfg)?;
            writer.write_vectors(&[
                Vector::new(vec![1.0, 2.0, 3.0, 4.0]),
                Vector::new(vec![5.0, 6.0, 7.0, 8.0]),
                Vector::new(vec![9.0, 10.0, 11.0, 12.0]),
            ])?;
            writer.finalize()?;
        }

        // Read phase — header metadata and all vectors must round-trip.
        {
            let mut reader = VectorReader::open(path)?;
            assert_eq!(reader.metadata().vector_count, 3);
            assert_eq!(reader.metadata().dimensions, 4);

            let got = reader.read_all()?;
            assert_eq!(got.len(), 3);
            assert_eq!(got[0].as_f32(), &[1.0, 2.0, 3.0, 4.0]);
            assert_eq!(got[1].as_f32(), &[5.0, 6.0, 7.0, 8.0]);
            assert_eq!(got[2].as_f32(), &[9.0, 10.0, 11.0, 12.0]);
        }

        Ok(())
    }

    /// The simulated benchmark covers five algorithms and reports Zstd
    /// as smaller than no compression.
    #[test]
    fn test_compression_benchmark() {
        let vectors = vec![
            Vector::new(vec![1.0; 128]),
            Vector::new(vec![2.0; 128]),
            Vector::new(vec![3.0; 128]),
        ];

        let results = StorageUtils::benchmark_compression(&vectors).unwrap();
        assert_eq!(results.len(), 5); // 5 compression algorithms

        let size_for = |wanted: CompressionType| {
            results.iter().find(|(t, _, _)| *t == wanted).unwrap().2
        };
        assert!(size_for(CompressionType::Zstd) < size_for(CompressionType::None));
    }
}