oxirs_vec/
storage_optimizations.rs

1//! Storage optimizations for vector indices
2//!
3//! This module provides optimized storage formats for efficient vector serialization:
4//! - Binary formats for fast I/O
5//! - Compression with multiple algorithms
6//! - Streaming I/O for large datasets
7//! - Memory-mapped file support
8
9use crate::Vector;
10use anyhow::{anyhow, Result};
11use serde::{Deserialize, Serialize};
12use std::fs::File;
13use std::io::{BufReader, BufWriter, Read, Seek, SeekFrom, Write};
14use std::path::Path;
15
16/// Compression methods for vector storage
17#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
18pub enum CompressionType {
19    /// No compression
20    None,
21    /// LZ4 fast compression
22    Lz4,
23    /// Zstandard balanced compression
24    Zstd,
25    /// Brotli high compression
26    Brotli,
27    /// Gzip standard compression
28    Gzip,
29    /// Custom vector quantization compression
30    VectorQuantization,
31}
32
/// Binary storage format configuration.
///
/// Controls the compression, buffering, and integrity options used when
/// writing and reading vector files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StorageConfig {
    /// Compression type to use for vector data
    pub compression: CompressionType,
    /// Compression level (0-9, meaning is algorithm dependent)
    pub compression_level: u8,
    /// Buffer size in bytes for streaming operations
    pub buffer_size: usize,
    /// Enable memory mapping for large files
    pub enable_mmap: bool,
    /// Block size in bytes for chunked reading/writing
    pub block_size: usize,
    /// Enable checksums for data integrity
    pub enable_checksums: bool,
    /// Metadata format version
    pub format_version: u32,
}
51
52impl Default for StorageConfig {
53    fn default() -> Self {
54        Self {
55            compression: CompressionType::Zstd,
56            compression_level: 3,
57            buffer_size: 1024 * 1024, // 1MB
58            enable_mmap: true,
59            block_size: 64 * 1024, // 64KB
60            enable_checksums: true,
61            format_version: 1,
62        }
63    }
64}
65
/// Binary file header for vector storage.
///
/// Serialized with bincode behind a 4-byte little-endian length prefix at
/// the start of the file; `data_offset` points just past prefix + header.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VectorFileHeader {
    /// Magic number for file format identification (`b"OXIRSVEC"`)
    pub magic: [u8; 8],
    /// Format version
    pub version: u32,
    /// Number of vectors in file
    pub vector_count: u64,
    /// Vector dimensions (one dimensionality per file)
    pub dimensions: usize,
    /// Compression type used
    pub compression: CompressionType,
    /// Compression level
    pub compression_level: u8,
    /// Block size for chunked data
    pub block_size: usize,
    /// Checksum of header fields; not itself an input to the checksum
    /// (see `calculate_checksum`)
    pub header_checksum: u32,
    /// Byte offset from the start of the file to the vector data
    pub data_offset: u64,
    /// Total data size (compressed)
    pub data_size: u64,
    /// Original data size (uncompressed)
    pub original_size: u64,
    /// Reserved bytes for future use
    pub reserved: [u8; 32],
}
94
95impl Default for VectorFileHeader {
96    fn default() -> Self {
97        Self {
98            magic: *b"OXIRSVEC",
99            version: 1,
100            vector_count: 0,
101            dimensions: 0,
102            compression: CompressionType::None,
103            compression_level: 0,
104            block_size: 64 * 1024,
105            header_checksum: 0,
106            data_offset: 0,
107            data_size: 0,
108            original_size: 0,
109            reserved: [0; 32],
110        }
111    }
112}
113
114impl VectorFileHeader {
115    /// Calculate and set header checksum
116    pub fn calculate_checksum(&mut self) {
117        // Simple CRC32-like checksum
118        let mut checksum = 0u32;
119        checksum ^=
120            u32::from_le_bytes([self.magic[0], self.magic[1], self.magic[2], self.magic[3]]);
121        checksum ^=
122            u32::from_le_bytes([self.magic[4], self.magic[5], self.magic[6], self.magic[7]]);
123        checksum ^= self.version;
124        checksum ^= self.vector_count as u32;
125        checksum ^= self.dimensions as u32;
126        checksum ^= self.compression as u8 as u32;
127        checksum ^= self.compression_level as u32;
128        self.header_checksum = checksum;
129    }
130
131    /// Verify header checksum
132    pub fn verify_checksum(&self) -> bool {
133        let mut temp_header = self.clone();
134        temp_header.header_checksum = 0;
135        temp_header.calculate_checksum();
136        temp_header.header_checksum == self.header_checksum
137    }
138}
139
/// Vector block for chunked storage.
///
/// On disk a block is framed as five little-endian u32 fields — block_id,
/// vector_count, original_size, checksum, data length — followed by the
/// payload bytes (see `VectorWriter::write_block`).
#[derive(Debug, Clone)]
pub struct VectorBlock {
    /// Block index (sequential, starting at 0)
    pub block_id: u32,
    /// Number of vectors in this block
    pub vector_count: u32,
    /// Compressed data payload
    pub data: Vec<u8>,
    /// Original size in bytes before compression
    pub original_size: u32,
    /// Checksum of the compressed payload
    pub checksum: u32,
}
154
/// Streaming vector writer for large datasets.
///
/// Buffers vectors until roughly `StorageConfig::block_size` bytes of raw
/// data accumulate, then flushes them behind a fixed-size placeholder header
/// that `finalize` rewrites with the final counts.
pub struct VectorWriter {
    writer: BufWriter<File>,
    config: StorageConfig,
    header: VectorFileHeader,
    // Vectors accumulated for the in-progress block.
    current_block: Vec<Vector>,
    // Framed blocks flushed so far; doubles as the next block_id.
    blocks_written: u32,
    // Running total across all blocks; written into the header on finalize.
    total_vectors: u64,
}
164
165impl VectorWriter {
166    /// Create a new vector writer
167    pub fn new<P: AsRef<Path>>(path: P, config: StorageConfig) -> Result<Self> {
168        let file = File::create(path)?;
169        let mut writer = BufWriter::new(file);
170
171        let header = VectorFileHeader {
172            compression: config.compression,
173            compression_level: config.compression_level,
174            block_size: config.block_size,
175            ..Default::default()
176        };
177
178        // Write a placeholder header first to reserve space
179        let placeholder_header_bytes = bincode::serialize(&header)?;
180        let _header_size = (4 + placeholder_header_bytes.len()) as u64;
181
182        // Write placeholder header size and header
183        writer.write_all(&(placeholder_header_bytes.len() as u32).to_le_bytes())?;
184        writer.write_all(&placeholder_header_bytes)?;
185        writer.flush()?;
186
187        Ok(Self {
188            writer,
189            config,
190            header,
191            current_block: Vec::new(),
192            blocks_written: 0,
193            total_vectors: 0,
194        })
195    }
196
197    /// Write a vector to the file
198    pub fn write_vector(&mut self, vector: Vector) -> Result<()> {
199        // Set dimensions from first vector
200        if self.header.dimensions == 0 {
201            self.header.dimensions = vector.dimensions;
202        } else if self.header.dimensions != vector.dimensions {
203            return Err(anyhow!(
204                "Vector dimension mismatch: expected {}, got {}",
205                self.header.dimensions,
206                vector.dimensions
207            ));
208        }
209
210        self.current_block.push(vector);
211        self.total_vectors += 1;
212
213        // Check if we need to flush the current block
214        let block_size_estimate = self.current_block.len() * self.header.dimensions * 4; // 4 bytes per f32
215        if block_size_estimate >= self.config.block_size {
216            self.flush_block()?;
217        }
218
219        Ok(())
220    }
221
222    /// Write multiple vectors
223    pub fn write_vectors(&mut self, vectors: &[Vector]) -> Result<()> {
224        for vector in vectors {
225            self.write_vector(vector.clone())?;
226        }
227        Ok(())
228    }
229
230    /// Flush current block to disk
231    fn flush_block(&mut self) -> Result<()> {
232        if self.current_block.is_empty() {
233            return Ok(());
234        }
235
236        // For uncompressed mode, write raw vector data
237        if self.config.compression == CompressionType::None {
238            for vector in &self.current_block {
239                let vector_bytes = vector.as_f32();
240                for value in vector_bytes {
241                    self.writer.write_all(&value.to_le_bytes())?;
242                }
243            }
244            self.current_block.clear();
245            return Ok(());
246        }
247
248        // Serialize vectors to binary
249        let mut block_data = Vec::new();
250        for vector in &self.current_block {
251            let vector_bytes = vector.as_f32();
252            for value in vector_bytes {
253                block_data.extend_from_slice(&value.to_le_bytes());
254            }
255        }
256
257        // Compress block data
258        let compressed_data = self.compress_data(&block_data)?;
259
260        // Create block header
261        let block = VectorBlock {
262            block_id: self.blocks_written,
263            vector_count: self.current_block.len() as u32,
264            original_size: block_data.len() as u32,
265            checksum: self.calculate_data_checksum(&compressed_data),
266            data: compressed_data,
267        };
268
269        // Write block to file
270        self.write_block(&block)?;
271
272        self.current_block.clear();
273        self.blocks_written += 1;
274
275        Ok(())
276    }
277
278    /// Compress data using configured algorithm
279    fn compress_data(&self, data: &[u8]) -> Result<Vec<u8>> {
280        match self.config.compression {
281            CompressionType::None => Ok(data.to_vec()),
282            CompressionType::Lz4 => {
283                // Placeholder for LZ4 compression
284                // In real implementation, use lz4_flex or similar crate
285                Ok(data.to_vec())
286            }
287            CompressionType::Zstd => {
288                // Placeholder for Zstandard compression
289                // In real implementation, use zstd crate
290                Ok(data.to_vec())
291            }
292            CompressionType::Brotli => {
293                // Placeholder for Brotli compression
294                // In real implementation, use brotli crate
295                Ok(data.to_vec())
296            }
297            CompressionType::Gzip => {
298                // Placeholder for Gzip compression
299                // In real implementation, use flate2 crate
300                Ok(data.to_vec())
301            }
302            CompressionType::VectorQuantization => {
303                // Placeholder for vector quantization
304                // In real implementation, use PQ or similar
305                Ok(data.to_vec())
306            }
307        }
308    }
309
310    /// Calculate checksum for data
311    fn calculate_data_checksum(&self, data: &[u8]) -> u32 {
312        // Simple checksum - in production use CRC32 or similar
313        data.iter().fold(0u32, |acc, &b| acc.wrapping_add(b as u32))
314    }
315
316    /// Write block to file
317    fn write_block(&mut self, block: &VectorBlock) -> Result<()> {
318        // Write block header
319        self.writer.write_all(&block.block_id.to_le_bytes())?;
320        self.writer.write_all(&block.vector_count.to_le_bytes())?;
321        self.writer.write_all(&block.original_size.to_le_bytes())?;
322        self.writer.write_all(&block.checksum.to_le_bytes())?;
323        self.writer
324            .write_all(&(block.data.len() as u32).to_le_bytes())?;
325
326        // Write block data
327        self.writer.write_all(&block.data)?;
328
329        Ok(())
330    }
331
332    /// Finalize and close the file
333    pub fn finalize(mut self) -> Result<()> {
334        // Flush any remaining vectors
335        self.flush_block()?;
336
337        // Update header with final counts
338        self.header.vector_count = self.total_vectors;
339
340        // The data offset is fixed at the position after the placeholder header
341        // We need to calculate this the same way it was calculated in new()
342        let placeholder_header = VectorFileHeader {
343            compression: self.config.compression,
344            compression_level: self.config.compression_level,
345            block_size: self.config.block_size,
346            ..Default::default()
347        };
348        let placeholder_header_bytes = bincode::serialize(&placeholder_header)?;
349        self.header.data_offset = 4 + placeholder_header_bytes.len() as u64;
350        self.header.calculate_checksum();
351
352        // Flush before seeking to ensure all data is written
353        self.writer.flush()?;
354
355        // Seek to beginning and write header
356        self.writer.get_mut().seek(SeekFrom::Start(0))?;
357
358        let header_bytes = bincode::serialize(&self.header)?;
359
360        // Write header size first, then header data
361        let header_size = header_bytes.len() as u32;
362        self.writer.write_all(&header_size.to_le_bytes())?;
363        self.writer.write_all(&header_bytes)?;
364
365        // Flush to ensure data is written to file
366        self.writer.flush()?;
367
368        // Explicitly drop the writer to close the file
369        drop(self.writer);
370
371        Ok(())
372    }
373}
374
/// Streaming vector reader for large datasets.
///
/// Sequentially decodes raw little-endian f32 vector data positioned at
/// `header.data_offset`, tracking how many vectors have been consumed.
pub struct VectorReader {
    reader: BufReader<File>,
    header: VectorFileHeader,
    // Byte position within the file (bookkeeping alongside the reader).
    current_position: u64,
    // Number of vectors consumed so far; compared against header.vector_count.
    vectors_read: u64,
}
382
383impl VectorReader {
384    /// Open a vector file for reading
385    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
386        let file = File::open(path)?;
387        let mut reader = BufReader::new(file);
388
389        // Read and verify header
390        let header = Self::read_header(&mut reader)?;
391        let data_offset = header.data_offset;
392
393        // Seek to data offset to position for reading vectors
394        reader.get_mut().seek(SeekFrom::Start(data_offset))?;
395
396        Ok(Self {
397            reader,
398            header,
399            current_position: data_offset,
400            vectors_read: 0,
401        })
402    }
403
404    /// Read file header
405    fn read_header(reader: &mut BufReader<File>) -> Result<VectorFileHeader> {
406        // Read header size first
407        let mut size_bytes = [0u8; 4];
408        reader.read_exact(&mut size_bytes)?;
409        let header_size = u32::from_le_bytes(size_bytes) as usize;
410
411        // Read header data of exact size
412        let mut header_data = vec![0u8; header_size];
413        reader.read_exact(&mut header_data)?;
414
415        let header: VectorFileHeader = bincode::deserialize(&header_data)?;
416
417        // Verify magic number
418        if &header.magic != b"OXIRSVEC" {
419            return Err(anyhow!("Invalid file format: magic number mismatch"));
420        }
421
422        // Verify checksum
423        if !header.verify_checksum() {
424            return Err(anyhow!("Header checksum verification failed"));
425        }
426
427        Ok(header)
428    }
429
430    /// Get file metadata
431    pub fn metadata(&self) -> &VectorFileHeader {
432        &self.header
433    }
434
435    /// Read next vector from file
436    pub fn read_vector(&mut self) -> Result<Option<Vector>> {
437        if self.vectors_read >= self.header.vector_count {
438            return Ok(None);
439        }
440
441        // For simplicity, reading one vector at a time
442        // In production, implement block-wise reading for efficiency
443        let mut vector_data = vec![0f32; self.header.dimensions];
444
445        for vector_item in vector_data.iter_mut().take(self.header.dimensions) {
446            let mut bytes = [0u8; 4];
447            self.reader.read_exact(&mut bytes)?;
448            *vector_item = f32::from_le_bytes(bytes);
449        }
450
451        self.vectors_read += 1;
452        self.current_position += (self.header.dimensions * 4) as u64;
453        Ok(Some(Vector::new(vector_data)))
454    }
455
456    /// Read multiple vectors
457    pub fn read_vectors(&mut self, count: usize) -> Result<Vec<Vector>> {
458        let mut vectors = Vec::with_capacity(count);
459
460        for _ in 0..count {
461            if let Some(vector) = self.read_vector()? {
462                vectors.push(vector);
463            } else {
464                break;
465            }
466        }
467
468        Ok(vectors)
469    }
470
471    /// Read all remaining vectors
472    pub fn read_all(&mut self) -> Result<Vec<Vector>> {
473        let remaining = (self.header.vector_count - self.vectors_read) as usize;
474        self.read_vectors(remaining)
475    }
476
477    /// Skip to specific vector index
478    pub fn seek_to_vector(&mut self, index: u64) -> Result<()> {
479        if index >= self.header.vector_count {
480            return Err(anyhow!("Vector index {} out of bounds", index));
481        }
482
483        let byte_offset = self.header.data_offset + (index * self.header.dimensions as u64 * 4);
484        self.reader.get_mut().seek(SeekFrom::Start(byte_offset))?;
485        self.vectors_read = index;
486
487        Ok(())
488    }
489}
490
/// Memory-mapped vector file for efficient random access.
///
/// Holds the `File` alongside the mapping so the descriptor outlives the map.
pub struct MmapVectorFile {
    _file: File,
    mmap: memmap2::Mmap,
    header: VectorFileHeader,
}
497
498impl MmapVectorFile {
499    /// Open file with memory mapping
500    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
501        let file = File::open(path)?;
502        let mmap = unsafe { memmap2::Mmap::map(&file)? };
503
504        // Read header from memory map
505        let header_bytes = &mmap[0..std::mem::size_of::<VectorFileHeader>()];
506        let header: VectorFileHeader = bincode::deserialize(header_bytes)?;
507
508        // Verify header
509        if &header.magic != b"OXIRSVEC" {
510            return Err(anyhow!("Invalid file format"));
511        }
512
513        if !header.verify_checksum() {
514            return Err(anyhow!("Header checksum verification failed"));
515        }
516
517        Ok(Self {
518            _file: file,
519            mmap,
520            header,
521        })
522    }
523
524    /// Get vector by index (zero-copy)
525    pub fn get_vector(&self, index: u64) -> Result<Vector> {
526        if index >= self.header.vector_count {
527            return Err(anyhow!("Vector index out of bounds"));
528        }
529
530        let offset =
531            self.header.data_offset as usize + (index as usize * self.header.dimensions * 4);
532        let end_offset = offset + (self.header.dimensions * 4);
533
534        if end_offset > self.mmap.len() {
535            return Err(anyhow!("Vector data extends beyond file"));
536        }
537
538        let vector_bytes = &self.mmap[offset..end_offset];
539        let mut vector_data = vec![0f32; self.header.dimensions];
540
541        for (i, chunk) in vector_bytes.chunks_exact(4).enumerate() {
542            vector_data[i] = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
543        }
544
545        Ok(Vector::new(vector_data))
546    }
547
548    /// Get slice of vectors
549    pub fn get_vectors(&self, start: u64, count: usize) -> Result<Vec<Vector>> {
550        let mut vectors = Vec::with_capacity(count);
551
552        for i in 0..count {
553            let index = start + i as u64;
554            if index >= self.header.vector_count {
555                break;
556            }
557            vectors.push(self.get_vector(index)?);
558        }
559
560        Ok(vectors)
561    }
562
563    /// Get total vector count
564    pub fn vector_count(&self) -> u64 {
565        self.header.vector_count
566    }
567
568    /// Get vector dimensions
569    pub fn dimensions(&self) -> usize {
570        self.header.dimensions
571    }
572}
573
/// Utility functions for storage operations.
///
/// Stateless namespace: all functionality lives in associated functions.
pub struct StorageUtils;
576
577impl StorageUtils {
578    /// Convert vectors to binary format
579    pub fn vectors_to_binary(vectors: &[Vector]) -> Result<Vec<u8>> {
580        let mut data = Vec::new();
581
582        for vector in vectors {
583            let vector_f32 = vector.as_f32();
584            for value in vector_f32 {
585                data.extend_from_slice(&value.to_le_bytes());
586            }
587        }
588
589        Ok(data)
590    }
591
592    /// Convert binary data to vectors
593    pub fn binary_to_vectors(data: &[u8], dimensions: usize) -> Result<Vec<Vector>> {
594        if data.len() % (dimensions * 4) != 0 {
595            return Err(anyhow!("Invalid binary data length for given dimensions"));
596        }
597
598        let vector_count = data.len() / (dimensions * 4);
599        let mut vectors = Vec::with_capacity(vector_count);
600
601        for i in 0..vector_count {
602            let start = i * dimensions * 4;
603            let end = start + dimensions * 4;
604            let vector_bytes = &data[start..end];
605
606            let mut vector_data = vec![0f32; dimensions];
607            for (j, chunk) in vector_bytes.chunks_exact(4).enumerate() {
608                vector_data[j] = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
609            }
610
611            vectors.push(Vector::new(vector_data));
612        }
613
614        Ok(vectors)
615    }
616
617    /// Estimate storage size for vectors
618    pub fn estimate_storage_size(
619        vector_count: usize,
620        dimensions: usize,
621        compression: CompressionType,
622    ) -> usize {
623        let raw_size = vector_count * dimensions * 4; // 4 bytes per f32
624        let header_size = std::mem::size_of::<VectorFileHeader>();
625
626        let compressed_size = match compression {
627            CompressionType::None => raw_size,
628            CompressionType::Lz4 => (raw_size as f64 * 0.6) as usize, // ~40% compression
629            CompressionType::Zstd => (raw_size as f64 * 0.5) as usize, // ~50% compression
630            CompressionType::Brotli => (raw_size as f64 * 0.4) as usize, // ~60% compression
631            CompressionType::Gzip => (raw_size as f64 * 0.5) as usize, // ~50% compression
632            CompressionType::VectorQuantization => (raw_size as f64 * 0.25) as usize, // ~75% compression
633        };
634
635        header_size + compressed_size
636    }
637
638    /// Benchmark compression algorithms
639    pub fn benchmark_compression(vectors: &[Vector]) -> Result<Vec<(CompressionType, f64, usize)>> {
640        let binary_data = Self::vectors_to_binary(vectors)?;
641        let original_size = binary_data.len();
642
643        let algorithms = [
644            CompressionType::None,
645            CompressionType::Lz4,
646            CompressionType::Zstd,
647            CompressionType::Brotli,
648            CompressionType::Gzip,
649        ];
650
651        let mut results = Vec::new();
652
653        for &algorithm in &algorithms {
654            let start_time = std::time::Instant::now();
655
656            // Simulate compression (in real implementation, use actual compression)
657            let compressed_size = match algorithm {
658                CompressionType::None => original_size,
659                CompressionType::Lz4 => (original_size as f64 * 0.6) as usize,
660                CompressionType::Zstd => (original_size as f64 * 0.5) as usize,
661                CompressionType::Brotli => (original_size as f64 * 0.4) as usize,
662                CompressionType::Gzip => (original_size as f64 * 0.5) as usize,
663                CompressionType::VectorQuantization => (original_size as f64 * 0.25) as usize,
664            };
665
666            let duration = start_time.elapsed().as_secs_f64();
667            results.push((algorithm, duration, compressed_size));
668        }
669
670        Ok(results)
671    }
672}
673
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::NamedTempFile;

    // Checksum must validate after calculation and fail after a field is
    // mutated (vector_count is one of the fields the checksum covers).
    #[test]
    fn test_vector_file_header() {
        let mut header = VectorFileHeader {
            vector_count: 1000,
            dimensions: 128,
            ..Default::default()
        };
        header.calculate_checksum();

        assert!(header.verify_checksum());

        // Modify header and verify checksum fails
        header.vector_count = 2000;
        assert!(!header.verify_checksum());
    }

    // Round-trip: vectors -> little-endian bytes -> vectors preserves values.
    #[test]
    fn test_storage_utils() {
        let vectors = vec![
            Vector::new(vec![1.0, 2.0, 3.0]),
            Vector::new(vec![4.0, 5.0, 6.0]),
        ];

        let binary_data = StorageUtils::vectors_to_binary(&vectors).unwrap();
        let restored_vectors = StorageUtils::binary_to_vectors(&binary_data, 3).unwrap();

        assert_eq!(vectors.len(), restored_vectors.len());
        for (original, restored) in vectors.iter().zip(restored_vectors.iter()) {
            assert_eq!(original.as_f32(), restored.as_f32());
        }
    }

    // Round-trip through the streaming writer/reader with no compression
    // (the only mode the streaming reader decodes).
    #[test]
    fn test_vector_writer_reader() -> Result<()> {
        let temp_file = NamedTempFile::new()?;
        let file_path = temp_file.path();

        // Write vectors
        {
            let config = StorageConfig {
                compression: CompressionType::None,
                ..Default::default()
            };
            let mut writer = VectorWriter::new(file_path, config)?;

            let vectors = vec![
                Vector::new(vec![1.0, 2.0, 3.0, 4.0]),
                Vector::new(vec![5.0, 6.0, 7.0, 8.0]),
                Vector::new(vec![9.0, 10.0, 11.0, 12.0]),
            ];

            writer.write_vectors(&vectors)?;
            writer.finalize()?;
        }

        // Read vectors back
        {
            let mut reader = VectorReader::open(file_path)?;
            let metadata = reader.metadata();

            assert_eq!(metadata.vector_count, 3);
            assert_eq!(metadata.dimensions, 4);

            let vectors = reader.read_all()?;
            assert_eq!(vectors.len(), 3);

            assert_eq!(vectors[0].as_f32(), &[1.0, 2.0, 3.0, 4.0]);
            assert_eq!(vectors[1].as_f32(), &[5.0, 6.0, 7.0, 8.0]);
            assert_eq!(vectors[2].as_f32(), &[9.0, 10.0, 11.0, 12.0]);
        }

        Ok(())
    }

    // The benchmark returns one entry per listed algorithm, and the
    // simulated compressed sizes shrink relative to the uncompressed size.
    #[test]
    fn test_compression_benchmark() {
        let vectors = vec![
            Vector::new(vec![1.0; 128]),
            Vector::new(vec![2.0; 128]),
            Vector::new(vec![3.0; 128]),
        ];

        let results = StorageUtils::benchmark_compression(&vectors).unwrap();
        assert_eq!(results.len(), 5); // 5 compression algorithms

        // Verify that some compression types reduce size
        let none_size = results
            .iter()
            .find(|(t, _, _)| *t == CompressionType::None)
            .unwrap()
            .2;
        let zstd_size = results
            .iter()
            .find(|(t, _, _)| *t == CompressionType::Zstd)
            .unwrap()
            .2;

        assert!(zstd_size < none_size);
    }
}