// sqry_core/indexing/compression.rs
//! Index compression for reducing disk space and improving cold-start times.
//!
//! This module provides zstd-based compression for serialized indexes with
//! backward compatibility for legacy uncompressed indexes.
//!
//! # Format
//!
//! Compressed indexes use a header-based format:
//!
//! ```text
//! ┌──────────────────────────────────────┐
//! │ Magic: "SQRY" (4 bytes)              │
//! ├──────────────────────────────────────┤
//! │ Version: u32 (4 bytes)               │
//! ├──────────────────────────────────────┤
//! │ Compression: u8 (1 byte)             │
//! │   0 = None, 1 = zstd                 │
//! ├──────────────────────────────────────┤
//! │ Level: i32 (4 bytes)                 │
//! ├──────────────────────────────────────┤
//! │ Uncompressed Size: u64 (8 bytes)     │
//! ├──────────────────────────────────────┤
//! │ Compressed Data (variable)           │
//! └──────────────────────────────────────┘
//! ```
//!
//! # Examples
//!
//! ```no_run
//! use sqry_core::indexing::CompressedIndex;
//!
//! // Compress index data
//! let data = b"index data here";
//! let compressed = CompressedIndex::compress(data, 3)?;
//!
//! // Serialize to disk format
//! let serialized = compressed.serialize();
//!
//! // Deserialize and decompress
//! let loaded = CompressedIndex::deserialize(&serialized)?;
//! let original = loaded.decompress()?;
//! # Ok::<(), Box<dyn std::error::Error>>(())
//! ```

use std::io::{self, Read, Write};

/// Magic bytes identifying the compressed index format on disk: "SQRY"
const MAGIC: &[u8; 4] = b"SQRY";

/// Current on-disk format version
const FORMAT_VERSION: u32 = 1;

/// Default zstd compression level (3 = fast, good compression ratio)
pub const DEFAULT_COMPRESSION_LEVEL: i32 = 3;

/// Maximum allowed uncompressed size (500 MB by default)
///
/// Guards against decompression bombs consuming excessive memory.
/// Can be overridden via the `SQRY_MAX_INDEX_SIZE` environment variable.
pub const DEFAULT_MAX_UNCOMPRESSED_SIZE: u64 = 500 * 1024 * 1024;

/// Lower bound for the configurable maximum uncompressed size (1 MB)
const MIN_MAX_UNCOMPRESSED_SIZE: u64 = 1024 * 1024;

/// Upper bound for the configurable maximum uncompressed size (2 GB)
const MAX_MAX_UNCOMPRESSED_SIZE: u64 = 2 * 1024 * 1024 * 1024;

/// Get the maximum allowed uncompressed size.
///
/// Reads the `SQRY_MAX_INDEX_SIZE` environment variable when it is set and
/// parses as a `u64`; otherwise falls back to `DEFAULT_MAX_UNCOMPRESSED_SIZE`.
///
/// # Security
///
/// The result is clamped to the [1 MB, 2 GB] range (P1-14) so a malicious
/// environment value can neither permit excessively large decompression
/// (`DoS` via memory exhaustion) nor set the limit so low that valid indexes
/// are rejected (`DoS` via rejection).
#[must_use]
pub fn max_uncompressed_size() -> u64 {
    // Unset or unparsable values both fall back to the default.
    let configured = match std::env::var("SQRY_MAX_INDEX_SIZE") {
        Ok(raw) => raw.parse().unwrap_or(DEFAULT_MAX_UNCOMPRESSED_SIZE),
        Err(_) => DEFAULT_MAX_UNCOMPRESSED_SIZE,
    };
    configured.clamp(MIN_MAX_UNCOMPRESSED_SIZE, MAX_MAX_UNCOMPRESSED_SIZE)
}
87
/// Compression format type, stored as the 1-byte compression field of the
/// on-disk header (see module docs).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum CompressionFormat {
    /// No compression (data stored as-is); header byte 0
    None = 0,
    /// Zstd compression; header byte 1
    Zstd = 1,
}
97
98impl CompressionFormat {
99    /// Convert from u8 byte value
100    fn from_u8(value: u8) -> Result<Self, CompressionError> {
101        match value {
102            0 => Ok(Self::None),
103            1 => Ok(Self::Zstd),
104            _ => Err(CompressionError::UnsupportedCompression(value)),
105        }
106    }
107}
108
/// Errors that can occur during compression/decompression or while parsing
/// the on-disk header format.
#[derive(Debug, thiserror::Error)]
pub enum CompressionError {
    /// I/O error during compression/decompression (wraps zstd encoder/decoder
    /// failures, which surface as `std::io::Error`)
    #[error("I/O error: {0}")]
    Io(#[from] io::Error),

    /// Unsupported compression format byte in the header (only 0 and 1 are known)
    #[error("Unsupported compression format: {0}")]
    UnsupportedCompression(u8),

    /// Invalid magic bytes (the input is not a compressed index)
    #[error("Invalid magic bytes, expected SQRY")]
    InvalidMagic,

    /// Index format version is newer than this binary understands
    #[error("Index version {index_version} is too new for sqry {sqry_version}, please upgrade")]
    IndexVersionTooNew {
        /// Version number found in the index file
        index_version: u32,
        /// Current sqry binary version
        sqry_version: &'static str,
    },

    /// Invalid format version (0 is reserved and never written)
    #[error("Invalid index version: {0}")]
    InvalidIndexVersion(u32),

    /// Input is too small to contain the fixed 21-byte header
    #[error("Invalid header size: expected at least 21 bytes, got {0}")]
    InvalidHeaderSize(usize),

    /// Decompressed payload size does not match the size recorded in the header
    #[error("Decompressed size mismatch: expected {expected}, got {actual}")]
    SizeMismatch {
        /// Expected uncompressed size from header
        expected: u64,
        /// Actual size after decompression
        actual: u64,
    },

    /// Decompression bomb detected (uncompressed size exceeds the configured maximum,
    /// see `max_uncompressed_size`)
    #[error("Decompression bomb detected: uncompressed size {size} exceeds maximum {max}")]
    DecompressionBomb {
        /// Declared uncompressed size
        size: u64,
        /// Maximum allowed size
        max: u64,
    },
}
159
/// Compressed index container with format metadata.
///
/// Mirrors the on-disk header fields (see module docs); produced by
/// `compress`/`uncompressed`/`deserialize` and consumed by
/// `decompress`/`serialize`.
#[derive(Debug, Clone)]
pub struct CompressedIndex {
    /// Format version (currently 1)
    version: u32,
    /// Compression format used
    compression: CompressionFormat,
    /// Compression level (for zstd; 0 when uncompressed)
    level: i32,
    /// Original uncompressed size in bytes, as recorded in the header
    uncompressed_size: u64,
    /// Compressed payload bytes (raw bytes when `compression` is `None`)
    data: Vec<u8>,
}
174
175impl CompressedIndex {
176    /// Compress data using zstd compression.
177    ///
178    /// # Arguments
179    ///
180    /// * `data` - The data to compress
181    /// * `level` - Compression level (1-22, where 3 is default)
182    ///
183    /// # Examples
184    ///
185    /// ```
186    /// use sqry_core::indexing::CompressedIndex;
187    ///
188    /// let data = b"test data";
189    /// let compressed = CompressedIndex::compress(data, 3)?;
190    /// # Ok::<(), Box<dyn std::error::Error>>(())
191    /// ```
192    ///
193    /// # Errors
194    ///
195    /// Returns [`CompressionError::Io`] if zstd fails to create the encoder or if writing to
196    /// the compressor fails.
197    pub fn compress(data: &[u8], level: i32) -> Result<Self, CompressionError> {
198        let mut encoder = zstd::Encoder::new(Vec::new(), level)?;
199        encoder.write_all(data)?;
200        let compressed = encoder.finish()?;
201
202        Ok(Self {
203            version: FORMAT_VERSION,
204            compression: CompressionFormat::Zstd,
205            level,
206            uncompressed_size: data.len() as u64,
207            data: compressed,
208        })
209    }
210
211    /// Create an uncompressed index container (for testing or fallback).
212    ///
213    /// # Examples
214    ///
215    /// ```
216    /// use sqry_core::indexing::CompressedIndex;
217    ///
218    /// let data = b"test data";
219    /// let uncompressed = CompressedIndex::uncompressed(data);
220    /// ```
221    #[must_use]
222    pub fn uncompressed(data: &[u8]) -> Self {
223        Self {
224            version: FORMAT_VERSION,
225            compression: CompressionFormat::None,
226            level: 0,
227            uncompressed_size: data.len() as u64,
228            data: data.to_vec(),
229        }
230    }
231
232    /// Decompress the index data.
233    ///
234    /// # Examples
235    ///
236    /// ```
237    /// use sqry_core::indexing::CompressedIndex;
238    ///
239    /// let data = b"test data";
240    /// let compressed = CompressedIndex::compress(data, 3)?;
241    /// let decompressed = compressed.decompress()?;
242    /// assert_eq!(data, &decompressed[..]);
243    /// # Ok::<(), Box<dyn std::error::Error>>(())
244    /// ```
245    ///
246    /// # Errors
247    ///
248    /// Returns [`CompressionError`] when decompression exceeds the configured safety limit,
249    /// when zstd emits an error, or when the resulting size does not match the stored header.
250    pub fn decompress(&self) -> Result<Vec<u8>, CompressionError> {
251        // P1-14: Check for decompression bombs before allocating memory
252        let max_size = max_uncompressed_size();
253        if self.uncompressed_size > max_size {
254            return Err(CompressionError::DecompressionBomb {
255                size: self.uncompressed_size,
256                max: max_size,
257            });
258        }
259
260        match self.compression {
261            CompressionFormat::None => {
262                // Even uncompressed data needs size check
263                if self.data.len() as u64 > max_size {
264                    return Err(CompressionError::DecompressionBomb {
265                        size: self.data.len() as u64,
266                        max: max_size,
267                    });
268                }
269                Ok(self.data.clone())
270            }
271            CompressionFormat::Zstd => {
272                // P1-14: CODEX review - enforce streaming limit during decompression
273                let decoder = zstd::Decoder::new(&self.data[..])?;
274
275                // Limit the decoder to max_size + 1 bytes to distinguish between:
276                // 1) Data that is exactly max_size (valid)
277                // 2) Data that exceeds max_size (decompression bomb)
278                // This ensures we never read more than the allowed maximum while
279                // allowing legitimate indexes at the boundary to pass.
280                let mut limited = decoder.take(max_size + 1);
281                let mut decompressed = Vec::new();
282                limited.read_to_end(&mut decompressed)?;
283
284                // Verify decompressed size matches header
285                let actual_size = decompressed.len() as u64;
286                if actual_size != self.uncompressed_size {
287                    return Err(CompressionError::SizeMismatch {
288                        expected: self.uncompressed_size,
289                        actual: actual_size,
290                    });
291                }
292
293                // Check if we exceeded the limit (decompression bomb detected)
294                // Using > instead of >= to allow data exactly at the limit
295                if actual_size > max_size {
296                    return Err(CompressionError::DecompressionBomb {
297                        size: actual_size,
298                        max: max_size,
299                    });
300                }
301
302                Ok(decompressed)
303            }
304        }
305    }
306
307    /// Serialize to on-disk format with header.
308    ///
309    /// # Format
310    ///
311    /// - Magic: "SQRY" (4 bytes)
312    /// - Version: u32 little-endian (4 bytes)
313    /// - Compression: u8 (1 byte)
314    /// - Level: i32 little-endian (4 bytes)
315    /// - Uncompressed Size: u64 little-endian (8 bytes)
316    /// - Data: variable length
317    ///
318    /// Total header size: 21 bytes
319    #[must_use]
320    pub fn serialize(&self) -> Vec<u8> {
321        let mut buffer = Vec::with_capacity(21 + self.data.len());
322
323        // Write magic
324        buffer.extend_from_slice(MAGIC);
325
326        // Write version
327        buffer.extend_from_slice(&self.version.to_le_bytes());
328
329        // Write compression format
330        buffer.push(self.compression as u8);
331
332        // Write compression level
333        buffer.extend_from_slice(&self.level.to_le_bytes());
334
335        // Write uncompressed size
336        buffer.extend_from_slice(&self.uncompressed_size.to_le_bytes());
337
338        // Write compressed data
339        buffer.extend_from_slice(&self.data);
340
341        buffer
342    }
343
344    /// Deserialize from on-disk format.
345    ///
346    /// # Errors
347    ///
348    /// Returns an error if:
349    /// - Magic bytes don't match "SQRY"
350    /// - Header is too small
351    /// - Version is unsupported
352    /// - Compression format is unknown
353    pub fn deserialize(data: &[u8]) -> Result<Self, CompressionError> {
354        // Check minimum header size (21 bytes)
355        if data.len() < 21 {
356            return Err(CompressionError::InvalidHeaderSize(data.len()));
357        }
358
359        // Check magic bytes
360        if &data[0..4] != MAGIC {
361            return Err(CompressionError::InvalidMagic);
362        }
363
364        // Parse version
365        let version = u32::from_le_bytes(
366            data[4..8]
367                .try_into()
368                .map_err(|_| CompressionError::InvalidHeaderSize(data.len()))?,
369        );
370
371        // Check version compatibility
372        match version {
373            0 => return Err(CompressionError::InvalidIndexVersion(0)),
374            FORMAT_VERSION => {
375                // Current version, continue parsing
376            }
377            v if v > FORMAT_VERSION => {
378                return Err(CompressionError::IndexVersionTooNew {
379                    index_version: v,
380                    sqry_version: env!("CARGO_PKG_VERSION"),
381                });
382            }
383            _ => {
384                // Older version - could support in future if needed
385                return Err(CompressionError::InvalidIndexVersion(version));
386            }
387        }
388
389        // Parse compression format
390        let compression = CompressionFormat::from_u8(data[8])?;
391
392        // Parse level
393        let level = i32::from_le_bytes(
394            data[9..13]
395                .try_into()
396                .map_err(|_| CompressionError::InvalidHeaderSize(data.len()))?,
397        );
398
399        // Parse uncompressed size
400        let uncompressed_size = u64::from_le_bytes(
401            data[13..21]
402                .try_into()
403                .map_err(|_| CompressionError::InvalidHeaderSize(data.len()))?,
404        );
405
406        // Extract data
407        let index_data = data[21..].to_vec();
408
409        Ok(Self {
410            version,
411            compression,
412            level,
413            uncompressed_size,
414            data: index_data,
415        })
416    }
417
418    /// Get the compression format used.
419    #[must_use]
420    pub fn compression(&self) -> CompressionFormat {
421        self.compression
422    }
423
424    /// Get the uncompressed size.
425    #[must_use]
426    pub fn uncompressed_size(&self) -> u64 {
427        self.uncompressed_size
428    }
429
430    /// Get the compressed size (actual data size).
431    #[must_use]
432    pub fn compressed_size(&self) -> usize {
433        self.data.len()
434    }
435
436    /// Get the compression ratio (uncompressed / compressed).
437    ///
438    /// Returns 1.0 for uncompressed data.
439    #[must_use]
440    pub fn compression_ratio(&self) -> f64 {
441        if self.data.is_empty() {
442            return 1.0;
443        }
444        Self::to_f64_lossy_u64(self.uncompressed_size) / Self::to_f64_lossy_usize(self.data.len())
445    }
446
447    #[inline]
448    #[allow(clippy::cast_precision_loss)] // Human-readable ratios tolerate lossy conversion
449    fn to_f64_lossy_u64(value: u64) -> f64 {
450        value as f64
451    }
452
453    #[inline]
454    #[allow(clippy::cast_precision_loss)] // Human-readable ratios tolerate lossy conversion
455    fn to_f64_lossy_usize(value: usize) -> f64 {
456        value as f64
457    }
458}
459
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_compress_decompress_roundtrip() {
        let original = b"test data for compression";
        let compressed = CompressedIndex::compress(original, DEFAULT_COMPRESSION_LEVEL).unwrap();
        let decompressed = compressed.decompress().unwrap();

        assert_eq!(original, &decompressed[..]);
    }

    #[test]
    fn test_serialize_deserialize_roundtrip() {
        let original = b"test data for serialization";
        let compressed = CompressedIndex::compress(original, 3).unwrap();
        let serialized = compressed.serialize();
        let deserialized = CompressedIndex::deserialize(&serialized).unwrap();
        let decompressed = deserialized.decompress().unwrap();

        assert_eq!(original, &decompressed[..]);
    }

    #[test]
    fn test_compression_reduces_size() {
        // Create highly compressible data (repeated pattern)
        let original = vec![b'a'; 10000];
        let compressed = CompressedIndex::compress(&original, 3).unwrap();

        assert!(
            compressed.compressed_size() < original.len(),
            "Compressed size {} should be less than original size {}",
            compressed.compressed_size(),
            original.len()
        );
    }

    #[test]
    fn test_compression_ratio() {
        let original = vec![b'x'; 1000];
        let compressed = CompressedIndex::compress(&original, 3).unwrap();

        let ratio = compressed.compression_ratio();
        assert!(
            ratio > 1.0,
            "Compression ratio should be > 1.0 for compressible data"
        );
    }

    #[test]
    fn test_uncompressed_roundtrip() {
        let original = b"uncompressed test data";
        let uncompressed = CompressedIndex::uncompressed(original);
        let decompressed = uncompressed.decompress().unwrap();

        assert_eq!(original, &decompressed[..]);
        assert_eq!(uncompressed.compression(), CompressionFormat::None);
    }

    #[test]
    fn test_magic_bytes_in_header() {
        let original = b"test";
        let compressed = CompressedIndex::compress(original, 3).unwrap();
        let serialized = compressed.serialize();

        assert_eq!(&serialized[0..4], b"SQRY");
    }

    #[test]
    fn test_invalid_magic_bytes() {
        // Need at least 21 bytes to pass size check, but with wrong magic
        let mut invalid_data = vec![0u8; 21];
        invalid_data[0..4].copy_from_slice(b"XXXX"); // Wrong magic
        let result = CompressedIndex::deserialize(&invalid_data);

        assert!(matches!(result, Err(CompressionError::InvalidMagic)));
    }

    #[test]
    fn test_header_too_small() {
        let too_small = b"SQRY123"; // Only 7 bytes
        let result = CompressedIndex::deserialize(too_small);

        assert!(matches!(
            result,
            Err(CompressionError::InvalidHeaderSize(7))
        ));
    }

    #[test]
    fn test_unsupported_compression_format() {
        let mut data = vec![0u8; 21];
        data[0..4].copy_from_slice(b"SQRY");
        data[4..8].copy_from_slice(&1u32.to_le_bytes()); // version = 1
        data[8] = 99; // Invalid compression format

        let result = CompressedIndex::deserialize(&data);

        assert!(matches!(
            result,
            Err(CompressionError::UnsupportedCompression(99))
        ));
    }

    #[test]
    fn test_future_version_error() {
        let mut data = vec![0u8; 21];
        data[0..4].copy_from_slice(b"SQRY");
        data[4..8].copy_from_slice(&999u32.to_le_bytes()); // version = 999

        let result = CompressedIndex::deserialize(&data);

        assert!(matches!(
            result,
            Err(CompressionError::IndexVersionTooNew { .. })
        ));
    }

    #[test]
    fn test_zero_version_error() {
        let mut data = vec![0u8; 21];
        data[0..4].copy_from_slice(b"SQRY");
        data[4..8].copy_from_slice(&0u32.to_le_bytes()); // version = 0

        let result = CompressedIndex::deserialize(&data);

        assert!(matches!(
            result,
            Err(CompressionError::InvalidIndexVersion(0))
        ));
    }

    #[test]
    fn test_compression_metadata() {
        let original = vec![b'y'; 5000];
        let compressed = CompressedIndex::compress(&original, 5).unwrap();

        assert_eq!(compressed.uncompressed_size(), 5000);
        assert_eq!(compressed.compression(), CompressionFormat::Zstd);
        assert!(compressed.compressed_size() < 5000);
    }

    #[test]
    fn test_empty_data_compression() {
        let original = b"";
        let compressed = CompressedIndex::compress(original, 3).unwrap();
        let decompressed = compressed.decompress().unwrap();

        assert_eq!(original, &decompressed[..]);
        assert_eq!(compressed.uncompressed_size(), 0);
    }

    #[test]
    fn test_large_data_compression() {
        // Test with ~1MB of data
        let original = vec![b'z'; 1_000_000];
        let compressed = CompressedIndex::compress(&original, 3).unwrap();
        let decompressed = compressed.decompress().unwrap();

        assert_eq!(original, decompressed);
        // Should achieve significant compression on repeated data
        assert!(
            compressed.compressed_size() < 100_000,
            "Expected < 100KB compressed, got {}",
            compressed.compressed_size()
        );
    }

    // ============================================================================
    // Comprehensive tests for P1-14 decompression bomb protection
    // ============================================================================

    #[test]
    fn test_decompression_bomb_protection_blocks_oversized() {
        // Create data that would exceed max_uncompressed_size (default 500MB)
        let original = vec![b'a'; 1_000_000]; // 1MB
        let compressed = CompressedIndex::compress(&original, 3).unwrap();

        // Manually corrupt the uncompressed_size field to claim 600MB
        let mut serialized = compressed.serialize();
        let fake_size = 600u64 * 1024 * 1024; // 600MB
        serialized[13..21].copy_from_slice(&fake_size.to_le_bytes());

        let corrupted = CompressedIndex::deserialize(&serialized).unwrap();
        let result = corrupted.decompress();

        // Should reject due to size claim exceeding limit
        assert!(
            matches!(result, Err(CompressionError::DecompressionBomb { .. })),
            "Should reject oversized decompression claim"
        );
    }

    #[test]
    fn test_decompression_bomb_protection_allows_at_limit() {
        // Test boundary case: data exactly at the limit should be accepted
        // Create compressed data with uncompressed size exactly at default limit (500MB)
        let original = vec![b'b'; 100_000]; // 100KB actual data
        let mut compressed = CompressedIndex::compress(&original, 3).unwrap();

        // Set uncompressed_size to exactly the limit (500MB)
        let exact_limit = 500u64 * 1024 * 1024;
        compressed.uncompressed_size = exact_limit;

        let serialized = compressed.serialize();
        let deserialized = CompressedIndex::deserialize(&serialized).unwrap();

        // Should accept data exactly at limit (due to > check, not >=)
        // Note: This will fail decompression for other reasons (actual data != claimed size)
        // but it should NOT fail with DecompressionBomb error
        let result = deserialized.decompress();

        // Should not be a decompression bomb error
        assert!(
            !matches!(result, Err(CompressionError::DecompressionBomb { .. })),
            "Should not reject data exactly at limit as decompression bomb"
        );
    }

    #[test]
    fn test_decompression_bomb_protection_blocks_one_over_limit() {
        // Test boundary case: data one byte over limit should be rejected
        let original = vec![b'c'; 100_000]; // 100KB actual data
        let compressed = CompressedIndex::compress(&original, 3).unwrap();

        // Manually set uncompressed_size to limit + 1
        let mut serialized = compressed.serialize();
        let over_limit = (500u64 * 1024 * 1024) + 1; // 500MB + 1 byte
        serialized[13..21].copy_from_slice(&over_limit.to_le_bytes());

        let corrupted = CompressedIndex::deserialize(&serialized).unwrap();
        let result = corrupted.decompress();

        // Should reject due to size exceeding limit by even 1 byte
        assert!(
            matches!(result, Err(CompressionError::DecompressionBomb { .. })),
            "Should reject data exceeding limit by even 1 byte"
        );
    }

    #[test]
    fn test_decompression_enforces_streaming_limit() {
        // Test that decompression uses streaming limit with take(max_size + 1)
        // This ensures we can detect data that exceeds the limit during streaming

        // Create data that's well within the limit
        let original = vec![b'd'; 200_000]; // 200KB - well within 500MB limit
        let compressed = CompressedIndex::compress(&original, 3).unwrap();

        // Decompress normally - should succeed because streaming limit allows it
        let result = compressed.decompress();
        assert!(result.is_ok(), "Decompression within limit should succeed");

        // The streaming limit (max_size + 1) ensures:
        // - Data exactly at max_size passes (not flagged as bomb)
        // - Data over max_size fails (flagged as bomb)
        // This is validated by the boundary tests above
    }

    #[test]
    fn test_max_uncompressed_size_clamping_enforces_minimum() {
        // BUGFIX: previously this test shadowed the module's private clamping
        // constants with local copies, so the assertions verified the copies
        // rather than the real limits. The constants are private but visible
        // here through `use super::*`, so assert against them directly.

        // Verify the real constants have the documented values
        assert_eq!(MIN_MAX_UNCOMPRESSED_SIZE, 1_048_576, "MIN should be 1MB");
        assert_eq!(
            MAX_MAX_UNCOMPRESSED_SIZE, 2_147_483_648,
            "MAX should be 2GB"
        );

        // Verify min <= default <= max
        let default_size = max_uncompressed_size();
        assert!(
            default_size >= MIN_MAX_UNCOMPRESSED_SIZE,
            "Default {default_size} should be >= MIN {MIN_MAX_UNCOMPRESSED_SIZE}"
        );
        assert!(
            default_size <= MAX_MAX_UNCOMPRESSED_SIZE,
            "Default {default_size} should be <= MAX {MAX_MAX_UNCOMPRESSED_SIZE}"
        );
    }

    #[test]
    fn test_max_uncompressed_size_default_is_500mb() {
        // Verify default is 500MB when no env var is set
        let default = max_uncompressed_size();

        // Default should be 500MB = 524,288,000 bytes
        // Unless overridden by env var, but in test environment it should be default
        assert!(
            default >= 500 * 1024 * 1024 || std::env::var("SQRY_MAX_INDEX_SIZE").is_ok(),
            "Default should be 500MB or env var should be set"
        );
    }

    #[test]
    fn test_decompression_bomb_error_includes_sizes() {
        // Verify that DecompressionBomb error includes both actual and max sizes
        let original = vec![b'e'; 100_000];
        let compressed = CompressedIndex::compress(&original, 3).unwrap();

        // Create oversized claim
        let mut serialized = compressed.serialize();
        let oversized = 600u64 * 1024 * 1024; // 600MB
        serialized[13..21].copy_from_slice(&oversized.to_le_bytes());

        let corrupted = CompressedIndex::deserialize(&serialized).unwrap();

        match corrupted.decompress() {
            Err(CompressionError::DecompressionBomb { size, max }) => {
                assert_eq!(size, oversized, "Error should report actual claimed size");
                assert!(max > 0, "Error should report max limit");
                assert!(size > max, "Error should show size exceeds max");
            }
            other => panic!("Expected DecompressionBomb error, got {other:?}"),
        }
    }

    #[test]
    fn test_compression_format_from_u8() {
        // Verify CompressionFormat::from_u8() works correctly
        assert!(matches!(
            CompressionFormat::from_u8(0),
            Ok(CompressionFormat::None)
        ));
        assert!(matches!(
            CompressionFormat::from_u8(1),
            Ok(CompressionFormat::Zstd)
        ));
        assert!(matches!(
            CompressionFormat::from_u8(99),
            Err(CompressionError::UnsupportedCompression(99))
        ));
    }
}