1use crate::metrics::OperationMetrics;
9#[cfg(feature = "compression")]
10use lz4_flex;
11use serde::{Deserialize, Serialize};
12use std::sync::{Arc, Mutex};
13use std::time::Instant;
14use thiserror::Error;
15#[cfg(feature = "checksum")]
16use xxhash_rust::xxh3::xxh3_64;
17
18#[derive(Debug, Error, Clone, PartialEq)]
20pub enum ByteStorageError {
21 #[error("input exceeds maximum size")]
22 InputTooLarge,
23
24 #[error("decompression ratio exceeds safety limit")]
25 DecompressionBomb,
26
27 #[error("integrity check failed")]
28 ChecksumMismatch,
29
30 #[error("compression failed")]
31 CompressionFailed,
32
33 #[error("decompression failed")]
34 DecompressionFailed,
35
36 #[error("size validation failed")]
37 SizeValidationFailed,
38
39 #[error("serialization failed: {0}")]
40 SerializationFailed(String),
41
42 #[error("deserialization failed: {0}")]
43 DeserializationFailed(String),
44}
45
/// Largest payload, in bytes, accepted before compression (512 MiB).
const MAX_UNCOMPRESSED_SIZE: usize = 512 * 1024 * 1024;

/// Largest compressed buffer or serialized envelope, in bytes (512 MiB).
const MAX_COMPRESSED_SIZE: usize = 512 * 1024 * 1024;

/// Highest tolerated ratio of declared original size to compressed size.
const MAX_COMPRESSION_RATIO: u64 = 1_000;
52
53#[derive(Serialize, Deserialize)]
56pub struct StorageEnvelope {
57 pub compressed_data: Vec<u8>,
59 pub checksum: [u8; 8],
61 pub original_size: u32,
63 pub format: String,
65}
66
67impl StorageEnvelope {
68 #[cfg(all(feature = "compression", feature = "checksum"))]
70 pub fn new(data: Vec<u8>, format: String) -> Result<Self, ByteStorageError> {
71 if data.len() > MAX_UNCOMPRESSED_SIZE {
73 return Err(ByteStorageError::InputTooLarge);
74 }
75
76 let original_size = data.len() as u32;
77
78 let compressed_data = lz4_flex::compress(&data);
80
81 if compressed_data.len() > MAX_COMPRESSED_SIZE {
83 return Err(ByteStorageError::InputTooLarge);
84 }
85
86 let checksum = xxh3_64(&data).to_be_bytes();
88
89 Ok(StorageEnvelope {
90 compressed_data,
91 checksum,
92 original_size,
93 format,
94 })
95 }
96
97 #[cfg(all(feature = "compression", feature = "checksum"))]
99 pub fn extract(&self) -> Result<Vec<u8>, ByteStorageError> {
100 if self.compressed_data.len() > MAX_COMPRESSED_SIZE {
102 return Err(ByteStorageError::InputTooLarge);
103 }
104
105 if self.original_size as usize > MAX_UNCOMPRESSED_SIZE {
106 return Err(ByteStorageError::InputTooLarge);
107 }
108
109 let compressed_size = self.compressed_data.len() as u64;
112
113 if compressed_size == 0 {
115 return Err(ByteStorageError::DecompressionBomb);
116 }
117
118 let max_allowed_original = MAX_COMPRESSION_RATIO
120 .checked_mul(compressed_size)
121 .ok_or(ByteStorageError::DecompressionBomb)?;
122
123 if (self.original_size as u64) > max_allowed_original {
125 return Err(ByteStorageError::DecompressionBomb);
126 }
127
128 let decompressed = lz4_flex::decompress(&self.compressed_data, self.original_size as usize)
130 .map_err(|_| ByteStorageError::DecompressionFailed)?;
131
132 let computed_checksum = xxh3_64(&decompressed).to_be_bytes();
136 if computed_checksum != self.checksum {
137 return Err(ByteStorageError::ChecksumMismatch);
138 }
139
140 if decompressed.len() != self.original_size as usize {
142 return Err(ByteStorageError::SizeValidationFailed);
143 }
144
145 Ok(decompressed)
146 }
147}
148
149pub struct ByteStorage {
152 default_format: String,
153 last_metrics: Arc<Mutex<OperationMetrics>>,
155}
156
157impl ByteStorage {
158 pub fn new(default_format: Option<String>) -> Self {
160 ByteStorage {
161 default_format: default_format.unwrap_or_else(|| "msgpack".to_string()),
162 last_metrics: Arc::new(Mutex::new(OperationMetrics::new())),
163 }
164 }
165
166 #[cfg(all(feature = "compression", feature = "checksum", feature = "messagepack"))]
170 pub fn store(&self, data: &[u8], format: Option<String>) -> Result<Vec<u8>, ByteStorageError> {
171 if data.len() > MAX_UNCOMPRESSED_SIZE {
173 return Err(ByteStorageError::InputTooLarge);
174 }
175
176 let format = format.unwrap_or_else(|| self.default_format.clone());
177
178 let compression_start = Instant::now();
180 let original_size = data.len();
181
182 let envelope = StorageEnvelope::new(data.to_vec(), format)?;
183
184 let compression_elapsed = compression_start.elapsed();
185 let compression_micros = compression_elapsed.as_micros() as u64;
186 let compressed_size = envelope.compressed_data.len();
187
188 let envelope_bytes = rmp_serde::to_vec(&envelope)
190 .map_err(|e| ByteStorageError::SerializationFailed(e.to_string()))?;
191
192 if envelope_bytes.len() > MAX_COMPRESSED_SIZE {
194 return Err(ByteStorageError::InputTooLarge);
195 }
196
197 if let Ok(mut metrics) = self.last_metrics.lock() {
199 *metrics = OperationMetrics::new().with_compression(
200 compression_micros,
201 original_size,
202 compressed_size,
203 );
204 }
205
206 Ok(envelope_bytes)
207 }
208
209 #[cfg(all(feature = "compression", feature = "checksum", feature = "messagepack"))]
213 pub fn retrieve(&self, envelope_bytes: &[u8]) -> Result<(Vec<u8>, String), ByteStorageError> {
214 if envelope_bytes.len() > MAX_COMPRESSED_SIZE {
216 return Err(ByteStorageError::InputTooLarge);
217 }
218
219 let envelope: StorageEnvelope = rmp_serde::from_slice(envelope_bytes)
221 .map_err(|e| ByteStorageError::DeserializationFailed(e.to_string()))?;
222
223 let decompress_start = Instant::now();
225
226 let data = envelope.extract()?;
228
229 let decompress_elapsed = decompress_start.elapsed();
230 let decompress_micros = decompress_elapsed.as_micros() as u64;
231
232 let compressed_size = envelope.compressed_data.len();
234 let original_size = envelope.original_size as usize;
235
236 if let Ok(mut metrics) = self.last_metrics.lock() {
238 *metrics = OperationMetrics::new().with_compression(
239 decompress_micros,
240 original_size,
241 compressed_size,
242 );
243 }
244
245 Ok((data, envelope.format))
246 }
247
248 #[cfg(feature = "compression")]
250 pub fn estimate_compression(&self, data: &[u8]) -> Result<f64, ByteStorageError> {
251 if data.len() > MAX_UNCOMPRESSED_SIZE {
253 return Err(ByteStorageError::InputTooLarge);
254 }
255
256 let compressed = lz4_flex::compress(data);
257
258 Ok(data.len() as f64 / compressed.len() as f64)
259 }
260
261 #[cfg(all(feature = "compression", feature = "checksum", feature = "messagepack"))]
263 pub fn validate(&self, envelope_bytes: &[u8]) -> bool {
264 if envelope_bytes.len() > MAX_COMPRESSED_SIZE {
266 return false; }
268
269 match rmp_serde::from_slice::<StorageEnvelope>(envelope_bytes) {
270 Ok(envelope) => envelope.extract().is_ok(),
271 Err(_) => false,
272 }
273 }
274
275 pub fn get_last_metrics(&self) -> OperationMetrics {
279 self.last_metrics
280 .lock()
281 .map(|metrics| metrics.clone())
282 .unwrap_or_else(|_| OperationMetrics::new())
283 }
284
285 pub fn max_uncompressed_size(&self) -> usize {
287 MAX_UNCOMPRESSED_SIZE
288 }
289
290 pub fn max_compressed_size(&self) -> usize {
291 MAX_COMPRESSED_SIZE
292 }
293
294 pub fn max_compression_ratio(&self) -> u64 {
295 MAX_COMPRESSION_RATIO
296 }
297}
298
299impl Default for ByteStorage {
300 fn default() -> Self {
301 Self::new(None)
302 }
303}
304
#[cfg(all(
    test,
    feature = "compression",
    feature = "checksum",
    feature = "messagepack"
))]
mod tests {
    use super::*;

    /// Builds an envelope straight from raw parts (zero-filled compressed bytes, zeroed
    /// checksum), bypassing `StorageEnvelope::new` so hostile size fields can be tested.
    fn forged_envelope(compressed_len: usize, original_size: u32) -> StorageEnvelope {
        StorageEnvelope {
            compressed_data: vec![0u8; compressed_len],
            checksum: [0u8; 8],
            original_size,
            format: "test".to_string(),
        }
    }

    #[test]
    fn test_storage_envelope_roundtrip() {
        let data = b"Hello, World! This is test data for compression.".to_vec();
        let envelope = StorageEnvelope::new(data.clone(), "test".to_string()).unwrap();
        let extracted = envelope.extract().unwrap();
        assert_eq!(data, extracted);
    }

    #[test]
    fn test_compression_works() {
        // Highly repetitive input must come out smaller than it went in.
        let data = b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".to_vec();
        let envelope = StorageEnvelope::new(data.clone(), "test".to_string()).unwrap();
        assert!(envelope.compressed_data.len() < data.len());
    }

    #[test]
    fn test_checksum_validation() {
        let mut envelope = StorageEnvelope::new(b"test".to_vec(), "test".to_string()).unwrap();
        envelope.checksum[0] = !envelope.checksum[0];

        // Pin the exact variant so the test cannot pass because of an unrelated failure.
        let result = envelope.extract();
        assert!(
            matches!(result, Err(ByteStorageError::ChecksumMismatch)),
            "Corrupted checksum should be reported as ChecksumMismatch: {:?}",
            result
        );
    }

    #[test]
    fn test_raw_persistence_roundtrip() {
        let storage = ByteStorage::new(None);
        let test_data = b"test data for persistence";

        let stored = storage.store(test_data, None).unwrap();
        let (retrieved_data, format) = storage.retrieve(&stored).unwrap();
        assert_eq!(test_data, retrieved_data.as_slice());
        assert_eq!("msgpack", format);
    }

    #[test]
    fn test_size_limits_input() {
        let storage = ByteStorage::new(None);
        let large_data = vec![0u8; MAX_UNCOMPRESSED_SIZE + 1];

        let result = storage.store(&large_data, None);
        assert!(matches!(result, Err(ByteStorageError::InputTooLarge)));
    }

    #[test]
    fn test_size_limits_envelope() {
        let max_data = vec![0u8; MAX_UNCOMPRESSED_SIZE];
        let envelope_result = StorageEnvelope::new(max_data, "test".to_string());

        assert!(envelope_result.is_ok());
    }

    #[test]
    fn test_compression_ratio_bomb_protection() {
        let malicious_envelope = forged_envelope(1000, 200 * 1024 * 1024);

        let result = malicious_envelope.extract();
        assert!(matches!(result, Err(ByteStorageError::DecompressionBomb)));
    }

    #[test]
    fn test_decompression_bomb_zero_compressed_size() {
        let malicious_envelope = forged_envelope(0, 1000);

        let result = malicious_envelope.extract();
        assert!(
            matches!(result, Err(ByteStorageError::DecompressionBomb)),
            "Zero compressed size should be rejected as decompression bomb"
        );
    }

    #[test]
    fn test_decompression_bomb_extreme_ratio() {
        let malicious_envelope = forged_envelope(1, 2000);

        let result = malicious_envelope.extract();
        assert!(
            matches!(result, Err(ByteStorageError::DecompressionBomb)),
            "Extreme ratio should be rejected as bomb: {:?}",
            result
        );
    }

    #[test]
    fn test_decompression_u32_max_original_size() {
        let malicious_envelope = forged_envelope(1000, u32::MAX);

        let result = malicious_envelope.extract();
        assert!(
            matches!(result, Err(ByteStorageError::InputTooLarge)),
            "u32::MAX should be rejected as InputTooLarge (exceeds 512MB limit): {:?}",
            result
        );
    }

    #[test]
    fn test_decompression_exactly_at_threshold() {
        let envelope = forged_envelope(100, 100_000);

        let result = envelope.extract();
        assert!(
            !matches!(result, Err(ByteStorageError::DecompressionBomb)),
            "Exactly 1000:1 ratio should pass bomb check: {:?}",
            result
        );
        assert!(
            result.is_err(),
            "Invalid data should still fail after ratio check"
        );
    }

    #[test]
    fn test_decompression_just_over_threshold() {
        let malicious_envelope = forged_envelope(100, 100_001);

        let result = malicious_envelope.extract();
        assert!(
            matches!(result, Err(ByteStorageError::DecompressionBomb)),
            "Just over 1000:1 ratio should be rejected as bomb"
        );
    }

    #[test]
    fn test_decompression_bomb_integer_boundary() {
        let envelope = forged_envelope(1_000_000, 1_000_000_000);

        let result = envelope.extract();
        assert!(
            matches!(result, Err(ByteStorageError::InputTooLarge)),
            "Should fail size check before ratio check: {:?}",
            result
        );
    }

    #[test]
    fn test_envelope_size_validation() {
        let storage = ByteStorage::new(None);
        let oversized_envelope = vec![0u8; MAX_COMPRESSED_SIZE + 1];

        let result = storage.retrieve(&oversized_envelope);
        assert!(matches!(result, Err(ByteStorageError::InputTooLarge)));
    }

    #[test]
    fn test_security_limits_getters() {
        let storage = ByteStorage::new(None);

        assert_eq!(storage.max_uncompressed_size(), MAX_UNCOMPRESSED_SIZE);
        assert_eq!(storage.max_compressed_size(), MAX_COMPRESSED_SIZE);
        assert_eq!(storage.max_compression_ratio(), 1000u64);
    }

    #[test]
    fn test_compression_estimate_security() {
        let storage = ByteStorage::new(None);
        let large_data = vec![0u8; MAX_UNCOMPRESSED_SIZE + 1];

        let result = storage.estimate_compression(&large_data);
        assert!(matches!(result, Err(ByteStorageError::InputTooLarge)));
    }

    #[test]
    fn test_validate_security() {
        let storage = ByteStorage::new(None);
        let large_envelope = vec![0u8; MAX_COMPRESSED_SIZE + 1];

        let result = storage.validate(&large_envelope);
        assert!(!result);
    }

    #[test]
    fn test_edge_case_exactly_at_limits() {
        let storage = ByteStorage::new(None);
        let max_size_data = vec![1u8; MAX_UNCOMPRESSED_SIZE];

        let result = storage.store(&max_size_data, None);
        assert!(result.is_ok());
    }

    #[test]
    fn test_zero_size_edge_case() {
        let storage = ByteStorage::new(None);
        let empty_data: Vec<u8> = Vec::new();

        let stored = storage.store(&empty_data, None).unwrap();
        let (retrieved_data, format) = storage.retrieve(&stored).unwrap();
        assert_eq!(empty_data, retrieved_data);
        assert_eq!("msgpack", format);
    }

    #[test]
    fn test_metrics_collection_on_store() {
        let storage = ByteStorage::new(None);
        let test_data = b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".to_vec();

        storage.store(&test_data, None).unwrap();
        let metrics = storage.get_last_metrics();

        assert!(metrics.compression_ratio > 0.0);
    }

    #[test]
    fn test_metrics_collection_on_retrieve() {
        let storage = ByteStorage::new(None);
        let test_data = b"test data for retrieval metrics";

        let stored = storage.store(test_data, None).unwrap();
        storage.retrieve(&stored).unwrap();
        let metrics = storage.get_last_metrics();

        assert!(metrics.compression_ratio > 0.0);
    }
}
600
/// Kani harnesses for the arithmetic behind the size and ratio limits.
///
/// Each harness asserts a property that could actually be violated if a limit constant
/// were changed, rather than comparing an expression with itself.
#[cfg(kani)]
mod kani_proofs {
    use super::*;

    /// Flipping any single bit of an 8-byte checksum produces a different checksum.
    #[kani::proof]
    #[kani::unwind(10)]
    fn verify_checksum_detects_corruption() {
        let checksum_a: [u8; 8] = kani::any();
        let byte_index: usize = kani::any();
        let bit_index: usize = kani::any();
        kani::assume(byte_index < 8);
        kani::assume(bit_index < 8);

        let mut checksum_b = checksum_a;
        checksum_b[byte_index] ^= 1 << bit_index;

        assert_ne!(checksum_a, checksum_b);
    }

    /// For small compressed sizes the ratio bound never overflows, everything it rejects
    /// is above the bare ratio, and everything it accepts stays under a fixed ceiling.
    #[kani::proof]
    #[kani::unwind(3)]
    fn verify_decompression_bomb_protection() {
        let compressed_size: u64 = kani::any();
        let original_size: u64 = kani::any();
        kani::assume(compressed_size > 0 && compressed_size <= 1000);
        kani::assume(original_size > 0);

        let max_allowed = MAX_COMPRESSION_RATIO.checked_mul(compressed_size);

        // With at most 1000 compressed bytes the product cannot overflow a u64.
        assert!(max_allowed.is_some());

        if let Some(max) = max_allowed {
            if original_size > max {
                // Rejected: compressed_size >= 1, so the size must exceed the bare ratio.
                assert!(original_size > MAX_COMPRESSION_RATIO);
            } else {
                // Accepted: never more than the ratio times the largest assumed input.
                assert!(original_size <= MAX_COMPRESSION_RATIO * 1000);
            }
        }
    }

    /// Every length that passes the uncompressed-size limit fits losslessly in the
    /// envelope's `u32` size field.
    #[kani::proof]
    #[kani::unwind(3)]
    fn verify_input_size_limits() {
        let size: usize = kani::any();
        kani::assume(size <= MAX_UNCOMPRESSED_SIZE + 100);

        if size <= MAX_UNCOMPRESSED_SIZE {
            assert_eq!((size as u32) as usize, size);
        }
    }

    /// For every compressed length that passes the compressed-size limit, the ratio bound
    /// can be computed without overflow.
    #[kani::proof]
    #[kani::unwind(3)]
    fn verify_compressed_size_limits() {
        let compressed_size: usize = kani::any();
        kani::assume(compressed_size <= MAX_COMPRESSED_SIZE + 100);

        if compressed_size <= MAX_COMPRESSED_SIZE {
            assert!(MAX_COMPRESSION_RATIO
                .checked_mul(compressed_size as u64)
                .is_some());
        }
    }

    /// The ratio bound is at least the compressed size itself, so a payload that did not
    /// shrink at all is never classified as a bomb.
    #[kani::proof]
    #[kani::unwind(3)]
    fn verify_compression_ratio_calculation_safety() {
        let original_size: u64 = kani::any();
        let compressed_size: u64 = kani::any();
        kani::assume(compressed_size > 0);
        kani::assume(compressed_size <= 10000);
        kani::assume(original_size <= 100_000_000);

        let result = MAX_COMPRESSION_RATIO.checked_mul(compressed_size);
        assert!(result.is_some());

        if let Some(max_allowed) = result {
            assert!(max_allowed >= compressed_size);

            if original_size <= compressed_size {
                let is_bomb = original_size > max_allowed;
                assert!(!is_bomb);
            }
        }
    }
}