1use crate::error::CoreError;
7use ::ndarray::{Array, ArrayBase, Data, Dimension};
8use flate2::{read::GzDecoder, write::GzEncoder, Compression};
9use lz4::{Decoder as Lz4Decoder, EncoderBuilder as Lz4EncoderBuilder};
10use std::io::Result as IoResult;
11use std::io::{Read, Write};
12use std::marker::PhantomData;
13
/// Compression codec used for an in-memory buffer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionAlgorithm {
    /// Gzip (DEFLATE via `flate2`): better ratios, slower than LZ4.
    Gzip,
    /// LZ4: very fast compression/decompression, lighter ratios.
    Lz4,
    /// No compression; bytes are stored verbatim.
    None,
}
24
/// Symbolic compression effort level, convertible to a numeric level via `u32::from`.
#[derive(Debug, Clone, Copy)]
pub enum CompressionLevel {
    /// Fastest compression (numeric level 1).
    Fast,
    /// Balanced speed/ratio (numeric level 6).
    Default,
    /// Best ratio, slowest (numeric level 9).
    Best,
    /// Caller-supplied numeric level; meaning depends on the codec
    /// (LZ4 clamps it to 12 in `compress_lz4`).
    Custom(u32),
}
37
38impl From<CompressionLevel> for u32 {
39 fn from(level: CompressionLevel) -> Self {
40 match level {
41 CompressionLevel::Fast => 1,
42 CompressionLevel::Default => 6,
43 CompressionLevel::Best => 9,
44 CompressionLevel::Custom(level) => level,
45 }
46 }
47}
48
/// A typed slice of `Pod` elements held in compressed form.
///
/// The element type `T` is tracked only via `PhantomData`; the payload is an
/// opaque byte vector produced by the chosen [`CompressionAlgorithm`].
pub struct CompressedBuffer<T> {
    // Compressed payload (or the raw bytes when the algorithm is `None`;
    // empty when the source slice was empty).
    compressed_data: Vec<u8>,
    // Codec used to produce (and required to decode) `compressed_data`.
    algorithm: CompressionAlgorithm,
    #[allow(dead_code)]
    // Level used at construction; kept for introspection/debugging only.
    compression_level: CompressionLevel,
    // Byte size of the original `&[T]` slice, used to validate decompression.
    original_size: usize,
    // Marks the logical element type without storing any `T`.
    phantom: PhantomData<T>,
}
58
59impl<T> CompressedBuffer<T>
60where
61 T: bytemuck::Pod + bytemuck::Zeroable,
62{
63 pub fn new(
65 data: &[T],
66 algorithm: CompressionAlgorithm,
67 level: CompressionLevel,
68 ) -> IoResult<Self> {
69 let original_size = std::mem::size_of_val(data);
70
71 let compressed_data = if data.is_empty() {
73 Vec::new()
74 } else {
75 let bytes = bytemuck::cast_slice(data);
76 match algorithm {
77 CompressionAlgorithm::Gzip => Self::compress_gzip(bytes, level)?,
78 CompressionAlgorithm::Lz4 => Self::compress_lz4(bytes, level)?,
79 CompressionAlgorithm::None => bytes.to_vec(),
80 }
81 };
82
83 Ok(Self {
84 compressed_data,
85 algorithm,
86 compression_level: level,
87 original_size,
88 phantom: PhantomData,
89 })
90 }
91
92 pub fn decompress(&self) -> IoResult<Vec<T>> {
94 if self.original_size == 0 {
96 return Ok(Vec::new());
97 }
98
99 let decompressed_bytes = match self.algorithm {
100 CompressionAlgorithm::Gzip => Self::decompress_gzip(&self.compressed_data)?,
101 CompressionAlgorithm::Lz4 => Self::decompress_lz4(&self.compressed_data)?,
102 CompressionAlgorithm::None => self.compressed_data.clone(),
103 };
104
105 if decompressed_bytes.len() != self.original_size {
107 return Err(std::io::Error::new(
108 std::io::ErrorKind::InvalidData,
109 "Decompressed data size doesn't match original size",
110 ));
111 }
112
113 let data = bytemuck::cast_slice(&decompressed_bytes).to_vec();
114 Ok(data)
115 }
116
117 pub fn compression_ratio(&self) -> f64 {
119 self.original_size as f64 / self.compressed_data.len() as f64
120 }
121
122 pub fn compressed_size(&self) -> usize {
124 self.compressed_data.len()
125 }
126
127 pub fn original_size(&self) -> usize {
129 self.original_size
130 }
131
132 pub fn algorithm(&self) -> CompressionAlgorithm {
134 self.algorithm
135 }
136
137 fn compress_gzip(data: &[u8], level: CompressionLevel) -> IoResult<Vec<u8>> {
138 let mut encoder = GzEncoder::new(Vec::new(), Compression::new(level.into()));
139 encoder.write_all(data)?;
140 encoder.finish()
141 }
142
143 fn decompress_gzip(data: &[u8]) -> IoResult<Vec<u8>> {
144 let mut decoder = GzDecoder::new(data);
145 let mut decompressed = Vec::new();
146 decoder.read_to_end(&mut decompressed)?;
147 Ok(decompressed)
148 }
149
150 fn compress_lz4(data: &[u8], level: CompressionLevel) -> IoResult<Vec<u8>> {
151 let mut encoder = Lz4EncoderBuilder::new()
152 .level(std::cmp::min(level.into(), 12))
153 .build(Vec::new())?;
154 encoder.write_all(data)?;
155 Ok(encoder.finish().0)
156 }
157
158 fn decompress_lz4(data: &[u8]) -> IoResult<Vec<u8>> {
159 let mut decoder = Lz4Decoder::new(data)?;
160 let mut decompressed = Vec::new();
161 decoder.read_to_end(&mut decompressed)?;
162 Ok(decompressed)
163 }
164}
165
/// An ndarray stored in compressed form together with its shape, so it can be
/// reconstructed by [`CompressedArray::to_array`].
pub struct CompressedArray<T, D>
where
    D: Dimension,
{
    // Compressed element data in logical (row-major) order.
    buffer: CompressedBuffer<T>,
    // Shape needed to rebuild the array on decompression.
    shape: D,
}
174
175impl<T, D> CompressedArray<T, D>
176where
177 T: bytemuck::Pod + bytemuck::Zeroable + Clone,
178 D: Dimension,
179{
180 pub fn from_array<S>(
182 array: &ArrayBase<S, D>,
183 algorithm: CompressionAlgorithm,
184 level: CompressionLevel,
185 ) -> Result<Self, CoreError>
186 where
187 S: Data<Elem = T>,
188 {
189 let data = if array.is_standard_layout() {
190 array.as_slice().expect("Operation failed").to_vec()
192 } else {
193 array.iter().cloned().collect()
195 };
196
197 let buffer = CompressedBuffer::new(&data, algorithm, level).map_err(|e| {
198 CoreError::CompressionError(crate::error::ErrorContext::new(e.to_string()))
199 })?;
200
201 Ok(Self {
202 buffer,
203 shape: array.raw_dim(),
204 })
205 }
206
207 pub fn to_array(&self) -> Result<Array<T, D>, CoreError> {
209 let data = self.buffer.decompress().map_err(|e| {
210 CoreError::CompressionError(crate::error::ErrorContext::new(e.to_string()))
211 })?;
212
213 Array::from_shape_vec(self.shape.clone(), data)
214 .map_err(|e| CoreError::InvalidShape(crate::error::ErrorContext::new(e.to_string())))
215 }
216
217 pub fn compression_ratio(&self) -> f64 {
219 self.buffer.compression_ratio()
220 }
221
222 pub fn compressed_size(&self) -> usize {
224 self.buffer.compressed_size()
225 }
226
227 pub fn original_size(&self) -> usize {
229 self.buffer.original_size()
230 }
231
232 pub const fn shape(&self) -> &D {
234 &self.shape
235 }
236}
237
/// A collection of [`CompressedBuffer`]s sharing one algorithm/level, with
/// running totals for aggregate compression statistics.
pub struct CompressedBufferPool<T> {
    // Stored buffers; a buffer's id is its index in this Vec.
    buffers: Vec<CompressedBuffer<T>>,
    // Codec applied to every buffer added to the pool.
    algorithm: CompressionAlgorithm,
    // Level applied to every buffer added to the pool.
    compression_level: CompressionLevel,
    // Sum of original (uncompressed) sizes across all stored buffers.
    total_original_size: usize,
    // Sum of compressed sizes across all stored buffers.
    total_compressed_size: usize,
}
246
247impl<T> CompressedBufferPool<T>
248where
249 T: bytemuck::Pod + bytemuck::Zeroable,
250{
251 pub fn new(algorithm: CompressionAlgorithm, level: CompressionLevel) -> Self {
253 Self {
254 buffers: Vec::new(),
255 algorithm,
256 compression_level: level,
257 total_original_size: 0,
258 total_compressed_size: 0,
259 }
260 }
261
262 pub fn add_buffer(&mut self, data: &[T]) -> IoResult<usize> {
264 let buffer = CompressedBuffer::new(data, self.algorithm, self.compression_level)?;
265 self.total_original_size += buffer.original_size();
266 self.total_compressed_size += buffer.compressed_size();
267 let buffer_id = self.buffers.len();
268 self.buffers.push(buffer);
269 Ok(buffer_id)
270 }
271
272 pub fn get_buffer(&self, id: usize) -> Option<&CompressedBuffer<T>> {
274 self.buffers.get(id)
275 }
276
277 pub fn remove_buffer(&mut self, id: usize) -> Option<CompressedBuffer<T>> {
279 if id < self.buffers.len() {
280 let buffer = self.buffers.swap_remove(id);
281 self.total_original_size -= buffer.original_size();
282 self.total_compressed_size -= buffer.compressed_size();
283 Some(buffer)
284 } else {
285 None
286 }
287 }
288
289 pub fn total_compression_ratio(&self) -> f64 {
291 if self.total_compressed_size == 0 {
292 1.0
293 } else {
294 self.total_original_size as f64 / self.total_compressed_size as f64
295 }
296 }
297
298 pub fn memory_saved(&self) -> usize {
300 self.total_original_size
301 .saturating_sub(self.total_compressed_size)
302 }
303
304 pub fn stats(&self) -> CompressionStats {
306 CompressionStats {
307 buffer_count: self.buffers.len(),
308 total_original_size: self.total_original_size,
309 total_compressed_size: self.total_compressed_size,
310 compression_ratio: self.total_compression_ratio(),
311 memory_saved: self.memory_saved(),
312 algorithm: self.algorithm,
313 }
314 }
315
316 pub fn clear(&mut self) {
318 self.buffers.clear();
319 self.total_original_size = 0;
320 self.total_compressed_size = 0;
321 }
322}
323
/// Aggregate statistics for a [`CompressedBufferPool`], as returned by
/// `CompressedBufferPool::stats`.
#[derive(Debug, Clone)]
pub struct CompressionStats {
    /// Number of buffers currently stored in the pool.
    pub buffer_count: usize,
    /// Sum of original (uncompressed) sizes in bytes.
    pub total_original_size: usize,
    /// Sum of compressed sizes in bytes.
    pub total_compressed_size: usize,
    /// Aggregate original/compressed ratio (1.0 for an empty pool).
    pub compression_ratio: f64,
    /// Total bytes saved (original minus compressed, floored at 0).
    pub memory_saved: usize,
    /// Codec used by the pool.
    pub algorithm: CompressionAlgorithm,
}
334
335impl std::fmt::Display for CompressionStats {
336 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
337 write!(
338 f,
339 "Compression Stats:\n\
340 - Algorithm: {:?}\n\
341 - Buffers: {}\n\
342 - Original Size: {} bytes ({:.2} MB)\n\
343 - Compressed Size: {} bytes ({:.2} MB)\n\
344 - Compression Ratio: {:.2}x\n\
345 - Memory Saved: {} bytes ({:.2} MB)",
346 self.algorithm,
347 self.buffer_count,
348 self.total_original_size,
349 self.total_original_size as f64 / 1024.0 / 1024.0,
350 self.total_compressed_size,
351 self.total_compressed_size as f64 / 1024.0 / 1024.0,
352 self.compression_ratio,
353 self.memory_saved,
354 self.memory_saved as f64 / 1024.0 / 1024.0
355 )
356 }
357}
358
359pub struct AdaptiveCompression;
361
362impl AdaptiveCompression {
363 pub fn choose_algorithm<T>(data: &[T]) -> CompressionAlgorithm
365 where
366 T: bytemuck::Pod + bytemuck::Zeroable,
367 {
368 let bytes = bytemuck::cast_slice(data);
369
370 let sample_size = std::cmp::min(bytes.len(), 4096); let sample = &bytes[..sample_size];
373
374 let gzip_ratio = Self::estimate_compression_ratio(sample, CompressionAlgorithm::Gzip);
375 let lz4_ratio = Self::estimate_compression_ratio(sample, CompressionAlgorithm::Lz4);
376
377 if gzip_ratio > 2.0 {
379 CompressionAlgorithm::Gzip
380 } else if lz4_ratio > 1.5 {
381 CompressionAlgorithm::Lz4
382 } else {
383 CompressionAlgorithm::None
384 }
385 }
386
387 fn estimate_compression_ratio(data: &[u8], algorithm: CompressionAlgorithm) -> f64 {
388 match algorithm {
389 CompressionAlgorithm::Gzip => {
390 if let Ok(compressed) =
391 CompressedBuffer::<u8>::compress_gzip(data, CompressionLevel::Fast)
392 {
393 data.len() as f64 / compressed.len() as f64
394 } else {
395 1.0
396 }
397 }
398 CompressionAlgorithm::Lz4 => {
399 if let Ok(compressed) =
400 CompressedBuffer::<u8>::compress_lz4(data, CompressionLevel::Fast)
401 {
402 data.len() as f64 / compressed.len() as f64
403 } else {
404 1.0
405 }
406 }
407 CompressionAlgorithm::None => 1.0,
408 }
409 }
410}
411
#[cfg(test)]
mod tests {
    use super::*;
    use ::ndarray::Array2;

    /// Gzip round-trip of a simple numeric sequence.
    #[test]
    fn test_compressed_buffer_basic() {
        let data: Vec<f64> = (0..1000).map(|i| i as f64).collect();

        let buffer =
            CompressedBuffer::new(&data, CompressionAlgorithm::Gzip, CompressionLevel::Default)
                .expect("Failed to create compressed buffer");

        let decompressed = buffer.decompress().expect("Failed to decompress");
        assert_eq!(data, decompressed);
        assert!(buffer.compression_ratio() > 1.0);
    }

    /// Array round-trip through CompressedArray.
    #[test]
    fn test_compressed_array() {
        let array = Array2::<f64>::zeros((100, 100));

        let compressed =
            CompressedArray::from_array(&array, CompressionAlgorithm::Lz4, CompressionLevel::Fast)
                .expect("Failed to create compressed array");

        let decompressed = compressed.to_array().expect("Failed to decompress array");
        assert_eq!(array, decompressed);
    }

    /// Pool add/get plus round-trips for every stored buffer.
    #[test]
    fn test_compressed_buffer_pool() {
        let mut pool =
            CompressedBufferPool::new(CompressionAlgorithm::Gzip, CompressionLevel::Default);

        let data1: Vec<f32> = vec![1.0; 1000];
        let data2: Vec<f32> = (0..1000).map(|i| i as f32).collect();

        let id1 = pool.add_buffer(&data1).expect("Failed to add buffer 1");
        let id2 = pool.add_buffer(&data2).expect("Failed to add buffer 2");

        assert_eq!(pool.stats().buffer_count, 2);
        assert!(pool.total_compression_ratio() > 1.0);

        let buffer1 = pool.get_buffer(id1).expect("Failed to get buffer 1");
        let decompressed1 = buffer1.decompress().expect("Failed to decompress buffer 1");
        assert_eq!(data1, decompressed1);

        // Exercise the second id too (it was previously created but unused).
        let buffer2 = pool.get_buffer(id2).expect("Failed to get buffer 2");
        let decompressed2 = buffer2.decompress().expect("Failed to decompress buffer 2");
        assert_eq!(data2, decompressed2);
    }

    /// The heuristic should pick gzip for highly repetitive data and
    /// return some valid algorithm for arbitrary data.
    #[test]
    fn test_adaptive_compression() {
        let compressible_data: Vec<f64> = vec![0.0; 10000];
        let algorithm = AdaptiveCompression::choose_algorithm(&compressible_data);
        assert!(matches!(algorithm, CompressionAlgorithm::Gzip));

        let random_data: Vec<u8> = (0..1000).map(|i| (i * 17 + 42) as u8).collect();
        let algorithm = AdaptiveCompression::choose_algorithm(&random_data);
        assert!(matches!(
            algorithm,
            CompressionAlgorithm::Gzip | CompressionAlgorithm::Lz4 | CompressionAlgorithm::None
        ));
    }

    /// Every symbolic level round-trips under gzip.
    #[test]
    fn test_compression_levels() {
        let data: Vec<f64> = vec![1.0; 1000];

        let levels = vec![
            CompressionLevel::Fast,
            CompressionLevel::Default,
            CompressionLevel::Best,
            CompressionLevel::Custom(5),
        ];

        for level in levels {
            let buffer = CompressedBuffer::new(&data, CompressionAlgorithm::Gzip, level)
                .expect("Failed to create buffer");
            let decompressed = buffer.decompress().expect("Failed to decompress");
            assert_eq!(data, decompressed);
        }
    }

    /// CompressionLevel -> u32 mapping is stable.
    #[test]
    fn test_compression_level_conversion() {
        assert_eq!(u32::from(CompressionLevel::Fast), 1);
        assert_eq!(u32::from(CompressionLevel::Default), 6);
        assert_eq!(u32::from(CompressionLevel::Best), 9);
        assert_eq!(u32::from(CompressionLevel::Custom(7)), 7);
    }

    /// All algorithms preserve data and report sizes correctly.
    #[test]
    fn test_all_compression_algorithms() {
        let data: Vec<u32> = (0..100).collect();

        let algorithms = vec![
            CompressionAlgorithm::Gzip,
            CompressionAlgorithm::Lz4,
            CompressionAlgorithm::None,
        ];

        for algo in algorithms {
            let buffer = CompressedBuffer::new(&data, algo, CompressionLevel::Default)
                .expect("Failed to create buffer");

            assert_eq!(buffer.algorithm(), algo);
            assert_eq!(
                buffer.original_size(),
                data.len() * std::mem::size_of::<u32>()
            );

            let decompressed = buffer.decompress().expect("Failed to decompress");
            assert_eq!(data, decompressed);

            if algo == CompressionAlgorithm::None {
                assert_eq!(buffer.compression_ratio(), 1.0);
            }
        }
    }

    /// LZ4 actually shrinks repetitive data.
    #[test]
    fn test_compressed_buffer_lz4() {
        let data: Vec<i32> = (0..10000).map(|i| i % 10).collect();

        let buffer =
            CompressedBuffer::new(&data, CompressionAlgorithm::Lz4, CompressionLevel::Fast)
                .expect("Failed to create LZ4 buffer");

        let decompressed = buffer.decompress().expect("Failed to decompress");
        assert_eq!(data, decompressed);

        assert!(buffer.original_size() > 0);
        assert!(buffer.compressed_size() > 0);

        let compression_ratio = buffer.compressed_size() as f64 / buffer.original_size() as f64;
        assert!(
            compression_ratio < 1.0,
            "Expected compression ratio < 1.0, got {}",
            compression_ratio
        );
    }

    /// Transposed (non-standard-layout) views round-trip correctly.
    #[test]
    fn test_compressed_array_non_standard_layout() {
        let array = Array2::<f64>::from_shape_fn((50, 50), |(i, j)| (i * 50 + j) as f64);
        let transposed = array.t();

        let compressed = CompressedArray::from_array(
            &transposed,
            CompressionAlgorithm::Gzip,
            CompressionLevel::Default,
        )
        .expect("Failed to create compressed array");

        let decompressed = compressed.to_array().expect("Failed to decompress");
        assert_eq!(transposed, decompressed);
        assert_eq!(compressed.shape().slice(), transposed.shape());
    }

    /// Pool lifecycle: add, lookup, remove, out-of-range ids, clear.
    #[test]
    fn test_compressed_buffer_pool_operations() {
        let mut pool = CompressedBufferPool::new(CompressionAlgorithm::Lz4, CompressionLevel::Fast);

        assert_eq!(pool.stats().buffer_count, 0);
        assert_eq!(pool.total_compression_ratio(), 1.0);
        assert_eq!(pool.memory_saved(), 0);

        let data1: Vec<f64> = vec![0.0; 500];
        let data2: Vec<f64> = (0..500).map(|i| i as f64).collect();
        let data3: Vec<f64> = vec![std::f64::consts::PI; 500];

        let id1 = pool.add_buffer(&data1).expect("Failed to add buffer 1");
        let id2 = pool.add_buffer(&data2).expect("Failed to add buffer 2");
        let id3 = pool.add_buffer(&data3).expect("Failed to add buffer 3");

        assert_eq!(pool.stats().buffer_count, 3);

        assert!(pool.get_buffer(id1).is_some());
        assert!(pool.get_buffer(id2).is_some());
        assert!(pool.get_buffer(id3).is_some());
        assert!(pool.get_buffer(100).is_none());

        let removed = pool.remove_buffer(id2).expect("Failed to remove buffer");
        let decompressed = removed.decompress().expect("Failed to decompress");
        assert_eq!(data2, decompressed);
        assert_eq!(pool.stats().buffer_count, 2);

        assert!(pool.remove_buffer(100).is_none());

        pool.clear();
        assert_eq!(pool.stats().buffer_count, 0);
        assert_eq!(pool.total_compression_ratio(), 1.0);
    }

    /// Display output contains every stat in both byte and MB form.
    #[test]
    fn test_compression_stats_display() {
        let stats = CompressionStats {
            buffer_count: 5,
            total_original_size: 10_485_760,
            total_compressed_size: 2_097_152,
            compression_ratio: 5.0,
            memory_saved: 8_388_608,
            algorithm: CompressionAlgorithm::Gzip,
        };

        let display = format!("{stats}");
        assert!(display.contains("Algorithm: Gzip"));
        assert!(display.contains("Buffers: 5"));
        assert!(display.contains("10.00 MB"));
        assert!(display.contains("2.00 MB"));
        assert!(display.contains("5.00x"));
        assert!(display.contains("8.00 MB"));
    }

    /// A tampered original_size must surface as InvalidData, not a panic.
    #[test]
    fn test_decompression_size_mismatch() {
        let data = vec![1u8, 2, 3, 4];
        let mut buffer =
            CompressedBuffer::new(&data, CompressionAlgorithm::None, CompressionLevel::Default)
                .expect("Failed to create buffer");

        buffer.original_size = 10; // deliberately corrupt the recorded size
        let result = buffer.decompress();
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
    }

    /// Derived PartialEq/Eq behave as expected.
    #[test]
    fn test_compression_algorithm_equality() {
        assert_eq!(CompressionAlgorithm::Gzip, CompressionAlgorithm::Gzip);
        assert_ne!(CompressionAlgorithm::Gzip, CompressionAlgorithm::Lz4);
        assert_ne!(CompressionAlgorithm::Lz4, CompressionAlgorithm::None);
    }

    /// Accessor methods report consistent sizes and shape.
    #[test]
    fn test_compressed_array_accessors() {
        let array = Array2::<f32>::from_elem((10, 20), 42.0);

        let compressed =
            CompressedArray::from_array(&array, CompressionAlgorithm::Gzip, CompressionLevel::Best)
                .expect("Failed to create compressed array");

        assert!(compressed.compression_ratio() > 1.0);
        assert!(compressed.compressed_size() < compressed.original_size());
        assert_eq!(compressed.shape(), &array.raw_dim());
    }

    /// Empty and single-element inputs both round-trip.
    #[test]
    fn test_compression_with_empty_data() {
        let data: Vec<f64> = vec![];

        let buffer =
            CompressedBuffer::new(&data, CompressionAlgorithm::None, CompressionLevel::Default)
                .expect("Failed to create buffer");

        assert_eq!(buffer.original_size(), 0);
        let decompressed = buffer.decompress().expect("Failed to decompress");
        assert_eq!(data, decompressed);

        let minimal_data: Vec<f64> = vec![1.0];
        let buffer2 = CompressedBuffer::new(
            &minimal_data,
            CompressionAlgorithm::Gzip,
            CompressionLevel::Default,
        )
        .expect("Failed to create buffer with minimal data");

        assert_eq!(buffer2.original_size(), std::mem::size_of::<f64>());
        let decompressed2 = buffer2.decompress().expect("Failed to decompress");
        assert_eq!(minimal_data, decompressed2);
    }

    /// A custom level above LZ4's maximum (12) is clamped, not rejected.
    #[test]
    fn test_lz4_compression_level_clamping() {
        let data: Vec<u64> = vec![12345; 100];

        let buffer = CompressedBuffer::new(
            &data,
            CompressionAlgorithm::Lz4,
            CompressionLevel::Custom(20),
        )
        .expect("Failed to create buffer");

        let decompressed = buffer.decompress().expect("Failed to decompress");
        assert_eq!(data, decompressed);
    }

    /// Tiny inputs should not be forced through gzip.
    #[test]
    fn test_adaptive_compression_small_data() {
        let small_data: Vec<u8> = vec![1, 2, 3, 4, 5];
        let algorithm = AdaptiveCompression::choose_algorithm(&small_data);
        assert!(matches!(
            algorithm,
            CompressionAlgorithm::None | CompressionAlgorithm::Lz4
        ));
    }

    /// Several Pod element types round-trip under each algorithm
    /// (previously these buffers were created but never checked).
    #[test]
    fn test_compression_types() {
        let u8_data: Vec<u8> = vec![255; 100];
        let u16_data: Vec<u16> = vec![65535; 100];
        let i64_data: Vec<i64> = vec![-1; 100];

        let u8_buffer = CompressedBuffer::new(
            &u8_data,
            CompressionAlgorithm::Gzip,
            CompressionLevel::Default,
        )
        .expect("Failed with u8");
        assert_eq!(u8_data, u8_buffer.decompress().expect("u8 round-trip"));

        let u16_buffer =
            CompressedBuffer::new(&u16_data, CompressionAlgorithm::Lz4, CompressionLevel::Fast)
                .expect("Failed with u16");
        assert_eq!(u16_data, u16_buffer.decompress().expect("u16 round-trip"));

        let i64_buffer = CompressedBuffer::new(
            &i64_data,
            CompressionAlgorithm::None,
            CompressionLevel::Best,
        )
        .expect("Failed with i64");
        assert_eq!(i64_data, i64_buffer.decompress().expect("i64 round-trip"));
    }
}