1use crate::error::CoreError;
7use ::ndarray::{Array, ArrayBase, Data, Dimension};
8use oxiarc_deflate::{gzip_compress, gzip_decompress};
9use oxiarc_lz4;
10use std::io::Result as IoResult;
11use std::marker::PhantomData;
12
/// Codec used to (de)compress a buffer's bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionAlgorithm {
    /// DEFLATE-based gzip via `oxiarc_deflate` — better ratios, slower.
    Gzip,
    /// LZ4 via `oxiarc_lz4` — faster, lighter compression.
    Lz4,
    /// Store the bytes verbatim (no compression).
    None,
}
23
/// Compression effort setting; converted to a numeric gzip-style level
/// via `From<CompressionLevel> for u32` (LZ4 currently ignores it).
#[derive(Debug, Clone, Copy)]
pub enum CompressionLevel {
    /// Fastest compression (numeric level 1).
    Fast,
    /// Balanced speed/ratio (numeric level 6).
    Default,
    /// Best ratio, slowest (numeric level 9).
    Best,
    /// Caller-supplied numeric level; gzip clamps it to 9 at use site.
    Custom(u32),
}
36
37impl From<CompressionLevel> for u32 {
38 fn from(level: CompressionLevel) -> Self {
39 match level {
40 CompressionLevel::Fast => 1,
41 CompressionLevel::Default => 6,
42 CompressionLevel::Best => 9,
43 CompressionLevel::Custom(level) => level,
44 }
45 }
46}
47
/// A typed slice of `T` held in compressed form; decompress on demand.
pub struct CompressedBuffer<T> {
    // Compressed payload; empty when the source slice was empty.
    compressed_data: Vec<u8>,
    // Codec used to produce (and required to read back) `compressed_data`.
    algorithm: CompressionAlgorithm,
    #[allow(dead_code)]
    compression_level: CompressionLevel,
    // Size of the source slice in BYTES (not element count).
    original_size: usize,
    // Carries the element type without storing any `T` values.
    phantom: PhantomData<T>,
}
57
58impl<T> CompressedBuffer<T>
59where
60 T: bytemuck::Pod + bytemuck::Zeroable,
61{
62 pub fn new(
64 data: &[T],
65 algorithm: CompressionAlgorithm,
66 level: CompressionLevel,
67 ) -> IoResult<Self> {
68 let original_size = std::mem::size_of_val(data);
69
70 let compressed_data = if data.is_empty() {
72 Vec::new()
73 } else {
74 let bytes = bytemuck::cast_slice(data);
75 match algorithm {
76 CompressionAlgorithm::Gzip => Self::compress_gzip(bytes, level)?,
77 CompressionAlgorithm::Lz4 => Self::compress_lz4(bytes, level)?,
78 CompressionAlgorithm::None => bytes.to_vec(),
79 }
80 };
81
82 Ok(Self {
83 compressed_data,
84 algorithm,
85 compression_level: level,
86 original_size,
87 phantom: PhantomData,
88 })
89 }
90
91 pub fn decompress(&self) -> IoResult<Vec<T>> {
93 if self.original_size == 0 {
95 return Ok(Vec::new());
96 }
97
98 let decompressed_bytes = match self.algorithm {
99 CompressionAlgorithm::Gzip => Self::decompress_gzip(&self.compressed_data)?,
100 CompressionAlgorithm::Lz4 => Self::decompress_lz4(&self.compressed_data)?,
101 CompressionAlgorithm::None => self.compressed_data.clone(),
102 };
103
104 if decompressed_bytes.len() != self.original_size {
106 return Err(std::io::Error::new(
107 std::io::ErrorKind::InvalidData,
108 "Decompressed data size doesn't match original size",
109 ));
110 }
111
112 let data = bytemuck::cast_slice(&decompressed_bytes).to_vec();
113 Ok(data)
114 }
115
116 pub fn compression_ratio(&self) -> f64 {
118 self.original_size as f64 / self.compressed_data.len() as f64
119 }
120
121 pub fn compressed_size(&self) -> usize {
123 self.compressed_data.len()
124 }
125
126 pub fn original_size(&self) -> usize {
128 self.original_size
129 }
130
131 pub fn algorithm(&self) -> CompressionAlgorithm {
133 self.algorithm
134 }
135
136 fn compress_gzip(data: &[u8], level: CompressionLevel) -> IoResult<Vec<u8>> {
137 let level_u8: u32 = level.into();
138 gzip_compress(data, level_u8.min(9) as u8).map_err(|e| std::io::Error::other(e.to_string()))
139 }
140
141 fn decompress_gzip(data: &[u8]) -> IoResult<Vec<u8>> {
142 gzip_decompress(data).map_err(|e| std::io::Error::other(e.to_string()))
143 }
144
145 fn compress_lz4(data: &[u8], _level: CompressionLevel) -> IoResult<Vec<u8>> {
146 oxiarc_lz4::compress(data).map_err(|e| std::io::Error::other(e.to_string()))
147 }
148
149 fn decompress_lz4(data: &[u8]) -> IoResult<Vec<u8>> {
150 oxiarc_lz4::decompress(data, 256 * 1024 * 1024)
151 .map_err(|e| std::io::Error::other(e.to_string()))
152 }
153}
154
/// An ndarray stored in compressed form together with its shape, so it can
/// be reconstructed by `to_array`.
pub struct CompressedArray<T, D>
where
    D: Dimension,
{
    // Compressed elements in row-major (standard layout) order.
    buffer: CompressedBuffer<T>,
    // Original array shape, used to rebuild the array on decompression.
    shape: D,
}
163
164impl<T, D> CompressedArray<T, D>
165where
166 T: bytemuck::Pod + bytemuck::Zeroable + Clone,
167 D: Dimension,
168{
169 pub fn from_array<S>(
171 array: &ArrayBase<S, D>,
172 algorithm: CompressionAlgorithm,
173 level: CompressionLevel,
174 ) -> Result<Self, CoreError>
175 where
176 S: Data<Elem = T>,
177 {
178 let data = if array.is_standard_layout() {
179 array.as_slice().expect("Operation failed").to_vec()
181 } else {
182 array.iter().cloned().collect()
184 };
185
186 let buffer = CompressedBuffer::new(&data, algorithm, level).map_err(|e| {
187 CoreError::CompressionError(crate::error::ErrorContext::new(e.to_string()))
188 })?;
189
190 Ok(Self {
191 buffer,
192 shape: array.raw_dim(),
193 })
194 }
195
196 pub fn to_array(&self) -> Result<Array<T, D>, CoreError> {
198 let data = self.buffer.decompress().map_err(|e| {
199 CoreError::CompressionError(crate::error::ErrorContext::new(e.to_string()))
200 })?;
201
202 Array::from_shape_vec(self.shape.clone(), data)
203 .map_err(|e| CoreError::InvalidShape(crate::error::ErrorContext::new(e.to_string())))
204 }
205
206 pub fn compression_ratio(&self) -> f64 {
208 self.buffer.compression_ratio()
209 }
210
211 pub fn compressed_size(&self) -> usize {
213 self.buffer.compressed_size()
214 }
215
216 pub fn original_size(&self) -> usize {
218 self.buffer.original_size()
219 }
220
221 pub const fn shape(&self) -> &D {
223 &self.shape
224 }
225}
226
/// A collection of [`CompressedBuffer`]s sharing one algorithm/level,
/// with running totals for aggregate compression statistics.
pub struct CompressedBufferPool<T> {
    // Buffers indexed by the ids handed out by `add_buffer`.
    buffers: Vec<CompressedBuffer<T>>,
    algorithm: CompressionAlgorithm,
    compression_level: CompressionLevel,
    // Running sum of uncompressed sizes (bytes) across all stored buffers.
    total_original_size: usize,
    // Running sum of compressed sizes (bytes) across all stored buffers.
    total_compressed_size: usize,
}
235
236impl<T> CompressedBufferPool<T>
237where
238 T: bytemuck::Pod + bytemuck::Zeroable,
239{
240 pub fn new(algorithm: CompressionAlgorithm, level: CompressionLevel) -> Self {
242 Self {
243 buffers: Vec::new(),
244 algorithm,
245 compression_level: level,
246 total_original_size: 0,
247 total_compressed_size: 0,
248 }
249 }
250
251 pub fn add_buffer(&mut self, data: &[T]) -> IoResult<usize> {
253 let buffer = CompressedBuffer::new(data, self.algorithm, self.compression_level)?;
254 self.total_original_size += buffer.original_size();
255 self.total_compressed_size += buffer.compressed_size();
256 let buffer_id = self.buffers.len();
257 self.buffers.push(buffer);
258 Ok(buffer_id)
259 }
260
261 pub fn get_buffer(&self, id: usize) -> Option<&CompressedBuffer<T>> {
263 self.buffers.get(id)
264 }
265
266 pub fn remove_buffer(&mut self, id: usize) -> Option<CompressedBuffer<T>> {
268 if id < self.buffers.len() {
269 let buffer = self.buffers.swap_remove(id);
270 self.total_original_size -= buffer.original_size();
271 self.total_compressed_size -= buffer.compressed_size();
272 Some(buffer)
273 } else {
274 None
275 }
276 }
277
278 pub fn total_compression_ratio(&self) -> f64 {
280 if self.total_compressed_size == 0 {
281 1.0
282 } else {
283 self.total_original_size as f64 / self.total_compressed_size as f64
284 }
285 }
286
287 pub fn memory_saved(&self) -> usize {
289 self.total_original_size
290 .saturating_sub(self.total_compressed_size)
291 }
292
293 pub fn stats(&self) -> CompressionStats {
295 CompressionStats {
296 buffer_count: self.buffers.len(),
297 total_original_size: self.total_original_size,
298 total_compressed_size: self.total_compressed_size,
299 compression_ratio: self.total_compression_ratio(),
300 memory_saved: self.memory_saved(),
301 algorithm: self.algorithm,
302 }
303 }
304
305 pub fn clear(&mut self) {
307 self.buffers.clear();
308 self.total_original_size = 0;
309 self.total_compressed_size = 0;
310 }
311}
312
/// Aggregate statistics for a [`CompressedBufferPool`], produced by
/// `CompressedBufferPool::stats`.
#[derive(Debug, Clone)]
pub struct CompressionStats {
    /// Number of buffers currently stored in the pool.
    pub buffer_count: usize,
    /// Sum of uncompressed sizes, in bytes.
    pub total_original_size: usize,
    /// Sum of compressed sizes, in bytes.
    pub total_compressed_size: usize,
    /// `total_original_size / total_compressed_size` (1.0 when empty).
    pub compression_ratio: f64,
    /// Bytes saved overall (saturating at zero).
    pub memory_saved: usize,
    /// Algorithm shared by every buffer in the pool.
    pub algorithm: CompressionAlgorithm,
}
323
324impl std::fmt::Display for CompressionStats {
325 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
326 write!(
327 f,
328 "Compression Stats:\n\
329 - Algorithm: {:?}\n\
330 - Buffers: {}\n\
331 - Original Size: {} bytes ({:.2} MB)\n\
332 - Compressed Size: {} bytes ({:.2} MB)\n\
333 - Compression Ratio: {:.2}x\n\
334 - Memory Saved: {} bytes ({:.2} MB)",
335 self.algorithm,
336 self.buffer_count,
337 self.total_original_size,
338 self.total_original_size as f64 / 1024.0 / 1024.0,
339 self.total_compressed_size,
340 self.total_compressed_size as f64 / 1024.0 / 1024.0,
341 self.compression_ratio,
342 self.memory_saved,
343 self.memory_saved as f64 / 1024.0 / 1024.0
344 )
345 }
346}
347
348pub struct AdaptiveCompression;
350
351impl AdaptiveCompression {
352 pub fn choose_algorithm<T>(data: &[T]) -> CompressionAlgorithm
354 where
355 T: bytemuck::Pod + bytemuck::Zeroable,
356 {
357 let bytes = bytemuck::cast_slice(data);
358
359 let sample_size = std::cmp::min(bytes.len(), 4096); let sample = &bytes[..sample_size];
362
363 let gzip_ratio = Self::estimate_compression_ratio(sample, CompressionAlgorithm::Gzip);
364 let lz4_ratio = Self::estimate_compression_ratio(sample, CompressionAlgorithm::Lz4);
365
366 if gzip_ratio > 2.0 {
368 CompressionAlgorithm::Gzip
369 } else if lz4_ratio > 1.5 {
370 CompressionAlgorithm::Lz4
371 } else {
372 CompressionAlgorithm::None
373 }
374 }
375
376 fn estimate_compression_ratio(data: &[u8], algorithm: CompressionAlgorithm) -> f64 {
377 match algorithm {
378 CompressionAlgorithm::Gzip => {
379 if let Ok(compressed) =
380 CompressedBuffer::<u8>::compress_gzip(data, CompressionLevel::Fast)
381 {
382 data.len() as f64 / compressed.len() as f64
383 } else {
384 1.0
385 }
386 }
387 CompressionAlgorithm::Lz4 => {
388 if let Ok(compressed) =
389 CompressedBuffer::<u8>::compress_lz4(data, CompressionLevel::Fast)
390 {
391 data.len() as f64 / compressed.len() as f64
392 } else {
393 1.0
394 }
395 }
396 CompressionAlgorithm::None => 1.0,
397 }
398 }
399}
400
#[cfg(test)]
mod tests {
    use super::*;
    use ::ndarray::Array2;

    /// Gzip round trip on a simple monotone sequence.
    #[test]
    fn test_compressed_buffer_basic() {
        let data: Vec<f64> = (0..1000).map(|i| i as f64).collect();

        let buffer =
            CompressedBuffer::new(&data, CompressionAlgorithm::Gzip, CompressionLevel::Default)
                .expect("Failed to create compressed buffer");

        let decompressed = buffer.decompress().expect("Failed to decompress");
        assert_eq!(data, decompressed);
        assert!(buffer.compression_ratio() > 1.0);
    }

    /// LZ4 round trip through the ndarray wrapper.
    #[test]
    fn test_compressed_array() {
        let array = Array2::<f64>::zeros((100, 100));

        let compressed =
            CompressedArray::from_array(&array, CompressionAlgorithm::Lz4, CompressionLevel::Fast)
                .expect("Failed to create compressed array");

        let decompressed = compressed.to_array().expect("Failed to decompress array");
        assert_eq!(array, decompressed);
    }

    #[test]
    fn test_compressed_buffer_pool() {
        let mut pool =
            CompressedBufferPool::new(CompressionAlgorithm::Gzip, CompressionLevel::Default);

        let data1: Vec<f32> = vec![1.0; 1000];
        let data2: Vec<f32> = (0..1000).map(|i| i as f32).collect();

        let id1 = pool.add_buffer(&data1).expect("Failed to add buffer 1");
        let id2 = pool.add_buffer(&data2).expect("Failed to add buffer 2");

        assert_eq!(pool.stats().buffer_count, 2);
        assert!(pool.total_compression_ratio() > 1.0);

        let buffer1 = pool.get_buffer(id1).expect("Failed to get buffer 1");
        let decompressed1 = buffer1.decompress().expect("Failed to decompress buffer 1");
        assert_eq!(data1, decompressed1);

        // `id2` was previously unused (warning); verify buffer 2 round-trips too.
        let buffer2 = pool.get_buffer(id2).expect("Failed to get buffer 2");
        let decompressed2 = buffer2.decompress().expect("Failed to decompress buffer 2");
        assert_eq!(data2, decompressed2);
    }

    #[test]
    fn test_adaptive_compression() {
        // Constant data should be extremely compressible -> gzip wins.
        let compressible_data: Vec<f64> = vec![0.0; 10000];
        let algorithm = AdaptiveCompression::choose_algorithm(&compressible_data);
        assert!(matches!(algorithm, CompressionAlgorithm::Gzip));

        // Pseudo-random bytes: any outcome is acceptable, just must not panic.
        let random_data: Vec<u8> = (0..1000).map(|i| (i * 17 + 42) as u8).collect();
        let algorithm = AdaptiveCompression::choose_algorithm(&random_data);
        assert!(matches!(
            algorithm,
            CompressionAlgorithm::Gzip | CompressionAlgorithm::Lz4 | CompressionAlgorithm::None
        ));
    }

    /// Every symbolic and custom level must round-trip under gzip.
    #[test]
    fn test_compression_levels() {
        let data: Vec<f64> = vec![1.0; 1000];

        let levels = vec![
            CompressionLevel::Fast,
            CompressionLevel::Default,
            CompressionLevel::Best,
            CompressionLevel::Custom(5),
        ];

        for level in levels {
            let buffer = CompressedBuffer::new(&data, CompressionAlgorithm::Gzip, level)
                .expect("Failed to create buffer");
            let decompressed = buffer.decompress().expect("Failed to decompress");
            assert_eq!(data, decompressed);
        }
    }

    #[test]
    fn test_compression_level_conversion() {
        assert_eq!(u32::from(CompressionLevel::Fast), 1);
        assert_eq!(u32::from(CompressionLevel::Default), 6);
        assert_eq!(u32::from(CompressionLevel::Best), 9);
        assert_eq!(u32::from(CompressionLevel::Custom(7)), 7);
    }

    #[test]
    fn test_all_compression_algorithms() {
        let data: Vec<u32> = (0..100).collect();

        let algorithms = vec![
            CompressionAlgorithm::Gzip,
            CompressionAlgorithm::Lz4,
            CompressionAlgorithm::None,
        ];

        for algo in algorithms {
            let buffer = CompressedBuffer::new(&data, algo, CompressionLevel::Default)
                .expect("Failed to create buffer");

            assert_eq!(buffer.algorithm(), algo);
            assert_eq!(
                buffer.original_size(),
                data.len() * std::mem::size_of::<u32>()
            );

            let decompressed = buffer.decompress().expect("Failed to decompress");
            assert_eq!(data, decompressed);

            if algo == CompressionAlgorithm::None {
                assert_eq!(buffer.compression_ratio(), 1.0);
            }
        }
    }

    #[test]
    fn test_compressed_buffer_lz4() {
        // Repetitive data so LZ4 actually shrinks it.
        let data: Vec<i32> = (0..10000).map(|i| i % 10).collect();

        let buffer =
            CompressedBuffer::new(&data, CompressionAlgorithm::Lz4, CompressionLevel::Fast)
                .expect("Failed to create LZ4 buffer");

        let decompressed = buffer.decompress().expect("Failed to decompress");
        assert_eq!(data, decompressed);

        assert!(buffer.original_size() > 0);
        assert!(buffer.compressed_size() > 0);

        // Here "ratio" is compressed/original, so shrinking means < 1.0.
        let compression_ratio = buffer.compressed_size() as f64 / buffer.original_size() as f64;
        assert!(
            compression_ratio < 1.0,
            "Expected compression ratio < 1.0, got {}",
            compression_ratio
        );
    }

    /// A transposed view is not contiguous, exercising the gather path.
    #[test]
    fn test_compressed_array_non_standard_layout() {
        let array = Array2::<f64>::from_shape_fn((50, 50), |(i, j)| (i * 50 + j) as f64);
        let transposed = array.t();

        let compressed = CompressedArray::from_array(
            &transposed,
            CompressionAlgorithm::Gzip,
            CompressionLevel::Default,
        )
        .expect("Failed to create compressed array");

        let decompressed = compressed.to_array().expect("Failed to decompress");
        assert_eq!(transposed, decompressed);
        assert_eq!(compressed.shape().slice(), transposed.shape());
    }

    #[test]
    fn test_compressed_buffer_pool_operations() {
        let mut pool = CompressedBufferPool::new(CompressionAlgorithm::Lz4, CompressionLevel::Fast);

        // Empty pool invariants.
        assert_eq!(pool.stats().buffer_count, 0);
        assert_eq!(pool.total_compression_ratio(), 1.0);
        assert_eq!(pool.memory_saved(), 0);

        let data1: Vec<f64> = vec![0.0; 500];
        let data2: Vec<f64> = (0..500).map(|i| i as f64).collect();
        let data3: Vec<f64> = vec![std::f64::consts::PI; 500];

        let id1 = pool.add_buffer(&data1).expect("Failed to add buffer 1");
        let id2 = pool.add_buffer(&data2).expect("Failed to add buffer 2");
        let id3 = pool.add_buffer(&data3).expect("Failed to add buffer 3");

        assert_eq!(pool.stats().buffer_count, 3);

        assert!(pool.get_buffer(id1).is_some());
        assert!(pool.get_buffer(id2).is_some());
        assert!(pool.get_buffer(id3).is_some());
        assert!(pool.get_buffer(100).is_none());

        let removed = pool.remove_buffer(id2).expect("Failed to remove buffer");
        let decompressed = removed.decompress().expect("Failed to decompress");
        assert_eq!(data2, decompressed);
        assert_eq!(pool.stats().buffer_count, 2);

        assert!(pool.remove_buffer(100).is_none());

        pool.clear();
        assert_eq!(pool.stats().buffer_count, 0);
        assert_eq!(pool.total_compression_ratio(), 1.0);
    }

    #[test]
    fn test_compression_stats_display() {
        let stats = CompressionStats {
            buffer_count: 5,
            total_original_size: 10_485_760,  // 10 MB
            total_compressed_size: 2_097_152, // 2 MB
            compression_ratio: 5.0,
            memory_saved: 8_388_608, // 8 MB
            algorithm: CompressionAlgorithm::Gzip,
        };

        let display = format!("{stats}");
        assert!(display.contains("Algorithm: Gzip"));
        assert!(display.contains("Buffers: 5"));
        assert!(display.contains("10.00 MB"));
        assert!(display.contains("2.00 MB"));
        assert!(display.contains("5.00x"));
        assert!(display.contains("8.00 MB"));
    }

    #[test]
    fn test_decompression_size_mismatch() {
        let data = vec![1u8, 2, 3, 4];
        let mut buffer =
            CompressedBuffer::new(&data, CompressionAlgorithm::None, CompressionLevel::Default)
                .expect("Failed to create buffer");

        // Deliberately corrupt the recorded size to trigger the check.
        buffer.original_size = 10;
        let result = buffer.decompress();
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
    }

    #[test]
    fn test_compression_algorithm_equality() {
        assert_eq!(CompressionAlgorithm::Gzip, CompressionAlgorithm::Gzip);
        assert_ne!(CompressionAlgorithm::Gzip, CompressionAlgorithm::Lz4);
        assert_ne!(CompressionAlgorithm::Lz4, CompressionAlgorithm::None);
    }

    #[test]
    fn test_compressed_array_accessors() {
        let array = Array2::<f32>::from_elem((10, 20), 42.0);

        let compressed =
            CompressedArray::from_array(&array, CompressionAlgorithm::Gzip, CompressionLevel::Best)
                .expect("Failed to create compressed array");

        assert!(compressed.compression_ratio() > 1.0);
        assert!(compressed.compressed_size() < compressed.original_size());
        assert_eq!(compressed.shape(), &array.raw_dim());
    }

    #[test]
    fn test_compression_with_empty_data() {
        let data: Vec<f64> = vec![];

        let buffer =
            CompressedBuffer::new(&data, CompressionAlgorithm::None, CompressionLevel::Default)
                .expect("Failed to create buffer");

        assert_eq!(buffer.original_size(), 0);
        let decompressed = buffer.decompress().expect("Failed to decompress");
        assert_eq!(data, decompressed);

        // Single-element edge case.
        let minimal_data: Vec<f64> = vec![1.0];
        let buffer2 = CompressedBuffer::new(
            &minimal_data,
            CompressionAlgorithm::Gzip,
            CompressionLevel::Default,
        )
        .expect("Failed to create buffer with minimal data");

        assert_eq!(buffer2.original_size(), std::mem::size_of::<f64>());
        let decompressed2 = buffer2.decompress().expect("Failed to decompress");
        assert_eq!(minimal_data, decompressed2);
    }

    /// A custom level above gzip's max must still work (level is clamped;
    /// LZ4 ignores it entirely).
    #[test]
    fn test_lz4_compression_level_clamping() {
        let data: Vec<u64> = vec![12345; 100];

        let buffer = CompressedBuffer::new(
            &data,
            CompressionAlgorithm::Lz4,
            CompressionLevel::Custom(20),
        )
        .expect("Failed to create buffer");

        let decompressed = buffer.decompress().expect("Failed to decompress");
        assert_eq!(data, decompressed);
    }

    #[test]
    fn test_adaptive_compression_small_data() {
        let small_data: Vec<u8> = vec![1, 2, 3, 4, 5];
        let algorithm = AdaptiveCompression::choose_algorithm(&small_data);
        assert!(matches!(
            algorithm,
            CompressionAlgorithm::None | CompressionAlgorithm::Lz4
        ));
    }

    /// Different Pod element types must all round-trip.
    #[test]
    fn test_compression_types() {
        let u8_data: Vec<u8> = vec![255; 100];
        let u16_data: Vec<u16> = vec![65535; 100];
        let i64_data: Vec<i64> = vec![-1; 100];

        let u8_buffer = CompressedBuffer::new(
            &u8_data,
            CompressionAlgorithm::Gzip,
            CompressionLevel::Default,
        )
        .expect("Failed with u8");
        let u16_buffer =
            CompressedBuffer::new(&u16_data, CompressionAlgorithm::Lz4, CompressionLevel::Fast)
                .expect("Failed with u16");
        let i64_buffer = CompressedBuffer::new(
            &i64_data,
            CompressionAlgorithm::None,
            CompressionLevel::Best,
        )
        .expect("Failed with i64");

        // These buffers were previously created but never checked (and
        // produced unused-variable warnings); assert the round trips.
        assert_eq!(u8_data, u8_buffer.decompress().expect("u8 round trip"));
        assert_eq!(u16_data, u16_buffer.decompress().expect("u16 round trip"));
        assert_eq!(i64_data, i64_buffer.decompress().expect("i64 round trip"));
    }
}
745}