backup_suite/compression/
engines.rs

1//! # 圧縮エンジン
2//!
3//! zstd と gzip アルゴリズムによる高性能データ圧縮システム
4
5use crate::error::{BackupError, Result};
6use clap::ValueEnum;
7use flate2::{read::GzDecoder, write::GzEncoder, Compression};
8use std::io::{Read, Write};
9use std::str::FromStr;
10use zstd::{Decoder as ZstdDecoder, Encoder as ZstdEncoder};
11
12/// 圧縮アルゴリズムタイプ
13#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
14pub enum CompressionType {
15    /// zstd 圧縮(高速・高圧縮率)
16    Zstd,
17    /// gzip 圧縮(互換性重視)
18    Gzip,
19    /// 圧縮なし
20    None,
21}
22
23impl FromStr for CompressionType {
24    type Err = BackupError;
25
26    fn from_str(s: &str) -> Result<Self> {
27        match s.to_lowercase().as_str() {
28            "zstd" => Ok(Self::Zstd),
29            "gzip" => Ok(Self::Gzip),
30            "none" => Ok(Self::None),
31            _ => Err(BackupError::CompressionError(format!(
32                "不明な圧縮タイプ: {s}"
33            ))),
34        }
35    }
36}
37
38impl CompressionType {
39    /// 圧縮タイプを文字列に変換
40    #[must_use]
41    pub fn to_str(&self) -> &'static str {
42        match self {
43            Self::Zstd => "zstd",
44            Self::Gzip => "gzip",
45            Self::None => "none",
46        }
47    }
48
49    /// ファイル拡張子を取得
50    #[must_use]
51    pub fn file_extension(&self) -> &'static str {
52        match self {
53            Self::Zstd => ".zst",
54            Self::Gzip => ".gz",
55            Self::None => "",
56        }
57    }
58}
59
/// Tunable parameters for a compression engine.
#[derive(Debug, Clone)]
pub struct CompressionConfig {
    /// Compression level (documented range: 1-22 for zstd, 1-9 for gzip).
    pub level: i32,
    /// Chunk size in bytes.
    pub chunk_size: usize,
    /// I/O buffer size in bytes.
    pub buffer_size: usize,
}
70
71impl CompressionConfig {
72    /// zstd用のデフォルト設定(最適化版)
73    #[must_use]
74    pub fn zstd_default() -> Self {
75        Self {
76            level: 5,                    // 速度と圧縮率のバランス(3→5に最適化)
77            chunk_size: 2 * 1024 * 1024, // 2MB チャンク(キャッシュ効率向上)
78            buffer_size: 128 * 1024,     // 128KB バッファ(I/O効率向上)
79        }
80    }
81
82    /// zstd用の適応的設定(CPU数に基づく動的調整)
83    #[must_use]
84    pub fn zstd_adaptive() -> Self {
85        let cpu_count = std::thread::available_parallelism()
86            .map(std::num::NonZero::get)
87            .unwrap_or(4);
88
89        Self {
90            level: if cpu_count >= 8 {
91                7
92            } else if cpu_count >= 4 {
93                5
94            } else {
95                3
96            },
97            chunk_size: 2 * 1024 * 1024,
98            buffer_size: 128 * 1024,
99        }
100    }
101
102    /// gzip用のデフォルト設定
103    #[must_use]
104    pub fn gzip_default() -> Self {
105        Self {
106            level: 6,                // デフォルトレベル
107            chunk_size: 1024 * 1024, // 1MB チャンク
108            buffer_size: 64 * 1024,  // 64KB バッファ
109        }
110    }
111
112    /// 高速圧縮設定
113    #[must_use]
114    pub fn fast(compression_type: CompressionType) -> Self {
115        match compression_type {
116            CompressionType::Zstd | CompressionType::Gzip => Self {
117                level: 1,
118                chunk_size: 2 * 1024 * 1024, // 2MB チャンク(高速化)
119                buffer_size: 128 * 1024,     // 128KB バッファ
120            },
121            CompressionType::None => Self::none(),
122        }
123    }
124
125    /// 高圧縮率設定
126    #[must_use]
127    pub fn best(compression_type: CompressionType) -> Self {
128        match compression_type {
129            CompressionType::Zstd => Self {
130                level: 19,              // 高圧縮率
131                chunk_size: 512 * 1024, // 512KB チャンク(圧縮率重視)
132                buffer_size: 32 * 1024, // 32KB バッファ
133            },
134            CompressionType::Gzip => Self {
135                level: 9,
136                chunk_size: 512 * 1024,
137                buffer_size: 32 * 1024,
138            },
139            CompressionType::None => Self::none(),
140        }
141    }
142
143    /// 圧縮なし設定
144    #[must_use]
145    pub fn none() -> Self {
146        Self {
147            level: 0,
148            chunk_size: 4 * 1024 * 1024, // 4MB チャンク(コピーのみ)
149            buffer_size: 256 * 1024,     // 256KB バッファ
150        }
151    }
152}
153
154/// 圧縮されたデータ
155#[derive(Debug, Clone)]
156pub struct CompressedData {
157    /// 圧縮タイプ
158    pub compression_type: CompressionType,
159    /// 圧縮レベル
160    pub compression_level: i32,
161    /// 元のデータサイズ
162    pub original_size: u64,
163    /// 圧縮後のデータサイズ
164    pub compressed_size: u64,
165    /// 圧縮されたデータ
166    pub data: Vec<u8>,
167}
168
169impl CompressedData {
170    /// 圧縮率を計算
171    #[must_use]
172    #[allow(clippy::cast_precision_loss)]
173    pub fn compression_ratio(&self) -> f64 {
174        if self.original_size == 0 {
175            return 0.0;
176        }
177        (self.compressed_size as f64) / (self.original_size as f64)
178    }
179
180    /// 圧縮率をパーセンテージで取得
181    #[must_use]
182    pub fn compression_percentage(&self) -> f64 {
183        (1.0 - self.compression_ratio()) * 100.0
184    }
185
186    /// バイナリ形式にシリアライズ
187    #[must_use]
188    pub fn to_bytes(&self) -> Vec<u8> {
189        let mut result = Vec::with_capacity(25 + self.data.len());
190
191        // ヘッダー情報
192        result.push(match self.compression_type {
193            CompressionType::Zstd => 1,
194            CompressionType::Gzip => 2,
195            CompressionType::None => 0,
196        });
197        #[allow(clippy::cast_sign_loss)]
198        result.extend_from_slice(&(self.compression_level as u32).to_le_bytes());
199        result.extend_from_slice(&self.original_size.to_le_bytes());
200        result.extend_from_slice(&self.compressed_size.to_le_bytes());
201
202        // データ
203        result.extend_from_slice(&self.data);
204        result
205    }
206
207    /// バイナリ形式からデシリアライズ
208    ///
209    /// # Errors
210    ///
211    /// 以下の場合にエラーを返します:
212    /// - データが最小長(25バイト)未満の場合
213    /// - 不明な圧縮タイプの場合
214    /// - データの長さが一致しない場合
215    pub fn from_bytes(data: &[u8]) -> Result<Self> {
216        if data.len() < 25 {
217            return Err(BackupError::CompressionError(
218                "圧縮データが短すぎます".to_string(),
219            ));
220        }
221
222        // SAFETY: Length check above ensures data has at least 25 bytes
223        let compression_type = match *data
224            .first()
225            .ok_or_else(|| BackupError::CompressionError("データが空です".to_string()))?
226        {
227            1 => CompressionType::Zstd,
228            2 => CompressionType::Gzip,
229            0 => CompressionType::None,
230            _ => {
231                return Err(BackupError::CompressionError(
232                    "不明な圧縮タイプ".to_string(),
233                ))
234            }
235        };
236
237        #[allow(clippy::cast_possible_wrap)]
238        let compression_level = u32::from_le_bytes(
239            data.get(1..5)
240                .and_then(|s| s.try_into().ok())
241                .ok_or_else(|| {
242                    BackupError::CompressionError("圧縮レベルの読み取りに失敗".to_string())
243                })?,
244        ) as i32;
245        let original_size =
246            u64::from_le_bytes(data.get(5..13).and_then(|s| s.try_into().ok()).ok_or_else(
247                || BackupError::CompressionError("元のサイズの読み取りに失敗".to_string()),
248            )?);
249        let compressed_size = u64::from_le_bytes(
250            data.get(13..21)
251                .and_then(|s| s.try_into().ok())
252                .ok_or_else(|| {
253                    BackupError::CompressionError("圧縮後サイズの読み取りに失敗".to_string())
254                })?,
255        );
256
257        #[allow(clippy::cast_possible_truncation)]
258        if data.len() != 21 + compressed_size as usize {
259            return Err(BackupError::CompressionError(
260                "圧縮データの長さが一致しません".to_string(),
261            ));
262        }
263
264        Ok(Self {
265            compression_type,
266            compression_level,
267            original_size,
268            compressed_size,
269            data: data
270                .get(21..)
271                .ok_or_else(|| BackupError::CompressionError("データの読み取りに失敗".to_string()))?
272                .to_vec(),
273        })
274    }
275}
276
277/// 圧縮エンジン
278pub struct CompressionEngine {
279    config: CompressionConfig,
280    compression_type: CompressionType,
281}
282
283impl CompressionEngine {
284    /// 新しい圧縮エンジンを作成
285    #[must_use]
286    pub fn new(compression_type: CompressionType, config: CompressionConfig) -> Self {
287        Self {
288            config,
289            compression_type,
290        }
291    }
292
293    /// zstd圧縮エンジンを作成
294    #[must_use]
295    pub fn zstd(config: Option<CompressionConfig>) -> Self {
296        Self::new(
297            CompressionType::Zstd,
298            config.unwrap_or_else(CompressionConfig::zstd_default),
299        )
300    }
301
302    /// gzip圧縮エンジンを作成
303    #[must_use]
304    pub fn gzip(config: Option<CompressionConfig>) -> Self {
305        Self::new(
306            CompressionType::Gzip,
307            config.unwrap_or_else(CompressionConfig::gzip_default),
308        )
309    }
310
311    /// 圧縮なしエンジンを作成
312    #[must_use]
313    pub fn none() -> Self {
314        Self::new(CompressionType::None, CompressionConfig::none())
315    }
316
317    /// データを圧縮
318    ///
319    /// # Errors
320    ///
321    /// 圧縮エンジンがデータの圧縮に失敗した場合にエラーを返します。
322    pub fn compress(&self, data: &[u8]) -> Result<CompressedData> {
323        let original_size = data.len() as u64;
324
325        let compressed_data = match self.compression_type {
326            CompressionType::Zstd => self.compress_zstd(data)?,
327            CompressionType::Gzip => self.compress_gzip(data)?,
328            CompressionType::None => data.to_vec(),
329        };
330
331        let compressed_size = compressed_data.len() as u64;
332
333        Ok(CompressedData {
334            compression_type: self.compression_type,
335            compression_level: self.config.level,
336            original_size,
337            compressed_size,
338            data: compressed_data,
339        })
340    }
341
342    /// データを展開
343    ///
344    /// # Errors
345    ///
346    /// 圧縮エンジンがデータの展開に失敗した場合にエラーを返します。
347    pub fn decompress(&self, compressed_data: &CompressedData) -> Result<Vec<u8>> {
348        match compressed_data.compression_type {
349            CompressionType::Zstd => Self::decompress_zstd(&compressed_data.data),
350            CompressionType::Gzip => Self::decompress_gzip(&compressed_data.data),
351            CompressionType::None => Ok(compressed_data.data.clone()),
352        }
353    }
354
355    /// ストリーミング圧縮
356    ///
357    /// # Errors
358    ///
359    /// 以下の場合にエラーを返します:
360    /// - リーダーからの読み取りに失敗した場合
361    /// - 圧縮エンジンの作成・実行に失敗した場合
362    /// - ライターへの書き込みに失敗した場合
363    #[allow(clippy::indexing_slicing)] // read() guarantees bytes_read <= buffer.len()
364    pub fn compress_stream<R: Read, W: Write>(
365        &self,
366        mut reader: R,
367        mut writer: W,
368    ) -> Result<CompressedData> {
369        let mut original_size = 0u64;
370        let mut compressed_buffer = Vec::new();
371
372        match self.compression_type {
373            CompressionType::Zstd => {
374                let mut encoder = ZstdEncoder::new(&mut compressed_buffer, self.config.level)
375                    .map_err(|e| {
376                        BackupError::CompressionError(format!("Zstdエンコーダ作成エラー: {e}"))
377                    })?;
378
379                let mut buffer = vec![0u8; self.config.buffer_size];
380                loop {
381                    let bytes_read = reader.read(&mut buffer)?;
382                    if bytes_read == 0 {
383                        break;
384                    }
385                    original_size += bytes_read as u64;
386                    encoder.write_all(&buffer[..bytes_read]).map_err(|e| {
387                        BackupError::CompressionError(format!("Zstd圧縮エラー: {e}"))
388                    })?;
389                }
390
391                encoder
392                    .finish()
393                    .map_err(|e| BackupError::CompressionError(format!("Zstd完了エラー: {e}")))?;
394            }
395            CompressionType::Gzip => {
396                #[allow(clippy::cast_sign_loss)]
397                let mut encoder = GzEncoder::new(
398                    &mut compressed_buffer,
399                    Compression::new(self.config.level as u32),
400                );
401
402                let mut buffer = vec![0u8; self.config.buffer_size];
403                loop {
404                    let bytes_read = reader.read(&mut buffer)?;
405                    if bytes_read == 0 {
406                        break;
407                    }
408                    original_size += bytes_read as u64;
409                    encoder.write_all(&buffer[..bytes_read])?;
410                }
411
412                encoder.finish()?;
413            }
414            CompressionType::None => {
415                let mut buffer = vec![0u8; self.config.buffer_size];
416                loop {
417                    let bytes_read = reader.read(&mut buffer)?;
418                    if bytes_read == 0 {
419                        break;
420                    }
421                    original_size += bytes_read as u64;
422                    compressed_buffer.extend_from_slice(&buffer[..bytes_read]);
423                }
424            }
425        }
426
427        writer.write_all(&compressed_buffer)?;
428
429        Ok(CompressedData {
430            compression_type: self.compression_type,
431            compression_level: self.config.level,
432            original_size,
433            compressed_size: compressed_buffer.len() as u64,
434            data: compressed_buffer,
435        })
436    }
437
438    /// ストリーミング展開
439    ///
440    /// # Errors
441    ///
442    /// 以下の場合にエラーを返します:
443    /// - デコーダーの作成に失敗した場合
444    /// - リーダーからの読み取りに失敗した場合
445    /// - ライターへの書き込みに失敗した場合
446    #[allow(clippy::indexing_slicing)] // read() guarantees bytes_read <= buffer.len()
447    pub fn decompress_stream<R: Read, W: Write>(
448        &self,
449        reader: R,
450        mut writer: W,
451        compression_type: CompressionType,
452    ) -> Result<u64> {
453        let mut decompressed_size = 0u64;
454
455        match compression_type {
456            CompressionType::Zstd => {
457                let mut decoder = ZstdDecoder::new(reader).map_err(|e| {
458                    BackupError::CompressionError(format!("Zstdデコーダ作成エラー: {e}"))
459                })?;
460
461                let mut buffer = vec![0u8; self.config.buffer_size];
462                loop {
463                    let bytes_read = decoder.read(&mut buffer).map_err(|e| {
464                        BackupError::CompressionError(format!("Zstd展開エラー: {e}"))
465                    })?;
466                    if bytes_read == 0 {
467                        break;
468                    }
469                    writer.write_all(&buffer[..bytes_read])?;
470                    decompressed_size += bytes_read as u64;
471                }
472            }
473            CompressionType::Gzip => {
474                let mut decoder = GzDecoder::new(reader);
475
476                let mut buffer = vec![0u8; self.config.buffer_size];
477                loop {
478                    let bytes_read = decoder.read(&mut buffer)?;
479                    if bytes_read == 0 {
480                        break;
481                    }
482                    writer.write_all(&buffer[..bytes_read])?;
483                    decompressed_size += bytes_read as u64;
484                }
485            }
486            CompressionType::None => {
487                let mut reader = reader;
488                let mut buffer = vec![0u8; self.config.buffer_size];
489                loop {
490                    let bytes_read = reader.read(&mut buffer)?;
491                    if bytes_read == 0 {
492                        break;
493                    }
494                    writer.write_all(&buffer[..bytes_read])?;
495                    decompressed_size += bytes_read as u64;
496                }
497            }
498        }
499
500        Ok(decompressed_size)
501    }
502
503    // プライベートメソッド
504
505    fn compress_zstd(&self, data: &[u8]) -> Result<Vec<u8>> {
506        zstd::encode_all(data, self.config.level)
507            .map_err(|e| BackupError::CompressionError(format!("Zstd圧縮エラー: {e}")))
508    }
509
510    fn decompress_zstd(data: &[u8]) -> Result<Vec<u8>> {
511        zstd::decode_all(data)
512            .map_err(|e| BackupError::CompressionError(format!("Zstd展開エラー: {e}")))
513    }
514
515    fn compress_gzip(&self, data: &[u8]) -> Result<Vec<u8>> {
516        #[allow(clippy::cast_sign_loss)]
517        let mut encoder = GzEncoder::new(Vec::new(), Compression::new(self.config.level as u32));
518        encoder.write_all(data)?;
519        encoder
520            .finish()
521            .map_err(|e| BackupError::CompressionError(format!("Gzip圧縮エラー: {e}")))
522    }
523
524    fn decompress_gzip(data: &[u8]) -> Result<Vec<u8>> {
525        use std::io::Cursor;
526        let mut decoder = GzDecoder::new(Cursor::new(data));
527        let mut result = Vec::new();
528        decoder.read_to_end(&mut result)?;
529        Ok(result)
530    }
531}
532
#[cfg(test)]
#[allow(clippy::unwrap_used)] // Tests can use unwrap
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Names parse to the matching variant, and the string helpers agree.
    #[test]
    fn test_compression_types() {
        let expectations = [
            ("zstd", CompressionType::Zstd),
            ("gzip", CompressionType::Gzip),
            ("none", CompressionType::None),
        ];
        for (name, expected) in expectations {
            assert_eq!(name.parse::<CompressionType>().unwrap(), expected);
        }

        assert_eq!(CompressionType::Zstd.to_str(), "zstd");
        assert_eq!(CompressionType::Gzip.file_extension(), ".gz");
    }

    /// zstd round-trips repetitive data and actually shrinks it.
    #[test]
    fn test_zstd_compression() {
        let input = b"Hello, World! This is a test message for compression.".repeat(100);
        let engine = CompressionEngine::zstd(None);

        let packed = engine.compress(&input).unwrap();
        let restored = engine.decompress(&packed).unwrap();

        assert_eq!(input, restored);
        assert!(packed.compressed_size < packed.original_size);
        assert!(packed.compression_percentage() > 0.0);
    }

    /// gzip round-trips repetitive data and actually shrinks it.
    #[test]
    fn test_gzip_compression() {
        let input = b"Test data for gzip compression algorithm.".repeat(50);
        let engine = CompressionEngine::gzip(None);

        let packed = engine.compress(&input).unwrap();
        let restored = engine.decompress(&packed).unwrap();

        assert_eq!(input, restored);
        assert!(packed.compressed_size < packed.original_size);
    }

    /// The pass-through engine leaves both the bytes and the size untouched.
    #[test]
    fn test_no_compression() {
        let input: &[u8] = b"This data should not be compressed";
        let engine = CompressionEngine::none();

        let packed = engine.compress(input).unwrap();
        let restored = engine.decompress(&packed).unwrap();

        assert_eq!(input, restored.as_slice());
        assert_eq!(packed.compressed_size, packed.original_size);
        assert_eq!(packed.compression_percentage(), 0.0);
    }

    /// A compressed value survives `to_bytes` / `from_bytes` and still
    /// decompresses to the original input.
    #[test]
    fn test_compressed_data_serialization() {
        let input: &[u8] = b"Serialization test data";
        let engine = CompressionEngine::zstd(None);

        let wire = engine.compress(input).unwrap().to_bytes();
        let parsed = CompressedData::from_bytes(&wire).unwrap();

        let restored = engine.decompress(&parsed).unwrap();
        assert_eq!(input, restored.as_slice());
    }

    /// Streaming compression followed by streaming decompression reproduces
    /// the input and reports the right sizes.
    #[test]
    fn test_stream_compression() {
        let input = b"Stream compression test data. ".repeat(1000);
        let engine = CompressionEngine::zstd(None);

        let mut packed = Vec::new();
        let meta = engine
            .compress_stream(Cursor::new(input.as_slice()), &mut packed)
            .unwrap();
        assert_eq!(meta.original_size, input.len() as u64);

        let mut restored = Vec::new();
        let restored_len = engine
            .decompress_stream(
                Cursor::new(packed.as_slice()),
                &mut restored,
                CompressionType::Zstd,
            )
            .unwrap();

        assert_eq!(restored_len, input.len() as u64);
        assert_eq!(input, restored);
    }
}