Skip to main content

idb/innodb/
compression.rs

//! Tablespace compression detection and decompression.
//!
//! Detects the compression algorithm from FSP flags and provides zlib, LZ4, and
//! ZSTD decompression helpers for compressed page data.
//!
//! Supports both the MySQL and MariaDB flag layouts:
//! - MySQL/Percona: compression algo in bits 11-12
//! - MariaDB full_crc32: compression algo in bits 5-7
//! - MariaDB original: PAGE_COMPRESSION flag at bit 16
//! - MariaDB page-level: algorithm ID embedded per-page at offset 26
10
11use flate2::read::ZlibDecoder;
12use std::io::Read;
13
14use crate::innodb::vendor::VendorInfo;
15
/// Compression algorithm detected or used for a page.
///
/// This is a small `Copy` tag type. It implements `Hash` in addition to `Eq`,
/// so it can be used directly as a `HashMap`/`HashSet` key (for example when
/// tallying pages per algorithm).
///
/// # Examples
///
/// ```
/// use idb::innodb::compression::CompressionAlgorithm;
///
/// let algo = CompressionAlgorithm::Zlib;
/// assert_eq!(format!("{algo}"), "Zlib");
///
/// let algo = CompressionAlgorithm::None;
/// assert_eq!(format!("{algo}"), "None");
///
/// let algo = CompressionAlgorithm::Lz4;
/// assert_eq!(format!("{algo}"), "LZ4");
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CompressionAlgorithm {
    /// No compression; also what the detection helpers report for an
    /// unrecognised algorithm ID.
    None,
    /// zlib compression (see [`decompress_zlib`]).
    Zlib,
    /// LZ4 compression (see [`decompress_lz4`]).
    Lz4,
    /// MariaDB LZO compression (detection only — not decompressed).
    Lzo,
    /// MariaDB LZMA compression (detection only — not decompressed).
    Lzma,
    /// MariaDB bzip2 compression (detection only — not decompressed).
    Bzip2,
    /// MariaDB Snappy compression (detection only — not decompressed).
    Snappy,
    /// MySQL 8.0.14+ ZSTD compression (decompressed via ruzstd).
    Zstd,
}
48
49/// Detect the compression algorithm from FSP space flags.
50///
51/// When `vendor_info` is provided:
52/// - MariaDB full_crc32: reads compression algo from bits 5-7
53/// - MariaDB original: checks bit 16 for PAGE_COMPRESSION (algo is per-page)
54/// - MySQL/Percona: reads bits 11-12
55///
56/// Without vendor info, defaults to MySQL bit layout.
57///
58/// # Examples
59///
60/// ```
61/// use idb::innodb::compression::{detect_compression, CompressionAlgorithm};
62/// use idb::innodb::vendor::{VendorInfo, MariaDbFormat};
63///
64/// // No compression flags → None
65/// assert_eq!(detect_compression(0, None), CompressionAlgorithm::None);
66///
67/// // MySQL: bit 11 set → Zlib
68/// assert_eq!(detect_compression(1 << 11, None), CompressionAlgorithm::Zlib);
69///
70/// // MySQL: bits 11-12 = 2 → LZ4
71/// assert_eq!(detect_compression(2 << 11, None), CompressionAlgorithm::Lz4);
72///
73/// // MariaDB full_crc32: bits 5-7 = 1 → Zlib
74/// let maria = VendorInfo::mariadb(MariaDbFormat::FullCrc32);
75/// let flags = 0x10 | (1 << 5); // bit 4 (marker) + algo=1
76/// assert_eq!(detect_compression(flags, Some(&maria)), CompressionAlgorithm::Zlib);
77/// ```
78pub fn detect_compression(
79    fsp_flags: u32,
80    vendor_info: Option<&VendorInfo>,
81) -> CompressionAlgorithm {
82    use crate::innodb::constants::*;
83
84    if let Some(vi) = vendor_info {
85        if vi.is_full_crc32() {
86            // MariaDB full_crc32: compression algo in bits 5-7
87            let algo = (fsp_flags & MARIADB_FSP_FLAGS_FCRC32_COMPRESSED_ALGO_MASK) >> 5;
88            return mariadb_algo_from_id(algo as u8);
89        }
90        if vi.vendor == crate::innodb::vendor::InnoDbVendor::MariaDB {
91            // MariaDB original: bit 16 indicates page compression is enabled
92            // but the algorithm is stored per-page, not in FSP flags
93            if fsp_flags & MARIADB_FSP_FLAGS_PAGE_COMPRESSION != 0 {
94                // Algorithm is per-page; return Zlib as a default indicator
95                // that page compression is enabled. Actual algo is in each page.
96                return CompressionAlgorithm::Zlib;
97            }
98            return CompressionAlgorithm::None;
99        }
100    }
101
102    // MySQL/Percona: bits 11-12
103    let comp_bits = (fsp_flags >> 11) & 0x03;
104    match comp_bits {
105        1 => CompressionAlgorithm::Zlib,
106        2 => CompressionAlgorithm::Lz4,
107        3 => CompressionAlgorithm::Zstd,
108        _ => CompressionAlgorithm::None,
109    }
110}
111
112/// Detect the compression algorithm from a MariaDB page-compressed page.
113///
114/// For page types 34354 (PAGE_COMPRESSED) and 37401 (PAGE_COMPRESSED_ENCRYPTED),
115/// the algorithm ID is stored as a u8 at byte offset 26 (FIL_PAGE_FILE_FLUSH_LSN).
116///
117/// # Examples
118///
119/// ```
120/// use idb::innodb::compression::{detect_mariadb_page_compression, CompressionAlgorithm};
121///
122/// // Build a minimal page with algorithm ID at byte 26
123/// let mut page = vec![0u8; 38];
124///
125/// // Algorithm ID 2 = LZ4
126/// page[26] = 2;
127/// assert_eq!(detect_mariadb_page_compression(&page), Some(CompressionAlgorithm::Lz4));
128///
129/// // Algorithm ID 1 = Zlib
130/// page[26] = 1;
131/// assert_eq!(detect_mariadb_page_compression(&page), Some(CompressionAlgorithm::Zlib));
132///
133/// // Too-short buffer returns None
134/// let short = vec![0u8; 10];
135/// assert_eq!(detect_mariadb_page_compression(&short), None);
136/// ```
137pub fn detect_mariadb_page_compression(page_data: &[u8]) -> Option<CompressionAlgorithm> {
138    if page_data.len() < 27 {
139        return None;
140    }
141    let algo_id = page_data[26];
142    Some(mariadb_algo_from_id(algo_id))
143}
144
145/// Convert a MariaDB compression algorithm ID to enum.
146///
147/// IDs from MariaDB `fil_space_t::comp_algo`:
148/// 0 = none, 1 = zlib, 2 = lz4, 3 = lzo, 4 = lzma, 5 = bzip2, 6 = snappy
149fn mariadb_algo_from_id(id: u8) -> CompressionAlgorithm {
150    match id {
151        1 => CompressionAlgorithm::Zlib,
152        2 => CompressionAlgorithm::Lz4,
153        3 => CompressionAlgorithm::Lzo,
154        4 => CompressionAlgorithm::Lzma,
155        5 => CompressionAlgorithm::Bzip2,
156        6 => CompressionAlgorithm::Snappy,
157        _ => CompressionAlgorithm::None,
158    }
159}
160
161/// Decompress zlib-compressed page data.
162///
163/// Returns the decompressed data, or None if decompression fails.
164///
165/// # Examples
166///
167/// ```
168/// use idb::innodb::compression::decompress_zlib;
169///
170/// // Compress some data with flate2, then decompress with decompress_zlib
171/// use flate2::write::ZlibEncoder;
172/// use flate2::Compression;
173/// use std::io::Write;
174///
175/// let original = b"Hello, InnoDB!";
176/// let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
177/// encoder.write_all(original).unwrap();
178/// let compressed = encoder.finish().unwrap();
179///
180/// let result = decompress_zlib(&compressed).unwrap();
181/// assert_eq!(result, original);
182///
183/// // Invalid data returns None
184/// assert!(decompress_zlib(&[0xFF, 0xFE]).is_none());
185/// ```
186pub fn decompress_zlib(compressed: &[u8]) -> Option<Vec<u8>> {
187    let mut decoder = ZlibDecoder::new(compressed);
188    let mut decompressed = Vec::new();
189    decoder.read_to_end(&mut decompressed).ok()?;
190    Some(decompressed)
191}
192
193/// Decompress LZ4-compressed page data.
194///
195/// `uncompressed_len` is the expected output size (typically the page size).
196/// Returns the decompressed data, or None if decompression fails.
197///
198/// # Examples
199///
200/// ```
201/// use idb::innodb::compression::decompress_lz4;
202///
203/// let original = b"Hello, LZ4 compression!";
204/// let compressed = lz4_flex::compress(original);
205///
206/// let result = decompress_lz4(&compressed, original.len()).unwrap();
207/// assert_eq!(result, original);
208/// ```
209pub fn decompress_lz4(compressed: &[u8], uncompressed_len: usize) -> Option<Vec<u8>> {
210    lz4_flex::decompress(compressed, uncompressed_len).ok()
211}
212
213/// Decompress ZSTD-compressed page data.
214///
215/// Returns the decompressed data, or None if decompression fails.
216///
217/// # Examples
218///
219/// ```
220/// use idb::innodb::compression::decompress_zstd;
221///
222/// // Invalid data returns None
223/// assert!(decompress_zstd(&[0xFF, 0xFE]).is_none());
224/// ```
225pub fn decompress_zstd(compressed: &[u8]) -> Option<Vec<u8>> {
226    let mut decoder = ruzstd::decoding::StreamingDecoder::new(compressed).ok()?;
227    let mut decompressed = Vec::new();
228    std::io::Read::read_to_end(&mut decoder, &mut decompressed).ok()?;
229    Some(decompressed)
230}
231
/// Check if a page appears to be a hole-punched page.
///
/// Hole-punched pages have their data zeroed out after the compressed content.
/// The FIL header is preserved, and the actual data is followed by trailing zeros.
///
/// The heuristic inspects the last quarter of the page, i.e. bytes
/// `[page_size * 3 / 4, page_size)`, and reports `true` when all of them are zero.
/// Bytes of `page_data` beyond `page_size` are ignored.
///
/// Returns `false` when `page_size` is zero or when `page_data` holds fewer
/// than `page_size` bytes, because there is no full page to inspect.
///
/// # Examples
///
/// ```
/// use idb::innodb::compression::is_hole_punched;
///
/// let page_size = 16384u32;
///
/// // All-zero page is considered hole-punched
/// let zeros = vec![0u8; page_size as usize];
/// assert!(is_hole_punched(&zeros, page_size));
///
/// // Data in the first part but zeros in the last quarter → hole-punched
/// let mut page = vec![0u8; page_size as usize];
/// page[0] = 0xFF;
/// page[100] = 0xAB;
/// assert!(is_hole_punched(&page, page_size));
///
/// // Non-zero byte in the last quarter → not hole-punched
/// page[page_size as usize - 10] = 0x01;
/// assert!(!is_hole_punched(&page, page_size));
///
/// // A zero page size never counts as hole-punched
/// assert!(!is_hole_punched(&[], 0));
/// ```
pub fn is_hole_punched(page_data: &[u8], page_size: u32) -> bool {
    let page_len = page_size as usize;

    // A zero-sized page would make the range below empty and `all` vacuously
    // true; treat it, like a truncated buffer, as "not hole-punched".
    if page_len == 0 || page_data.len() < page_len {
        return false;
    }

    // Start of the last quarter. The multiplication is done in u64 so that it
    // cannot overflow on targets where `usize` is 32 bits wide; the result is
    // at most `page_size`, so converting back is lossless there.
    let check_start = (u64::from(page_size) * 3 / 4) as usize;

    page_data[check_start..page_len].iter().all(|&b| b == 0)
}
269
270impl std::fmt::Display for CompressionAlgorithm {
271    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
272        match self {
273            CompressionAlgorithm::None => write!(f, "None"),
274            CompressionAlgorithm::Zlib => write!(f, "Zlib"),
275            CompressionAlgorithm::Lz4 => write!(f, "LZ4"),
276            CompressionAlgorithm::Lzo => write!(f, "LZO"),
277            CompressionAlgorithm::Lzma => write!(f, "LZMA"),
278            CompressionAlgorithm::Bzip2 => write!(f, "bzip2"),
279            CompressionAlgorithm::Snappy => write!(f, "Snappy"),
280            CompressionAlgorithm::Zstd => write!(f, "ZSTD"),
281        }
282    }
283}
284
// Unit tests for compression detection, the decompression helpers, and the
// hole-punch heuristic.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::innodb::vendor::MariaDbFormat;

    #[test]
    fn test_detect_compression_mysql() {
        assert_eq!(detect_compression(0, None), CompressionAlgorithm::None);
        assert_eq!(
            detect_compression(1 << 11, None),
            CompressionAlgorithm::Zlib
        );
        assert_eq!(detect_compression(2 << 11, None), CompressionAlgorithm::Lz4);
        assert_eq!(
            detect_compression(3 << 11, None),
            CompressionAlgorithm::Zstd
        );
        // Other bits set shouldn't affect compression detection
        assert_eq!(
            detect_compression(0xFF | (1 << 11), None),
            CompressionAlgorithm::Zlib
        );
    }

    #[test]
    fn test_detect_compression_mariadb_full_crc32() {
        let vendor = VendorInfo::mariadb(MariaDbFormat::FullCrc32);
        // bits 5-7 = 1 (zlib)
        let flags = 0x10 | (1 << 5);
        assert_eq!(
            detect_compression(flags, Some(&vendor)),
            CompressionAlgorithm::Zlib
        );
        // bits 5-7 = 2 (lz4)
        let flags = 0x10 | (2 << 5);
        assert_eq!(
            detect_compression(flags, Some(&vendor)),
            CompressionAlgorithm::Lz4
        );
        // bits 5-7 = 3 (lzo)
        let flags = 0x10 | (3 << 5);
        assert_eq!(
            detect_compression(flags, Some(&vendor)),
            CompressionAlgorithm::Lzo
        );
    }

    #[test]
    fn test_detect_mariadb_page_compression() {
        let mut page = vec![0u8; 38];
        page[26] = 2; // LZ4
        assert_eq!(
            detect_mariadb_page_compression(&page),
            Some(CompressionAlgorithm::Lz4)
        );
        page[26] = 6; // Snappy
        assert_eq!(
            detect_mariadb_page_compression(&page),
            Some(CompressionAlgorithm::Snappy)
        );
        // ID 0 and unknown IDs are reported as "no compression"
        page[26] = 0;
        assert_eq!(
            detect_mariadb_page_compression(&page),
            Some(CompressionAlgorithm::None)
        );
        page[26] = 200;
        assert_eq!(
            detect_mariadb_page_compression(&page),
            Some(CompressionAlgorithm::None)
        );
    }

    #[test]
    fn test_detect_mariadb_page_compression_short_buffer() {
        // Byte 26 is the first byte that must be present
        assert_eq!(detect_mariadb_page_compression(&[]), None);
        assert_eq!(detect_mariadb_page_compression(&[0u8; 26]), None);
        assert_eq!(
            detect_mariadb_page_compression(&[0u8; 27]),
            Some(CompressionAlgorithm::None)
        );
    }

    #[test]
    fn test_decompress_zlib() {
        use flate2::write::ZlibEncoder;
        use flate2::Compression;
        use std::io::Write;

        let original = b"Hello, InnoDB compression test data!";
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(original).unwrap();
        let compressed = encoder.finish().unwrap();

        let result = decompress_zlib(&compressed).unwrap();
        assert_eq!(result, original);
    }

    #[test]
    fn test_decompress_zlib_invalid() {
        assert!(decompress_zlib(&[0xFF, 0xFE]).is_none());
    }

    #[test]
    fn test_decompress_lz4() {
        let original = b"Hello, LZ4 compression test data for InnoDB!";
        let compressed = lz4_flex::compress_prepend_size(original);
        // lz4_flex::compress_prepend_size adds a 4-byte length prefix, but
        // decompress_lz4 expects just the compressed block plus a known length.
        // Call the wrapper itself so this test covers the function under test.
        let result = decompress_lz4(&compressed[4..], original.len());
        assert!(result.is_some());
        assert_eq!(result.unwrap(), original);
    }

    #[test]
    fn test_detect_compression_mysql_zstd() {
        // MySQL 8.0.14+: bits 11-12 = 3 → ZSTD
        assert_eq!(
            detect_compression(3 << 11, None),
            CompressionAlgorithm::Zstd
        );
    }

    #[test]
    fn test_decompress_zstd() {
        // Use ruzstd to compress, then decompress
        let original = b"Hello, ZSTD compression test data for InnoDB!";
        let compressed = ruzstd::encoding::compress_to_vec(
            &original[..],
            ruzstd::encoding::CompressionLevel::Fastest,
        );
        let result = decompress_zstd(&compressed).unwrap();
        assert_eq!(result, original);
    }

    #[test]
    fn test_zstd_display() {
        assert_eq!(format!("{}", CompressionAlgorithm::Zstd), "ZSTD");
    }

    #[test]
    fn test_decompress_zstd_invalid() {
        assert!(decompress_zstd(&[0xFF, 0xFE]).is_none());
    }

    #[test]
    fn test_is_hole_punched() {
        let page_size = 16384u32;
        let mut page = vec![0u8; page_size as usize];
        // All zeros = hole punched
        assert!(is_hole_punched(&page, page_size));

        // Some data in the first part, zeros in the last quarter
        page[0] = 0xFF;
        page[100] = 0xAB;
        assert!(is_hole_punched(&page, page_size));

        // Non-zero byte in the last quarter = not hole punched
        page[page_size as usize - 10] = 0x01;
        assert!(!is_hole_punched(&page, page_size));
    }
}