Skip to main content

idb/innodb/
compression.rs

1//! Tablespace compression detection and decompression.
2//!
3//! Detects the compression algorithm from FSP flags and provides zlib and LZ4
4//! decompression helpers for compressed page data.
5//!
6//! Supports both MySQL (bits 11-12) and MariaDB flag layouts:
7//! - MariaDB full_crc32: compression algo in bits 5-7
8//! - MariaDB original: PAGE_COMPRESSION flag at bit 16
9//! - MariaDB page-level: algorithm ID embedded per-page at offset 26
10
11use flate2::read::ZlibDecoder;
12use std::io::Read;
13
14use crate::innodb::vendor::VendorInfo;
15
/// Compression algorithm detected or used for a page.
///
/// Values are produced by the detection helpers in this module, either from
/// the tablespace FSP flags or from a per-page MariaDB algorithm ID. Only
/// `Zlib` and `Lz4` have matching decompression helpers here; the remaining
/// MariaDB algorithms are reported but not decompressed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionAlgorithm {
    /// No compression detected (also used for unrecognised algorithm IDs).
    None,
    /// zlib compression; see `decompress_zlib`.
    Zlib,
    /// LZ4 compression; see `decompress_lz4`.
    Lz4,
    /// MariaDB LZO compression (detection only — not decompressed).
    Lzo,
    /// MariaDB LZMA compression (detection only — not decompressed).
    Lzma,
    /// MariaDB bzip2 compression (detection only — not decompressed).
    Bzip2,
    /// MariaDB Snappy compression (detection only — not decompressed).
    Snappy,
}
31
32/// Detect the compression algorithm from FSP space flags.
33///
34/// When `vendor_info` is provided:
35/// - MariaDB full_crc32: reads compression algo from bits 5-7
36/// - MariaDB original: checks bit 16 for PAGE_COMPRESSION (algo is per-page)
37/// - MySQL/Percona: reads bits 11-12
38///
39/// Without vendor info, defaults to MySQL bit layout.
40pub fn detect_compression(
41    fsp_flags: u32,
42    vendor_info: Option<&VendorInfo>,
43) -> CompressionAlgorithm {
44    use crate::innodb::constants::*;
45
46    if let Some(vi) = vendor_info {
47        if vi.is_full_crc32() {
48            // MariaDB full_crc32: compression algo in bits 5-7
49            let algo = (fsp_flags & MARIADB_FSP_FLAGS_FCRC32_COMPRESSED_ALGO_MASK) >> 5;
50            return mariadb_algo_from_id(algo as u8);
51        }
52        if vi.vendor == crate::innodb::vendor::InnoDbVendor::MariaDB {
53            // MariaDB original: bit 16 indicates page compression is enabled
54            // but the algorithm is stored per-page, not in FSP flags
55            if fsp_flags & MARIADB_FSP_FLAGS_PAGE_COMPRESSION != 0 {
56                // Algorithm is per-page; return Zlib as a default indicator
57                // that page compression is enabled. Actual algo is in each page.
58                return CompressionAlgorithm::Zlib;
59            }
60            return CompressionAlgorithm::None;
61        }
62    }
63
64    // MySQL/Percona: bits 11-12
65    let comp_bits = (fsp_flags >> 11) & 0x03;
66    match comp_bits {
67        1 => CompressionAlgorithm::Zlib,
68        2 => CompressionAlgorithm::Lz4,
69        _ => CompressionAlgorithm::None,
70    }
71}
72
73/// Detect the compression algorithm from a MariaDB page-compressed page.
74///
75/// For page types 34354 (PAGE_COMPRESSED) and 37401 (PAGE_COMPRESSED_ENCRYPTED),
76/// the algorithm ID is stored as a u8 at byte offset 26 (FIL_PAGE_FILE_FLUSH_LSN).
77pub fn detect_mariadb_page_compression(page_data: &[u8]) -> Option<CompressionAlgorithm> {
78    if page_data.len() < 27 {
79        return None;
80    }
81    let algo_id = page_data[26];
82    Some(mariadb_algo_from_id(algo_id))
83}
84
85/// Convert a MariaDB compression algorithm ID to enum.
86///
87/// IDs from MariaDB `fil_space_t::comp_algo`:
88/// 0 = none, 1 = zlib, 2 = lz4, 3 = lzo, 4 = lzma, 5 = bzip2, 6 = snappy
89fn mariadb_algo_from_id(id: u8) -> CompressionAlgorithm {
90    match id {
91        1 => CompressionAlgorithm::Zlib,
92        2 => CompressionAlgorithm::Lz4,
93        3 => CompressionAlgorithm::Lzo,
94        4 => CompressionAlgorithm::Lzma,
95        5 => CompressionAlgorithm::Bzip2,
96        6 => CompressionAlgorithm::Snappy,
97        _ => CompressionAlgorithm::None,
98    }
99}
100
101/// Decompress zlib-compressed page data.
102///
103/// Returns the decompressed data, or None if decompression fails.
104pub fn decompress_zlib(compressed: &[u8]) -> Option<Vec<u8>> {
105    let mut decoder = ZlibDecoder::new(compressed);
106    let mut decompressed = Vec::new();
107    decoder.read_to_end(&mut decompressed).ok()?;
108    Some(decompressed)
109}
110
111/// Decompress LZ4-compressed page data.
112///
113/// `uncompressed_len` is the expected output size (typically the page size).
114/// Returns the decompressed data, or None if decompression fails.
115pub fn decompress_lz4(compressed: &[u8], uncompressed_len: usize) -> Option<Vec<u8>> {
116    lz4_flex::decompress(compressed, uncompressed_len).ok()
117}
118
/// Heuristically decide whether a page looks hole-punched.
///
/// A hole-punched page keeps its FIL header and compressed payload at the
/// front, with the remainder zeroed out. The check therefore only requires
/// that the final quarter of the first `page_size` bytes is all zeros.
///
/// Returns `false` when `page_data` holds fewer than `page_size` bytes; any
/// bytes beyond `page_size` are ignored.
pub fn is_hole_punched(page_data: &[u8], page_size: u32) -> bool {
    let size = page_size as usize;

    match page_data.get(..size) {
        // Buffer is shorter than one full page.
        None => false,
        Some(page) => {
            // Last quarter of the page starts at 3/4 of the page size.
            let tail_start = (size * 3) / 4;
            !page[tail_start..].iter().any(|&byte| byte != 0)
        }
    }
}
134
135impl std::fmt::Display for CompressionAlgorithm {
136    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
137        match self {
138            CompressionAlgorithm::None => write!(f, "None"),
139            CompressionAlgorithm::Zlib => write!(f, "Zlib"),
140            CompressionAlgorithm::Lz4 => write!(f, "LZ4"),
141            CompressionAlgorithm::Lzo => write!(f, "LZO"),
142            CompressionAlgorithm::Lzma => write!(f, "LZMA"),
143            CompressionAlgorithm::Bzip2 => write!(f, "bzip2"),
144            CompressionAlgorithm::Snappy => write!(f, "Snappy"),
145        }
146    }
147}
148
#[cfg(test)]
mod tests {
    //! Unit tests for compression detection and the decompression helpers.

    use super::*;
    use crate::innodb::vendor::MariaDbFormat;

    /// MySQL layout (no vendor info): algorithm is the two-bit field at bits 11-12.
    #[test]
    fn test_detect_compression_mysql() {
        assert_eq!(detect_compression(0, None), CompressionAlgorithm::None);
        assert_eq!(
            detect_compression(1 << 11, None),
            CompressionAlgorithm::Zlib
        );
        assert_eq!(
            detect_compression(2 << 11, None),
            CompressionAlgorithm::Lz4
        );
        assert_eq!(
            detect_compression(3 << 11, None),
            CompressionAlgorithm::None
        );
        // Other bits set shouldn't affect compression detection
        assert_eq!(
            detect_compression(0xFF | (1 << 11), None),
            CompressionAlgorithm::Zlib
        );
    }

    /// MariaDB full_crc32 layout: algorithm ID is stored in bits 5-7.
    #[test]
    fn test_detect_compression_mariadb_full_crc32() {
        let vendor = VendorInfo::mariadb(MariaDbFormat::FullCrc32);
        // bits 5-7 = 1 (zlib)
        let flags = 0x10 | (1 << 5);
        assert_eq!(
            detect_compression(flags, Some(&vendor)),
            CompressionAlgorithm::Zlib
        );
        // bits 5-7 = 2 (lz4)
        let flags = 0x10 | (2 << 5);
        assert_eq!(
            detect_compression(flags, Some(&vendor)),
            CompressionAlgorithm::Lz4
        );
        // bits 5-7 = 3 (lzo)
        let flags = 0x10 | (3 << 5);
        assert_eq!(
            detect_compression(flags, Some(&vendor)),
            CompressionAlgorithm::Lzo
        );
    }

    /// Page-level detection reads the algorithm ID from byte 26.
    #[test]
    fn test_detect_mariadb_page_compression() {
        let mut page = vec![0u8; 38];
        page[26] = 2; // LZ4
        assert_eq!(
            detect_mariadb_page_compression(&page),
            Some(CompressionAlgorithm::Lz4)
        );
        page[26] = 6; // Snappy
        assert_eq!(
            detect_mariadb_page_compression(&page),
            Some(CompressionAlgorithm::Snappy)
        );
        // Unknown algorithm IDs map to "no compression"
        page[26] = 0xFF;
        assert_eq!(
            detect_mariadb_page_compression(&page),
            Some(CompressionAlgorithm::None)
        );
        // A buffer that stops before byte 26 cannot be inspected
        assert_eq!(detect_mariadb_page_compression(&page[..26]), None);
    }

    /// Round-trip through flate2's encoder and our zlib helper.
    #[test]
    fn test_decompress_zlib() {
        use flate2::write::ZlibEncoder;
        use flate2::Compression;
        use std::io::Write;

        let original = b"Hello, InnoDB compression test data!";
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(original).unwrap();
        let compressed = encoder.finish().unwrap();

        let result = decompress_zlib(&compressed).unwrap();
        assert_eq!(result, original);
    }

    /// Round-trip through lz4_flex's compressor and our LZ4 helper.
    #[test]
    fn test_decompress_lz4() {
        let original = b"Hello, LZ4 compression test data for InnoDB!";
        let compressed = lz4_flex::compress_prepend_size(original);
        // lz4_flex::compress_prepend_size adds a 4-byte length prefix, but
        // decompress_lz4 expects just the compressed block plus a known
        // length, so the prefix is stripped. Calling the wrapper (rather than
        // lz4_flex::decompress directly) is what puts it under test.
        let result = decompress_lz4(&compressed[4..], original.len());
        assert!(result.is_some());
        assert_eq!(result.unwrap(), original);
    }

    /// Only the last quarter of the page decides the hole-punch verdict.
    #[test]
    fn test_is_hole_punched() {
        let page_size = 16384u32;
        let mut page = vec![0u8; page_size as usize];
        // All zeros = hole punched
        assert!(is_hole_punched(&page, page_size));

        // Some data in the first part, zeros in the last quarter
        page[0] = 0xFF;
        page[100] = 0xAB;
        assert!(is_hole_punched(&page, page_size));

        // Non-zero byte in the last quarter = not hole punched
        page[page_size as usize - 10] = 0x01;
        assert!(!is_hole_punched(&page, page_size));
    }
}