Skip to main content

idb/innodb/
compression.rs

1//! Tablespace compression detection and decompression.
2//!
3//! Detects the compression algorithm from FSP flags and provides zlib and LZ4
4//! decompression helpers for compressed page data.
5//!
6//! Supports both MySQL (bits 11-12) and MariaDB flag layouts:
7//! - MariaDB full_crc32: compression algo in bits 5-7
8//! - MariaDB original: PAGE_COMPRESSION flag at bit 16
9//! - MariaDB page-level: algorithm ID embedded per-page at offset 26
10
11use flate2::read::ZlibDecoder;
12use std::io::Read;
13
14use crate::innodb::vendor::VendorInfo;
15
/// Compression scheme identified for a tablespace or for a single page.
///
/// This module only ships decompression helpers for `Zlib` and `Lz4`; the
/// remaining MariaDB algorithms can be reported but are not decoded here.
///
/// # Examples
///
/// ```
/// use idb::innodb::compression::CompressionAlgorithm;
///
/// let algo = CompressionAlgorithm::Zlib;
/// assert_eq!(format!("{algo}"), "Zlib");
///
/// let algo = CompressionAlgorithm::None;
/// assert_eq!(format!("{algo}"), "None");
///
/// let algo = CompressionAlgorithm::Lz4;
/// assert_eq!(format!("{algo}"), "LZ4");
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionAlgorithm {
    /// No compression detected.
    None,
    /// zlib compression (see `decompress_zlib`).
    Zlib,
    /// LZ4 compression (see `decompress_lz4`).
    Lz4,
    /// MariaDB LZO compression (detection only — not decompressed).
    Lzo,
    /// MariaDB LZMA compression (detection only — not decompressed).
    Lzma,
    /// MariaDB bzip2 compression (detection only — not decompressed).
    Bzip2,
    /// MariaDB Snappy compression (detection only — not decompressed).
    Snappy,
}
46
47/// Detect the compression algorithm from FSP space flags.
48///
49/// When `vendor_info` is provided:
50/// - MariaDB full_crc32: reads compression algo from bits 5-7
51/// - MariaDB original: checks bit 16 for PAGE_COMPRESSION (algo is per-page)
52/// - MySQL/Percona: reads bits 11-12
53///
54/// Without vendor info, defaults to MySQL bit layout.
55///
56/// # Examples
57///
58/// ```
59/// use idb::innodb::compression::{detect_compression, CompressionAlgorithm};
60/// use idb::innodb::vendor::{VendorInfo, MariaDbFormat};
61///
62/// // No compression flags → None
63/// assert_eq!(detect_compression(0, None), CompressionAlgorithm::None);
64///
65/// // MySQL: bit 11 set → Zlib
66/// assert_eq!(detect_compression(1 << 11, None), CompressionAlgorithm::Zlib);
67///
68/// // MySQL: bits 11-12 = 2 → LZ4
69/// assert_eq!(detect_compression(2 << 11, None), CompressionAlgorithm::Lz4);
70///
71/// // MariaDB full_crc32: bits 5-7 = 1 → Zlib
72/// let maria = VendorInfo::mariadb(MariaDbFormat::FullCrc32);
73/// let flags = 0x10 | (1 << 5); // bit 4 (marker) + algo=1
74/// assert_eq!(detect_compression(flags, Some(&maria)), CompressionAlgorithm::Zlib);
75/// ```
76pub fn detect_compression(
77    fsp_flags: u32,
78    vendor_info: Option<&VendorInfo>,
79) -> CompressionAlgorithm {
80    use crate::innodb::constants::*;
81
82    if let Some(vi) = vendor_info {
83        if vi.is_full_crc32() {
84            // MariaDB full_crc32: compression algo in bits 5-7
85            let algo = (fsp_flags & MARIADB_FSP_FLAGS_FCRC32_COMPRESSED_ALGO_MASK) >> 5;
86            return mariadb_algo_from_id(algo as u8);
87        }
88        if vi.vendor == crate::innodb::vendor::InnoDbVendor::MariaDB {
89            // MariaDB original: bit 16 indicates page compression is enabled
90            // but the algorithm is stored per-page, not in FSP flags
91            if fsp_flags & MARIADB_FSP_FLAGS_PAGE_COMPRESSION != 0 {
92                // Algorithm is per-page; return Zlib as a default indicator
93                // that page compression is enabled. Actual algo is in each page.
94                return CompressionAlgorithm::Zlib;
95            }
96            return CompressionAlgorithm::None;
97        }
98    }
99
100    // MySQL/Percona: bits 11-12
101    let comp_bits = (fsp_flags >> 11) & 0x03;
102    match comp_bits {
103        1 => CompressionAlgorithm::Zlib,
104        2 => CompressionAlgorithm::Lz4,
105        _ => CompressionAlgorithm::None,
106    }
107}
108
109/// Detect the compression algorithm from a MariaDB page-compressed page.
110///
111/// For page types 34354 (PAGE_COMPRESSED) and 37401 (PAGE_COMPRESSED_ENCRYPTED),
112/// the algorithm ID is stored as a u8 at byte offset 26 (FIL_PAGE_FILE_FLUSH_LSN).
113///
114/// # Examples
115///
116/// ```
117/// use idb::innodb::compression::{detect_mariadb_page_compression, CompressionAlgorithm};
118///
119/// // Build a minimal page with algorithm ID at byte 26
120/// let mut page = vec![0u8; 38];
121///
122/// // Algorithm ID 2 = LZ4
123/// page[26] = 2;
124/// assert_eq!(detect_mariadb_page_compression(&page), Some(CompressionAlgorithm::Lz4));
125///
126/// // Algorithm ID 1 = Zlib
127/// page[26] = 1;
128/// assert_eq!(detect_mariadb_page_compression(&page), Some(CompressionAlgorithm::Zlib));
129///
130/// // Too-short buffer returns None
131/// let short = vec![0u8; 10];
132/// assert_eq!(detect_mariadb_page_compression(&short), None);
133/// ```
134pub fn detect_mariadb_page_compression(page_data: &[u8]) -> Option<CompressionAlgorithm> {
135    if page_data.len() < 27 {
136        return None;
137    }
138    let algo_id = page_data[26];
139    Some(mariadb_algo_from_id(algo_id))
140}
141
142/// Convert a MariaDB compression algorithm ID to enum.
143///
144/// IDs from MariaDB `fil_space_t::comp_algo`:
145/// 0 = none, 1 = zlib, 2 = lz4, 3 = lzo, 4 = lzma, 5 = bzip2, 6 = snappy
146fn mariadb_algo_from_id(id: u8) -> CompressionAlgorithm {
147    match id {
148        1 => CompressionAlgorithm::Zlib,
149        2 => CompressionAlgorithm::Lz4,
150        3 => CompressionAlgorithm::Lzo,
151        4 => CompressionAlgorithm::Lzma,
152        5 => CompressionAlgorithm::Bzip2,
153        6 => CompressionAlgorithm::Snappy,
154        _ => CompressionAlgorithm::None,
155    }
156}
157
158/// Decompress zlib-compressed page data.
159///
160/// Returns the decompressed data, or None if decompression fails.
161///
162/// # Examples
163///
164/// ```
165/// use idb::innodb::compression::decompress_zlib;
166///
167/// // Compress some data with flate2, then decompress with decompress_zlib
168/// use flate2::write::ZlibEncoder;
169/// use flate2::Compression;
170/// use std::io::Write;
171///
172/// let original = b"Hello, InnoDB!";
173/// let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
174/// encoder.write_all(original).unwrap();
175/// let compressed = encoder.finish().unwrap();
176///
177/// let result = decompress_zlib(&compressed).unwrap();
178/// assert_eq!(result, original);
179///
180/// // Invalid data returns None
181/// assert!(decompress_zlib(&[0xFF, 0xFE]).is_none());
182/// ```
183pub fn decompress_zlib(compressed: &[u8]) -> Option<Vec<u8>> {
184    let mut decoder = ZlibDecoder::new(compressed);
185    let mut decompressed = Vec::new();
186    decoder.read_to_end(&mut decompressed).ok()?;
187    Some(decompressed)
188}
189
190/// Decompress LZ4-compressed page data.
191///
192/// `uncompressed_len` is the expected output size (typically the page size).
193/// Returns the decompressed data, or None if decompression fails.
194///
195/// # Examples
196///
197/// ```
198/// use idb::innodb::compression::decompress_lz4;
199///
200/// let original = b"Hello, LZ4 compression!";
201/// let compressed = lz4_flex::compress(original);
202///
203/// let result = decompress_lz4(&compressed, original.len()).unwrap();
204/// assert_eq!(result, original);
205/// ```
206pub fn decompress_lz4(compressed: &[u8], uncompressed_len: usize) -> Option<Vec<u8>> {
207    lz4_flex::decompress(compressed, uncompressed_len).ok()
208}
209
/// Heuristically decide whether a page looks hole-punched.
///
/// Hole-punched pages have their data zeroed out after the compressed content.
/// The FIL header is preserved, and the actual data is followed by trailing zeros.
/// The check used here is that the last quarter of the page — bytes
/// `page_size * 3 / 4 .. page_size` — consists entirely of zero bytes.
///
/// Returns `false` when `page_data` holds fewer than `page_size` bytes. Bytes
/// beyond `page_size` are ignored.
///
/// # Examples
///
/// ```
/// use idb::innodb::compression::is_hole_punched;
///
/// let page_size = 16384u32;
///
/// // All-zero page is considered hole-punched
/// let zeros = vec![0u8; page_size as usize];
/// assert!(is_hole_punched(&zeros, page_size));
///
/// // Data in the first part but zeros in the last quarter → hole-punched
/// let mut page = vec![0u8; page_size as usize];
/// page[0] = 0xFF;
/// page[100] = 0xAB;
/// assert!(is_hole_punched(&page, page_size));
///
/// // Non-zero byte in the last quarter → not hole-punched
/// page[page_size as usize - 10] = 0x01;
/// assert!(!is_hole_punched(&page, page_size));
/// ```
pub fn is_hole_punched(page_data: &[u8], page_size: u32) -> bool {
    let page_len = page_size as usize;

    // `get` fails exactly when the buffer is shorter than one page.
    match page_data.get(..page_len) {
        None => false,
        Some(page) => {
            // Only the final quarter of the page is inspected.
            let tail_start = (page_len * 3) / 4;
            !page[tail_start..].iter().any(|&byte| byte != 0)
        }
    }
}
247
248impl std::fmt::Display for CompressionAlgorithm {
249    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
250        match self {
251            CompressionAlgorithm::None => write!(f, "None"),
252            CompressionAlgorithm::Zlib => write!(f, "Zlib"),
253            CompressionAlgorithm::Lz4 => write!(f, "LZ4"),
254            CompressionAlgorithm::Lzo => write!(f, "LZO"),
255            CompressionAlgorithm::Lzma => write!(f, "LZMA"),
256            CompressionAlgorithm::Bzip2 => write!(f, "bzip2"),
257            CompressionAlgorithm::Snappy => write!(f, "Snappy"),
258        }
259    }
260}
261
#[cfg(test)]
mod tests {
    //! Unit tests for compression detection and the decompression helpers.

    use super::*;
    use crate::innodb::vendor::MariaDbFormat;

    /// MySQL layout (no vendor info): algorithm lives in bits 11-12.
    #[test]
    fn test_detect_compression_mysql() {
        assert_eq!(detect_compression(0, None), CompressionAlgorithm::None);
        assert_eq!(
            detect_compression(1 << 11, None),
            CompressionAlgorithm::Zlib
        );
        assert_eq!(detect_compression(2 << 11, None), CompressionAlgorithm::Lz4);
        assert_eq!(
            detect_compression(3 << 11, None),
            CompressionAlgorithm::None
        );
        // Other bits set shouldn't affect compression detection
        assert_eq!(
            detect_compression(0xFF | (1 << 11), None),
            CompressionAlgorithm::Zlib
        );
    }

    /// MariaDB full_crc32 layout: algorithm ID lives in bits 5-7.
    #[test]
    fn test_detect_compression_mariadb_full_crc32() {
        let vendor = VendorInfo::mariadb(MariaDbFormat::FullCrc32);
        // bits 5-7 = 1 (zlib)
        let flags = 0x10 | (1 << 5);
        assert_eq!(
            detect_compression(flags, Some(&vendor)),
            CompressionAlgorithm::Zlib
        );
        // bits 5-7 = 2 (lz4)
        let flags = 0x10 | (2 << 5);
        assert_eq!(
            detect_compression(flags, Some(&vendor)),
            CompressionAlgorithm::Lz4
        );
        // bits 5-7 = 3 (lzo)
        let flags = 0x10 | (3 << 5);
        assert_eq!(
            detect_compression(flags, Some(&vendor)),
            CompressionAlgorithm::Lzo
        );
    }

    /// With only the marker bit set, no algorithm ID is present.
    #[test]
    fn test_detect_compression_mariadb_full_crc32_uncompressed() {
        let vendor = VendorInfo::mariadb(MariaDbFormat::FullCrc32);
        assert_eq!(
            detect_compression(0x10, Some(&vendor)),
            CompressionAlgorithm::None
        );
    }

    #[test]
    fn test_detect_mariadb_page_compression() {
        let mut page = vec![0u8; 38];
        page[26] = 2; // LZ4
        assert_eq!(
            detect_mariadb_page_compression(&page),
            Some(CompressionAlgorithm::Lz4)
        );
        page[26] = 6; // Snappy
        assert_eq!(
            detect_mariadb_page_compression(&page),
            Some(CompressionAlgorithm::Snappy)
        );
    }

    /// Buffers that cannot contain byte 26 yield `None`; unknown IDs map to
    /// `CompressionAlgorithm::None`.
    #[test]
    fn test_detect_mariadb_page_compression_edge_cases() {
        assert_eq!(detect_mariadb_page_compression(&[]), None);
        assert_eq!(detect_mariadb_page_compression(&[0u8; 26]), None);

        let mut page = vec![0u8; 27];
        assert_eq!(
            detect_mariadb_page_compression(&page),
            Some(CompressionAlgorithm::None)
        );
        page[26] = 7; // one past the last known ID
        assert_eq!(
            detect_mariadb_page_compression(&page),
            Some(CompressionAlgorithm::None)
        );
        page[26] = 0xFF;
        assert_eq!(
            detect_mariadb_page_compression(&page),
            Some(CompressionAlgorithm::None)
        );
    }

    #[test]
    fn test_decompress_zlib() {
        use flate2::write::ZlibEncoder;
        use flate2::Compression;
        use std::io::Write;

        let original = b"Hello, InnoDB compression test data!";
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(original).unwrap();
        let compressed = encoder.finish().unwrap();

        let result = decompress_zlib(&compressed).unwrap();
        assert_eq!(result, original);
    }

    /// Bytes that are not a zlib stream must be rejected, not panic.
    #[test]
    fn test_decompress_zlib_invalid_input() {
        assert!(decompress_zlib(&[0xFF, 0xFE]).is_none());
    }

    #[test]
    fn test_decompress_lz4() {
        let original = b"Hello, LZ4 compression test data for InnoDB!";
        // `lz4_flex::compress` produces a raw block without a size prefix,
        // which is the form `decompress_lz4` takes alongside the known length.
        let compressed = lz4_flex::compress(original);

        // Exercise the wrapper under test rather than lz4_flex directly.
        let result = decompress_lz4(&compressed, original.len())
            .expect("round-trip through decompress_lz4 should succeed");
        assert_eq!(result, original);
    }

    #[test]
    fn test_is_hole_punched() {
        let page_size = 16384u32;
        let mut page = vec![0u8; page_size as usize];
        // All zeros = hole punched
        assert!(is_hole_punched(&page, page_size));

        // Some data in the first part, zeros in the last quarter
        page[0] = 0xFF;
        page[100] = 0xAB;
        assert!(is_hole_punched(&page, page_size));

        // Non-zero byte in the last quarter = not hole punched
        page[page_size as usize - 10] = 0x01;
        assert!(!is_hole_punched(&page, page_size));
    }

    /// A buffer shorter than one page can never be classified as hole-punched.
    #[test]
    fn test_is_hole_punched_short_buffer() {
        assert!(!is_hole_punched(&[0u8; 100], 16384));
    }
}