Skip to main content

idb/innodb/
checksum.rs

1//! InnoDB page checksum validation.
2//!
3//! Implements the checksum algorithms used by MySQL and MariaDB InnoDB:
4//!
5//! - **CRC-32C** (MySQL 5.7.7+ default): XOR of two independent CRC32c
6//!   values computed over bytes `[4..26)` and `[38..page_size-8)`. These are
7//!   NOT chained — each range is checksummed separately and the results XORed.
8//!
9//! - **Legacy InnoDB** (MySQL < 5.7.7): Uses `ut_fold_ulint_pair` with wrapping
10//!   `u32` arithmetic, processing bytes one at a time over the same two ranges.
11//!
12//! - **MariaDB full_crc32** (MariaDB 10.5+): Single CRC-32C over bytes
13//!   `[0..page_size-4)`. The checksum is stored in the last 4 bytes of the page
14//!   (not in the FIL header).
15//!
16//! Use [`validate_checksum`] to check a page against all applicable algorithms.
17
18use crate::innodb::constants::*;
19use crate::innodb::vendor::VendorInfo;
20use byteorder::{BigEndian, ByteOrder};
21use serde::Serialize;
22
/// Checksum algorithms used by InnoDB.
///
/// # Examples
///
/// ```
/// use idb::innodb::checksum::ChecksumAlgorithm;
///
/// let algo = ChecksumAlgorithm::Crc32c;
/// assert_eq!(algo, ChecksumAlgorithm::Crc32c);
///
/// // All variants
/// let _crc = ChecksumAlgorithm::Crc32c;
/// let _legacy = ChecksumAlgorithm::InnoDB;
/// let _maria = ChecksumAlgorithm::MariaDbFullCrc32;
/// let _none = ChecksumAlgorithm::None;
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum ChecksumAlgorithm {
    /// CRC-32C (hardware accelerated, MySQL 5.7.7+ default)
    Crc32c,
    /// Legacy InnoDB checksum (buf_calc_page_new_checksum equivalent)
    InnoDB,
    /// MariaDB full_crc32 (single CRC-32C over entire page minus last 4 bytes)
    MariaDbFullCrc32,
    /// No checksum (innodb_checksum_algorithm=none).
    ///
    /// [`validate_checksum`] also reports this variant when no algorithm is
    /// actually run: for all-zero pages and for buffers shorter than the
    /// requested page size.
    None,
}
50
51/// Validate a page's checksum.
52///
53/// When `vendor_info` is provided and indicates MariaDB full_crc32 format,
54/// the full_crc32 algorithm is tried first (checksum stored in the last 4
55/// bytes of the page). Otherwise, MySQL CRC-32C and legacy InnoDB are tried.
56///
57/// # Examples
58///
59/// An all-zeros page (freshly allocated) is always considered valid:
60///
61/// ```
62/// use idb::innodb::checksum::{validate_checksum, ChecksumAlgorithm};
63///
64/// let page = vec![0u8; 16384];
65/// let result = validate_checksum(&page, 16384, None);
66/// assert!(result.valid);
67/// assert_eq!(result.algorithm, ChecksumAlgorithm::None);
68/// ```
69///
70/// A page with the `BUF_NO_CHECKSUM_MAGIC` value (`0xDEADBEEF`) in bytes
71/// 0-3 is treated as having checksums disabled:
72///
73/// ```
74/// use idb::innodb::checksum::{validate_checksum, ChecksumAlgorithm};
75/// use byteorder::{BigEndian, ByteOrder};
76///
77/// let mut page = vec![0u8; 16384];
78/// BigEndian::write_u32(&mut page[0..], 0xDEADBEEF);
79/// let result = validate_checksum(&page, 16384, None);
80/// assert!(result.valid);
81/// assert_eq!(result.algorithm, ChecksumAlgorithm::None);
82/// ```
83pub fn validate_checksum(
84    page_data: &[u8],
85    page_size: u32,
86    vendor_info: Option<&VendorInfo>,
87) -> ChecksumResult {
88    let ps = page_size as usize;
89    if page_data.len() < ps {
90        return ChecksumResult {
91            algorithm: ChecksumAlgorithm::None,
92            valid: false,
93            stored_checksum: 0,
94            calculated_checksum: 0,
95        };
96    }
97
98    // All zeros page (freshly allocated) - valid with any algorithm
99    let first_u32 = BigEndian::read_u32(&page_data[FIL_PAGE_SPACE_OR_CHKSUM..]);
100    if first_u32 == 0 {
101        let all_zero = page_data[..ps].iter().all(|&b| b == 0);
102        if all_zero {
103            return ChecksumResult {
104                algorithm: ChecksumAlgorithm::None,
105                valid: true,
106                stored_checksum: 0,
107                calculated_checksum: 0,
108            };
109        }
110    }
111
112    // MariaDB full_crc32: try this first when vendor indicates it
113    if vendor_info.is_some_and(|v| v.is_full_crc32()) {
114        let stored = BigEndian::read_u32(&page_data[ps - 4..ps]);
115        let calculated = calculate_mariadb_full_crc32(page_data, ps);
116        if stored == calculated {
117            return ChecksumResult {
118                algorithm: ChecksumAlgorithm::MariaDbFullCrc32,
119                valid: true,
120                stored_checksum: stored,
121                calculated_checksum: calculated,
122            };
123        }
124        // full_crc32 didn't match — report failure
125        return ChecksumResult {
126            algorithm: ChecksumAlgorithm::MariaDbFullCrc32,
127            valid: false,
128            stored_checksum: stored,
129            calculated_checksum: calculated,
130        };
131    }
132
133    let stored_checksum = first_u32;
134
135    // Check for "none" algorithm (stored checksum is BUF_NO_CHECKSUM_MAGIC = 0xDEADBEEF)
136    if stored_checksum == 0xDEADBEEF {
137        return ChecksumResult {
138            algorithm: ChecksumAlgorithm::None,
139            valid: true,
140            stored_checksum,
141            calculated_checksum: 0xDEADBEEF,
142        };
143    }
144
145    // Try CRC-32C first (most common for MySQL 8.0+)
146    let crc_checksum = calculate_crc32c(page_data, ps);
147    if stored_checksum == crc_checksum {
148        return ChecksumResult {
149            algorithm: ChecksumAlgorithm::Crc32c,
150            valid: true,
151            stored_checksum,
152            calculated_checksum: crc_checksum,
153        };
154    }
155
156    // Try legacy InnoDB checksum
157    let innodb_checksum = calculate_innodb_checksum(page_data, ps);
158    if stored_checksum == innodb_checksum {
159        return ChecksumResult {
160            algorithm: ChecksumAlgorithm::InnoDB,
161            valid: true,
162            stored_checksum,
163            calculated_checksum: innodb_checksum,
164        };
165    }
166
167    // Neither matched - report failure with CRC-32C as expected
168    ChecksumResult {
169        algorithm: ChecksumAlgorithm::Crc32c,
170        valid: false,
171        stored_checksum,
172        calculated_checksum: crc_checksum,
173    }
174}
175
/// Result of a checksum validation.
///
/// # Examples
///
/// ```
/// use idb::innodb::checksum::{validate_checksum, ChecksumResult, ChecksumAlgorithm};
///
/// let page = vec![0u8; 16384];
/// let result: ChecksumResult = validate_checksum(&page, 16384, None);
///
/// // Inspect individual fields
/// println!("Algorithm: {:?}", result.algorithm);
/// println!("Valid: {}", result.valid);
/// println!("Stored:     0x{:08X}", result.stored_checksum);
/// println!("Calculated: 0x{:08X}", result.calculated_checksum);
/// ```
#[derive(Debug, Clone)]
pub struct ChecksumResult {
    /// The checksum algorithm that was detected or attempted.
    pub algorithm: ChecksumAlgorithm,
    /// Whether the stored checksum matches the calculated value.
    ///
    /// All-zero pages and pages carrying the `0xDEADBEEF` "no checksum"
    /// magic are reported as valid without any checksum being computed.
    pub valid: bool,
    /// The checksum value read from the page: bytes 0-3 of the FIL header
    /// for the MySQL algorithms, or the last 4 bytes of the page for
    /// MariaDB full_crc32. Zero when the buffer was too short to inspect.
    pub stored_checksum: u32,
    /// The checksum value calculated from the page data.
    pub calculated_checksum: u32,
}
203
204/// Calculate MariaDB full_crc32 checksum.
205///
206/// MariaDB 10.5+ uses a single CRC-32C over bytes `[0..page_size-4)`.
207/// The checksum is stored in the last 4 bytes of the page (NOT in the
208/// FIL header at bytes 0-3 like MySQL).
209pub fn calculate_mariadb_full_crc32(page_data: &[u8], page_size: usize) -> u32 {
210    crc32c::crc32c(&page_data[0..page_size - 4])
211}
212
213/// Calculate CRC-32C checksum for an InnoDB page.
214///
215/// MySQL computes CRC-32C independently over two disjoint ranges and XORs
216/// the results (see buf_calc_page_crc32 in buf0checksum.cc). Skipped regions:
217/// - bytes 0-3 (stored checksum)
218/// - bytes 26-37 (flush LSN + space ID, written outside buffer pool)
219/// - last 8 bytes (trailer)
220///
221/// Range 1: bytes 4..26 (FIL_PAGE_OFFSET to FIL_PAGE_FILE_FLUSH_LSN)
222/// Range 2: bytes 38..(page_size-8) (FIL_PAGE_DATA to end before trailer)
223pub fn calculate_crc32c(page_data: &[u8], page_size: usize) -> u32 {
224    let end = page_size - SIZE_FIL_TRAILER;
225
226    // CRC-32C of range 1: bytes 4..26
227    let crc1 = crc32c::crc32c(&page_data[FIL_PAGE_OFFSET..FIL_PAGE_FILE_FLUSH_LSN]);
228
229    // CRC-32C of range 2: bytes 38..(page_size - 8)
230    let crc2 = crc32c::crc32c(&page_data[FIL_PAGE_DATA..end]);
231
232    // MySQL XORs the two CRC values (not chained/appended)
233    crc1 ^ crc2
234}
235
236/// InnoDB's ut_fold_ulint_pair — the core folding function.
237///
238/// All arithmetic is done in u32 with wrapping, matching the effective behavior
239/// of InnoDB's checksum as implemented by innodb_ruby and verified against real
240/// .ibd files from MySQL 5.0 through 5.6.
241#[inline]
242fn ut_fold_ulint_pair(n1: u32, n2: u32) -> u32 {
243    let step = n1 ^ n2 ^ UT_HASH_RANDOM_MASK2;
244    let step = (step << 8).wrapping_add(n1);
245    let step = step ^ UT_HASH_RANDOM_MASK;
246    step.wrapping_add(n2)
247}
248
249/// Fold a byte sequence using ut_fold_ulint_pair, one byte at a time.
250///
251/// This matches innodb_ruby's fold_enumerator implementation, which processes
252/// each byte individually through fold_pair. Verified against real .ibd files
253/// from MySQL 5.0 and 5.6.
254fn ut_fold_binary(data: &[u8]) -> u32 {
255    let mut fold: u32 = 0;
256    for &byte in data {
257        fold = ut_fold_ulint_pair(fold, byte as u32);
258    }
259    fold
260}
261
262/// Calculate the legacy InnoDB checksum (buf_calc_page_new_checksum).
263///
264/// Used by MySQL < 5.7.7 (innodb_checksum_algorithm=innodb).
265/// Folds two byte ranges and sums the results:
266/// 1. Bytes 4..26 (FIL_PAGE_OFFSET to FIL_PAGE_FILE_FLUSH_LSN)
267/// 2. Bytes 38..(page_size - 8) (FIL_PAGE_DATA to end before trailer)
268pub fn calculate_innodb_checksum(page_data: &[u8], page_size: usize) -> u32 {
269    let end = page_size - SIZE_FIL_TRAILER;
270
271    let fold1 = ut_fold_binary(&page_data[FIL_PAGE_OFFSET..FIL_PAGE_FILE_FLUSH_LSN]);
272    let fold2 = ut_fold_binary(&page_data[FIL_PAGE_DATA..end]);
273
274    fold1.wrapping_add(fold2)
275}
276
277/// Validate the LSN consistency between header and trailer.
278///
279/// The low 32 bits of the header LSN should match the trailer LSN field.
280///
281/// # Examples
282///
283/// Build a 16 KiB page with a matching LSN in the header (bytes 16-23)
284/// and trailer (last 4 bytes):
285///
286/// ```
287/// use idb::innodb::checksum::validate_lsn;
288/// use byteorder::{BigEndian, ByteOrder};
289///
290/// let mut page = vec![0u8; 16384];
291///
292/// // Write LSN 0x00000000_AABBCCDD into the FIL header at byte 16
293/// BigEndian::write_u64(&mut page[16..], 0xAABBCCDD);
294///
295/// // Write the low 32 bits into the trailer (last 4 bytes of the page)
296/// BigEndian::write_u32(&mut page[16380..], 0xAABBCCDD);
297///
298/// assert!(validate_lsn(&page, 16384));
299///
300/// // Corrupt the trailer — LSN no longer matches
301/// BigEndian::write_u32(&mut page[16380..], 0x00000000);
302/// assert!(!validate_lsn(&page, 16384));
303/// ```
304pub fn validate_lsn(page_data: &[u8], page_size: u32) -> bool {
305    let ps = page_size as usize;
306    if page_data.len() < ps {
307        return false;
308    }
309    let header_lsn = BigEndian::read_u64(&page_data[FIL_PAGE_LSN..]);
310    let header_lsn_low32 = (header_lsn & 0xFFFFFFFF) as u32;
311
312    let trailer_offset = ps - SIZE_FIL_TRAILER;
313    let trailer_lsn_low32 = BigEndian::read_u32(&page_data[trailer_offset + 4..]);
314
315    header_lsn_low32 == trailer_lsn_low32
316}
317
318/// Recalculate and write the correct checksum into a mutable page buffer.
319///
320/// For MySQL CRC-32C and legacy InnoDB, the checksum is written to bytes 0-3
321/// (the `FIL_PAGE_SPACE_OR_CHKSUM` field). For MariaDB full_crc32, the
322/// checksum is written to the last 4 bytes of the page.
323///
324/// The `ChecksumAlgorithm::None` variant is a no-op.
325pub fn recalculate_checksum(page_data: &mut [u8], page_size: u32, algorithm: ChecksumAlgorithm) {
326    let ps = page_size as usize;
327    if page_data.len() < ps {
328        return;
329    }
330
331    match algorithm {
332        ChecksumAlgorithm::Crc32c => {
333            let checksum = calculate_crc32c(page_data, ps);
334            BigEndian::write_u32(&mut page_data[FIL_PAGE_SPACE_OR_CHKSUM..], checksum);
335        }
336        ChecksumAlgorithm::InnoDB => {
337            let checksum = calculate_innodb_checksum(page_data, ps);
338            BigEndian::write_u32(&mut page_data[FIL_PAGE_SPACE_OR_CHKSUM..], checksum);
339        }
340        ChecksumAlgorithm::MariaDbFullCrc32 => {
341            let checksum = calculate_mariadb_full_crc32(page_data, ps);
342            BigEndian::write_u32(&mut page_data[ps - 4..], checksum);
343        }
344        ChecksumAlgorithm::None => {}
345    }
346}
347
#[cfg(test)]
mod tests {
    use super::*;
    use crate::innodb::vendor::MariaDbFormat;

    /// Page size shared by every fixture in this module.
    const TEST_PAGE_SIZE: usize = 16384;
    /// Page-type value stamped into the fixtures' FIL header.
    const TEST_PAGE_TYPE: u16 = 17855;

    /// Zero-filled page carrying page number 1, LSN 5000 in both header and
    /// trailer, the fixture page type, and space ID 1.
    fn zeroed_page_with_header() -> Vec<u8> {
        let mut page = vec![0u8; TEST_PAGE_SIZE];
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 5000);
        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], TEST_PAGE_TYPE);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_ID..], 1);
        let trailer = TEST_PAGE_SIZE - SIZE_FIL_TRAILER;
        BigEndian::write_u32(&mut page[trailer + 4..], 5000);
        page
    }

    /// 0xAB-filled page with page number 1, so the content is non-trivial.
    fn filled_page() -> Vec<u8> {
        let mut page = vec![0xABu8; TEST_PAGE_SIZE];
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
        page
    }

    /// Zero-filled page whose header LSN is 0x12345678 and whose trailer LSN
    /// field holds `trailer_lsn`.
    fn page_with_lsn_pair(trailer_lsn: u32) -> Vec<u8> {
        let mut page = vec![0u8; TEST_PAGE_SIZE];
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 0x12345678);
        BigEndian::write_u32(&mut page[TEST_PAGE_SIZE - 4..], trailer_lsn);
        page
    }

    #[test]
    fn test_all_zero_page_is_valid() {
        let page = vec![0u8; TEST_PAGE_SIZE];
        let outcome = validate_checksum(&page, TEST_PAGE_SIZE as u32, None);
        assert!(outcome.valid);
    }

    #[test]
    fn test_no_checksum_magic() {
        let mut page = vec![0u8; TEST_PAGE_SIZE];
        BigEndian::write_u32(&mut page[0..], 0xDEADBEEF);
        let outcome = validate_checksum(&page, TEST_PAGE_SIZE as u32, None);
        assert!(outcome.valid);
        assert_eq!(outcome.algorithm, ChecksumAlgorithm::None);
    }

    #[test]
    fn test_mariadb_full_crc32() {
        let mut page = filled_page();
        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], TEST_PAGE_TYPE);

        // Store the full_crc32 checksum, computed independently of the
        // function under test, in the last 4 bytes.
        let crc = crc32c::crc32c(&page[0..TEST_PAGE_SIZE - 4]);
        BigEndian::write_u32(&mut page[TEST_PAGE_SIZE - 4..], crc);

        let vendor = VendorInfo::mariadb(MariaDbFormat::FullCrc32);
        let outcome = validate_checksum(&page, TEST_PAGE_SIZE as u32, Some(&vendor));
        assert!(outcome.valid);
        assert_eq!(outcome.algorithm, ChecksumAlgorithm::MariaDbFullCrc32);
    }

    #[test]
    fn test_mariadb_full_crc32_invalid() {
        let mut page = filled_page();
        // Deliberately wrong checksum in the last 4 bytes
        BigEndian::write_u32(&mut page[TEST_PAGE_SIZE - 4..], 0xDEADDEAD);

        let vendor = VendorInfo::mariadb(MariaDbFormat::FullCrc32);
        let outcome = validate_checksum(&page, TEST_PAGE_SIZE as u32, Some(&vendor));
        assert!(!outcome.valid);
        assert_eq!(outcome.algorithm, ChecksumAlgorithm::MariaDbFullCrc32);
    }

    #[test]
    fn test_lsn_validation_matching() {
        let page = page_with_lsn_pair(0x12345678);
        assert!(validate_lsn(&page, TEST_PAGE_SIZE as u32));
    }

    #[test]
    fn test_lsn_validation_mismatch() {
        let page = page_with_lsn_pair(0xAAAAAAAA);
        assert!(!validate_lsn(&page, TEST_PAGE_SIZE as u32));
    }

    #[test]
    fn test_recalculate_checksum_crc32c() {
        let mut page = zeroed_page_with_header();

        // Start from a corrupt checksum so the page must fail first
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], 0xDEAD);
        let before = validate_checksum(&page, TEST_PAGE_SIZE as u32, None);
        assert!(!before.valid);

        // Recalculating repairs it
        recalculate_checksum(&mut page, TEST_PAGE_SIZE as u32, ChecksumAlgorithm::Crc32c);
        let after = validate_checksum(&page, TEST_PAGE_SIZE as u32, None);
        assert!(after.valid);
        assert_eq!(after.algorithm, ChecksumAlgorithm::Crc32c);
    }

    #[test]
    fn test_recalculate_checksum_innodb() {
        let mut page = zeroed_page_with_header();

        recalculate_checksum(&mut page, TEST_PAGE_SIZE as u32, ChecksumAlgorithm::InnoDB);
        let outcome = validate_checksum(&page, TEST_PAGE_SIZE as u32, None);
        assert!(outcome.valid);
        assert_eq!(outcome.algorithm, ChecksumAlgorithm::InnoDB);
    }

    #[test]
    fn test_recalculate_checksum_mariadb() {
        let mut page = filled_page();
        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], TEST_PAGE_TYPE);

        recalculate_checksum(
            &mut page,
            TEST_PAGE_SIZE as u32,
            ChecksumAlgorithm::MariaDbFullCrc32,
        );
        let vendor = VendorInfo::mariadb(MariaDbFormat::FullCrc32);
        let outcome = validate_checksum(&page, TEST_PAGE_SIZE as u32, Some(&vendor));
        assert!(outcome.valid);
        assert_eq!(outcome.algorithm, ChecksumAlgorithm::MariaDbFullCrc32);
    }
}