Skip to main content

idb/innodb/
checksum.rs

//! InnoDB page checksum validation.
//!
//! Implements the two checksum algorithms used by MySQL's InnoDB engine:
//!
//! - **CRC-32C** (default since MySQL 5.7.7): XOR of two independent CRC-32C
//!   values computed over bytes `[4..26)` and `[38..page_size-8)`. These are
//!   NOT chained — each range is checksummed separately and the results XORed.
//!
//! - **Legacy InnoDB** (MySQL < 5.7.7): Uses `ut_fold_ulint_pair` with wrapping
//!   `u32` arithmetic, processing bytes one at a time over the same two ranges.
//!
//! Use [`validate_checksum`] to check a page against both algorithms.
13
14use byteorder::{BigEndian, ByteOrder};
15use crate::innodb::constants::*;
16
/// Checksum algorithms used by InnoDB.
///
/// The type is a fieldless, `Copy` enum; it also derives `Hash` so it can be
/// used directly as a map or set key (for example to count pages per
/// detected algorithm).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ChecksumAlgorithm {
    /// CRC-32C (hardware accelerated, MySQL 5.7.7+ default).
    Crc32c,
    /// Legacy InnoDB checksum (`buf_calc_page_new_checksum` equivalent).
    InnoDB,
    /// No checksum (`innodb_checksum_algorithm=none`).
    None,
}
27
28/// Validate a page's checksum.
29///
30/// Returns the detected algorithm and whether the checksum matches.
31pub fn validate_checksum(page_data: &[u8], page_size: u32) -> ChecksumResult {
32    let ps = page_size as usize;
33    if page_data.len() < ps {
34        return ChecksumResult {
35            algorithm: ChecksumAlgorithm::None,
36            valid: false,
37            stored_checksum: 0,
38            calculated_checksum: 0,
39        };
40    }
41
42    let stored_checksum = BigEndian::read_u32(&page_data[FIL_PAGE_SPACE_OR_CHKSUM..]);
43
44    // Check for "none" algorithm (stored checksum is BUF_NO_CHECKSUM_MAGIC = 0xDEADBEEF)
45    if stored_checksum == 0xDEADBEEF {
46        return ChecksumResult {
47            algorithm: ChecksumAlgorithm::None,
48            valid: true,
49            stored_checksum,
50            calculated_checksum: 0xDEADBEEF,
51        };
52    }
53
54    // All zeros page (freshly allocated) - valid with any algorithm
55    if stored_checksum == 0 {
56        let all_zero = page_data[..ps].iter().all(|&b| b == 0);
57        if all_zero {
58            return ChecksumResult {
59                algorithm: ChecksumAlgorithm::None,
60                valid: true,
61                stored_checksum: 0,
62                calculated_checksum: 0,
63            };
64        }
65    }
66
67    // Try CRC-32C first (most common for MySQL 8.0+)
68    let crc_checksum = calculate_crc32c(page_data, ps);
69    if stored_checksum == crc_checksum {
70        return ChecksumResult {
71            algorithm: ChecksumAlgorithm::Crc32c,
72            valid: true,
73            stored_checksum,
74            calculated_checksum: crc_checksum,
75        };
76    }
77
78    // Try legacy InnoDB checksum
79    let innodb_checksum = calculate_innodb_checksum(page_data, ps);
80    if stored_checksum == innodb_checksum {
81        return ChecksumResult {
82            algorithm: ChecksumAlgorithm::InnoDB,
83            valid: true,
84            stored_checksum,
85            calculated_checksum: innodb_checksum,
86        };
87    }
88
89    // Neither matched - report failure with CRC-32C as expected
90    ChecksumResult {
91        algorithm: ChecksumAlgorithm::Crc32c,
92        valid: false,
93        stored_checksum,
94        calculated_checksum: crc_checksum,
95    }
96}
97
98/// Result of a checksum validation.
99#[derive(Debug, Clone)]
100pub struct ChecksumResult {
101    /// The checksum algorithm that was detected or attempted.
102    pub algorithm: ChecksumAlgorithm,
103    /// Whether the stored checksum matches the calculated value.
104    pub valid: bool,
105    /// The checksum value stored in the page's FIL header (bytes 0-3).
106    pub stored_checksum: u32,
107    /// The checksum value calculated from the page data.
108    pub calculated_checksum: u32,
109}
110
111/// Calculate CRC-32C checksum for an InnoDB page.
112///
113/// MySQL computes CRC-32C independently over two disjoint ranges and XORs
114/// the results (see buf_calc_page_crc32 in buf0checksum.cc). Skipped regions:
115/// - bytes 0-3 (stored checksum)
116/// - bytes 26-37 (flush LSN + space ID, written outside buffer pool)
117/// - last 8 bytes (trailer)
118///
119/// Range 1: bytes 4..26 (FIL_PAGE_OFFSET to FIL_PAGE_FILE_FLUSH_LSN)
120/// Range 2: bytes 38..(page_size-8) (FIL_PAGE_DATA to end before trailer)
121fn calculate_crc32c(page_data: &[u8], page_size: usize) -> u32 {
122    let end = page_size - SIZE_FIL_TRAILER;
123
124    // CRC-32C of range 1: bytes 4..26
125    let crc1 = crc32c::crc32c(&page_data[FIL_PAGE_OFFSET..FIL_PAGE_FILE_FLUSH_LSN]);
126
127    // CRC-32C of range 2: bytes 38..(page_size - 8)
128    let crc2 = crc32c::crc32c(&page_data[FIL_PAGE_DATA..end]);
129
130    // MySQL XORs the two CRC values (not chained/appended)
131    crc1 ^ crc2
132}
133
134/// InnoDB's ut_fold_ulint_pair — the core folding function.
135///
136/// All arithmetic is done in u32 with wrapping, matching the effective behavior
137/// of InnoDB's checksum as implemented by innodb_ruby and verified against real
138/// .ibd files from MySQL 5.0 through 5.6.
139#[inline]
140fn ut_fold_ulint_pair(n1: u32, n2: u32) -> u32 {
141    let step = n1 ^ n2 ^ UT_HASH_RANDOM_MASK2;
142    let step = (step << 8).wrapping_add(n1);
143    let step = step ^ UT_HASH_RANDOM_MASK;
144    step.wrapping_add(n2)
145}
146
147/// Fold a byte sequence using ut_fold_ulint_pair, one byte at a time.
148///
149/// This matches innodb_ruby's fold_enumerator implementation, which processes
150/// each byte individually through fold_pair. Verified against real .ibd files
151/// from MySQL 5.0 and 5.6.
152fn ut_fold_binary(data: &[u8]) -> u32 {
153    let mut fold: u32 = 0;
154    for &byte in data {
155        fold = ut_fold_ulint_pair(fold, byte as u32);
156    }
157    fold
158}
159
160/// Calculate the legacy InnoDB checksum (buf_calc_page_new_checksum).
161///
162/// Used by MySQL < 5.7.7 (innodb_checksum_algorithm=innodb).
163/// Folds two byte ranges and sums the results:
164/// 1. Bytes 4..26 (FIL_PAGE_OFFSET to FIL_PAGE_FILE_FLUSH_LSN)
165/// 2. Bytes 38..(page_size - 8) (FIL_PAGE_DATA to end before trailer)
166fn calculate_innodb_checksum(page_data: &[u8], page_size: usize) -> u32 {
167    let end = page_size - SIZE_FIL_TRAILER;
168
169    let fold1 = ut_fold_binary(&page_data[FIL_PAGE_OFFSET..FIL_PAGE_FILE_FLUSH_LSN]);
170    let fold2 = ut_fold_binary(&page_data[FIL_PAGE_DATA..end]);
171
172    fold1.wrapping_add(fold2)
173}
174
175/// Validate the LSN consistency between header and trailer.
176///
177/// The low 32 bits of the header LSN should match the trailer LSN field.
178pub fn validate_lsn(page_data: &[u8], page_size: u32) -> bool {
179    let ps = page_size as usize;
180    if page_data.len() < ps {
181        return false;
182    }
183    let header_lsn = BigEndian::read_u64(&page_data[FIL_PAGE_LSN..]);
184    let header_lsn_low32 = (header_lsn & 0xFFFFFFFF) as u32;
185
186    let trailer_offset = ps - SIZE_FIL_TRAILER;
187    let trailer_lsn_low32 = BigEndian::read_u32(&page_data[trailer_offset + 4..]);
188
189    header_lsn_low32 == trailer_lsn_low32
190}
191
#[cfg(test)]
mod tests {
    use super::*;

    /// Page size, in bytes, shared by every fixture in this module.
    const PAGE_SIZE: u32 = 16384;

    /// Offset of the trailer's low-32-bit LSN copy (last four bytes of the page).
    const TRAILER_LSN_AT: usize = 16380;

    /// Build a zero-filled page of `PAGE_SIZE` bytes.
    fn blank_page() -> Vec<u8> {
        vec![0u8; PAGE_SIZE as usize]
    }

    /// A page consisting solely of zero bytes must be accepted.
    #[test]
    fn test_all_zero_page_is_valid() {
        let zeroed = blank_page();
        let outcome = validate_checksum(&zeroed, PAGE_SIZE);
        assert!(outcome.valid);
    }

    /// The 0xDEADBEEF magic marks a page written with checksums disabled.
    #[test]
    fn test_no_checksum_magic() {
        let mut buf = blank_page();
        BigEndian::write_u32(&mut buf[..], 0xDEADBEEF);
        let outcome = validate_checksum(&buf, PAGE_SIZE);
        assert!(outcome.valid);
        assert_eq!(outcome.algorithm, ChecksumAlgorithm::None);
    }

    /// Header LSN low bits equal to the trailer copy pass validation.
    #[test]
    fn test_lsn_validation_matching() {
        let mut buf = blank_page();
        // Header LSN = 0x0000000012345678, stored at FIL_PAGE_LSN.
        BigEndian::write_u64(&mut buf[FIL_PAGE_LSN..], 0x12345678);
        // Same low 32 bits in the trailer copy.
        BigEndian::write_u32(&mut buf[TRAILER_LSN_AT..], 0x12345678);
        assert!(validate_lsn(&buf, PAGE_SIZE));
    }

    /// A trailer copy that differs from the header LSN fails validation.
    #[test]
    fn test_lsn_validation_mismatch() {
        let mut buf = blank_page();
        BigEndian::write_u64(&mut buf[FIL_PAGE_LSN..], 0x12345678);
        BigEndian::write_u32(&mut buf[TRAILER_LSN_AT..], 0xAAAAAAAA);
        assert!(!validate_lsn(&buf, PAGE_SIZE));
    }
}