Skip to main content

idb/innodb/
checksum.rs

1//! InnoDB page checksum validation.
2//!
3//! Implements the checksum algorithms used by MySQL and MariaDB InnoDB:
4//!
5//! - **CRC-32C** (MySQL 5.7.7+ default): XOR of two independent CRC32c
6//!   values computed over bytes `[4..26)` and `[38..page_size-8)`. These are
7//!   NOT chained — each range is checksummed separately and the results XORed.
8//!
9//! - **Legacy InnoDB** (MySQL < 5.7.7): Uses `ut_fold_ulint_pair` with wrapping
10//!   `u32` arithmetic, processing bytes one at a time over the same two ranges.
11//!
//! - **MariaDB full_crc32** (MariaDB 10.4.3+, default since 10.5): Single CRC-32C
//!   over bytes `[0..page_size-4)`. The checksum is stored in the last 4 bytes of
//!   the page (not in the FIL header).
15//!
16//! Use [`validate_checksum`] to check a page against all applicable algorithms.
17
18use crate::innodb::constants::*;
19use crate::innodb::vendor::VendorInfo;
20use byteorder::{BigEndian, ByteOrder};
21
/// The page checksum schemes this module can recognise.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ChecksumAlgorithm {
    /// CRC-32C, the default from MySQL 5.7.7 onwards.
    Crc32c,
    /// Legacy fold-based InnoDB checksum (`buf_calc_page_new_checksum` equivalent).
    InnoDB,
    /// MariaDB `full_crc32`: one CRC-32C over the whole page minus its last 4 bytes.
    MariaDbFullCrc32,
    /// Checksums disabled (`innodb_checksum_algorithm=none`).
    None,
}
34
35/// Validate a page's checksum.
36///
37/// When `vendor_info` is provided and indicates MariaDB full_crc32 format,
38/// the full_crc32 algorithm is tried first (checksum stored in the last 4
39/// bytes of the page). Otherwise, MySQL CRC-32C and legacy InnoDB are tried.
40pub fn validate_checksum(
41    page_data: &[u8],
42    page_size: u32,
43    vendor_info: Option<&VendorInfo>,
44) -> ChecksumResult {
45    let ps = page_size as usize;
46    if page_data.len() < ps {
47        return ChecksumResult {
48            algorithm: ChecksumAlgorithm::None,
49            valid: false,
50            stored_checksum: 0,
51            calculated_checksum: 0,
52        };
53    }
54
55    // All zeros page (freshly allocated) - valid with any algorithm
56    let first_u32 = BigEndian::read_u32(&page_data[FIL_PAGE_SPACE_OR_CHKSUM..]);
57    if first_u32 == 0 {
58        let all_zero = page_data[..ps].iter().all(|&b| b == 0);
59        if all_zero {
60            return ChecksumResult {
61                algorithm: ChecksumAlgorithm::None,
62                valid: true,
63                stored_checksum: 0,
64                calculated_checksum: 0,
65            };
66        }
67    }
68
69    // MariaDB full_crc32: try this first when vendor indicates it
70    if vendor_info.is_some_and(|v| v.is_full_crc32()) {
71        let stored = BigEndian::read_u32(&page_data[ps - 4..ps]);
72        let calculated = calculate_mariadb_full_crc32(page_data, ps);
73        if stored == calculated {
74            return ChecksumResult {
75                algorithm: ChecksumAlgorithm::MariaDbFullCrc32,
76                valid: true,
77                stored_checksum: stored,
78                calculated_checksum: calculated,
79            };
80        }
81        // full_crc32 didn't match — report failure
82        return ChecksumResult {
83            algorithm: ChecksumAlgorithm::MariaDbFullCrc32,
84            valid: false,
85            stored_checksum: stored,
86            calculated_checksum: calculated,
87        };
88    }
89
90    let stored_checksum = first_u32;
91
92    // Check for "none" algorithm (stored checksum is BUF_NO_CHECKSUM_MAGIC = 0xDEADBEEF)
93    if stored_checksum == 0xDEADBEEF {
94        return ChecksumResult {
95            algorithm: ChecksumAlgorithm::None,
96            valid: true,
97            stored_checksum,
98            calculated_checksum: 0xDEADBEEF,
99        };
100    }
101
102    // Try CRC-32C first (most common for MySQL 8.0+)
103    let crc_checksum = calculate_crc32c(page_data, ps);
104    if stored_checksum == crc_checksum {
105        return ChecksumResult {
106            algorithm: ChecksumAlgorithm::Crc32c,
107            valid: true,
108            stored_checksum,
109            calculated_checksum: crc_checksum,
110        };
111    }
112
113    // Try legacy InnoDB checksum
114    let innodb_checksum = calculate_innodb_checksum(page_data, ps);
115    if stored_checksum == innodb_checksum {
116        return ChecksumResult {
117            algorithm: ChecksumAlgorithm::InnoDB,
118            valid: true,
119            stored_checksum,
120            calculated_checksum: innodb_checksum,
121        };
122    }
123
124    // Neither matched - report failure with CRC-32C as expected
125    ChecksumResult {
126        algorithm: ChecksumAlgorithm::Crc32c,
127        valid: false,
128        stored_checksum,
129        calculated_checksum: crc_checksum,
130    }
131}
132
133/// Result of a checksum validation.
134#[derive(Debug, Clone)]
135pub struct ChecksumResult {
136    /// The checksum algorithm that was detected or attempted.
137    pub algorithm: ChecksumAlgorithm,
138    /// Whether the stored checksum matches the calculated value.
139    pub valid: bool,
140    /// The checksum value stored in the page's FIL header (bytes 0-3).
141    pub stored_checksum: u32,
142    /// The checksum value calculated from the page data.
143    pub calculated_checksum: u32,
144}
145
146/// Calculate MariaDB full_crc32 checksum.
147///
148/// MariaDB 10.5+ uses a single CRC-32C over bytes `[0..page_size-4)`.
149/// The checksum is stored in the last 4 bytes of the page (NOT in the
150/// FIL header at bytes 0-3 like MySQL).
151fn calculate_mariadb_full_crc32(page_data: &[u8], page_size: usize) -> u32 {
152    crc32c::crc32c(&page_data[0..page_size - 4])
153}
154
155/// Calculate CRC-32C checksum for an InnoDB page.
156///
157/// MySQL computes CRC-32C independently over two disjoint ranges and XORs
158/// the results (see buf_calc_page_crc32 in buf0checksum.cc). Skipped regions:
159/// - bytes 0-3 (stored checksum)
160/// - bytes 26-37 (flush LSN + space ID, written outside buffer pool)
161/// - last 8 bytes (trailer)
162///
163/// Range 1: bytes 4..26 (FIL_PAGE_OFFSET to FIL_PAGE_FILE_FLUSH_LSN)
164/// Range 2: bytes 38..(page_size-8) (FIL_PAGE_DATA to end before trailer)
165fn calculate_crc32c(page_data: &[u8], page_size: usize) -> u32 {
166    let end = page_size - SIZE_FIL_TRAILER;
167
168    // CRC-32C of range 1: bytes 4..26
169    let crc1 = crc32c::crc32c(&page_data[FIL_PAGE_OFFSET..FIL_PAGE_FILE_FLUSH_LSN]);
170
171    // CRC-32C of range 2: bytes 38..(page_size - 8)
172    let crc2 = crc32c::crc32c(&page_data[FIL_PAGE_DATA..end]);
173
174    // MySQL XORs the two CRC values (not chained/appended)
175    crc1 ^ crc2
176}
177
178/// InnoDB's ut_fold_ulint_pair — the core folding function.
179///
180/// All arithmetic is done in u32 with wrapping, matching the effective behavior
181/// of InnoDB's checksum as implemented by innodb_ruby and verified against real
182/// .ibd files from MySQL 5.0 through 5.6.
183#[inline]
184fn ut_fold_ulint_pair(n1: u32, n2: u32) -> u32 {
185    let step = n1 ^ n2 ^ UT_HASH_RANDOM_MASK2;
186    let step = (step << 8).wrapping_add(n1);
187    let step = step ^ UT_HASH_RANDOM_MASK;
188    step.wrapping_add(n2)
189}
190
191/// Fold a byte sequence using ut_fold_ulint_pair, one byte at a time.
192///
193/// This matches innodb_ruby's fold_enumerator implementation, which processes
194/// each byte individually through fold_pair. Verified against real .ibd files
195/// from MySQL 5.0 and 5.6.
196fn ut_fold_binary(data: &[u8]) -> u32 {
197    let mut fold: u32 = 0;
198    for &byte in data {
199        fold = ut_fold_ulint_pair(fold, byte as u32);
200    }
201    fold
202}
203
204/// Calculate the legacy InnoDB checksum (buf_calc_page_new_checksum).
205///
206/// Used by MySQL < 5.7.7 (innodb_checksum_algorithm=innodb).
207/// Folds two byte ranges and sums the results:
208/// 1. Bytes 4..26 (FIL_PAGE_OFFSET to FIL_PAGE_FILE_FLUSH_LSN)
209/// 2. Bytes 38..(page_size - 8) (FIL_PAGE_DATA to end before trailer)
210fn calculate_innodb_checksum(page_data: &[u8], page_size: usize) -> u32 {
211    let end = page_size - SIZE_FIL_TRAILER;
212
213    let fold1 = ut_fold_binary(&page_data[FIL_PAGE_OFFSET..FIL_PAGE_FILE_FLUSH_LSN]);
214    let fold2 = ut_fold_binary(&page_data[FIL_PAGE_DATA..end]);
215
216    fold1.wrapping_add(fold2)
217}
218
219/// Validate the LSN consistency between header and trailer.
220///
221/// The low 32 bits of the header LSN should match the trailer LSN field.
222pub fn validate_lsn(page_data: &[u8], page_size: u32) -> bool {
223    let ps = page_size as usize;
224    if page_data.len() < ps {
225        return false;
226    }
227    let header_lsn = BigEndian::read_u64(&page_data[FIL_PAGE_LSN..]);
228    let header_lsn_low32 = (header_lsn & 0xFFFFFFFF) as u32;
229
230    let trailer_offset = ps - SIZE_FIL_TRAILER;
231    let trailer_lsn_low32 = BigEndian::read_u32(&page_data[trailer_offset + 4..]);
232
233    header_lsn_low32 == trailer_lsn_low32
234}
235
#[cfg(test)]
mod tests {
    //! Unit tests for checksum and LSN validation.
    //!
    //! Pages are synthesised in memory: a checksum is computed with the same
    //! helper the validator uses and written back into the page, so these
    //! tests check algorithm selection and plumbing, not the checksum
    //! formulas against real .ibd files.

    use super::*;
    use crate::innodb::vendor::MariaDbFormat;

    /// Default page size used by the synthetic pages below.
    const PS: usize = 16384;

    #[test]
    fn test_all_zero_page_is_valid() {
        let page = vec![0u8; 16384];
        let result = validate_checksum(&page, 16384, None);
        assert!(result.valid);
    }

    #[test]
    fn test_no_checksum_magic() {
        let mut page = vec![0u8; 16384];
        BigEndian::write_u32(&mut page[0..], 0xDEADBEEF);
        let result = validate_checksum(&page, 16384, None);
        assert!(result.valid);
        assert_eq!(result.algorithm, ChecksumAlgorithm::None);
    }

    #[test]
    fn test_crc32c_roundtrip() {
        let mut page = vec![0x5Au8; PS];
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 7);

        // The header checksum field is outside both CRC ranges, so writing it
        // afterwards does not change the calculated value.
        let crc = calculate_crc32c(&page, PS);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], crc);

        let result = validate_checksum(&page, PS as u32, None);
        assert!(result.valid);
        assert_eq!(result.algorithm, ChecksumAlgorithm::Crc32c);
        assert_eq!(result.stored_checksum, crc);
        assert_eq!(result.calculated_checksum, crc);
    }

    #[test]
    fn test_legacy_innodb_roundtrip() {
        let mut page = vec![0x3Cu8; PS];
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 9);

        let legacy = calculate_innodb_checksum(&page, PS);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], legacy);

        let result = validate_checksum(&page, PS as u32, None);
        assert!(result.valid);
        assert_eq!(result.algorithm, ChecksumAlgorithm::InnoDB);
        assert_eq!(result.stored_checksum, legacy);
        assert_eq!(result.calculated_checksum, legacy);
    }

    #[test]
    fn test_mismatch_reports_crc32c() {
        let mut page = vec![0x5Au8; PS];
        let crc = calculate_crc32c(&page, PS);
        let legacy = calculate_innodb_checksum(&page, PS);

        // Choose a stored value that no acceptance path can match.
        let bogus = (1u32..)
            .find(|&c| c != crc && c != legacy && c != 0xDEADBEEF)
            .expect("at most three values are excluded");
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], bogus);

        let result = validate_checksum(&page, PS as u32, None);
        assert!(!result.valid);
        assert_eq!(result.algorithm, ChecksumAlgorithm::Crc32c);
        assert_eq!(result.stored_checksum, bogus);
        assert_eq!(result.calculated_checksum, crc);
    }

    #[test]
    fn test_truncated_buffer_is_invalid() {
        let page = vec![0xABu8; 100];
        let result = validate_checksum(&page, PS as u32, None);
        assert!(!result.valid);
        assert_eq!(result.algorithm, ChecksumAlgorithm::None);
    }

    #[test]
    fn test_mariadb_full_crc32() {
        let ps = 16384usize;
        let mut page = vec![0xABu8; ps];
        // Write some data to make it non-trivial
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], 17855);

        // Calculate and store the full_crc32 checksum in last 4 bytes
        let crc = crc32c::crc32c(&page[0..ps - 4]);
        BigEndian::write_u32(&mut page[ps - 4..], crc);

        let vendor = VendorInfo::mariadb(MariaDbFormat::FullCrc32);
        let result = validate_checksum(&page, ps as u32, Some(&vendor));
        assert!(result.valid);
        assert_eq!(result.algorithm, ChecksumAlgorithm::MariaDbFullCrc32);
    }

    #[test]
    fn test_mariadb_full_crc32_invalid() {
        let ps = 16384usize;
        let mut page = vec![0xABu8; ps];
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
        // Wrong checksum in last 4 bytes
        BigEndian::write_u32(&mut page[ps - 4..], 0xDEADDEAD);

        let vendor = VendorInfo::mariadb(MariaDbFormat::FullCrc32);
        let result = validate_checksum(&page, ps as u32, Some(&vendor));
        assert!(!result.valid);
        assert_eq!(result.algorithm, ChecksumAlgorithm::MariaDbFullCrc32);
    }

    #[test]
    fn test_lsn_validation_matching() {
        let mut page = vec![0u8; 16384];
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 0x12345678);
        BigEndian::write_u32(&mut page[16380..], 0x12345678);
        assert!(validate_lsn(&page, 16384));
    }

    #[test]
    fn test_lsn_validation_mismatch() {
        let mut page = vec![0u8; 16384];
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 0x12345678);
        BigEndian::write_u32(&mut page[16380..], 0xAAAAAAAA);
        assert!(!validate_lsn(&page, 16384));
    }

    #[test]
    fn test_lsn_validation_truncated_buffer() {
        let page = vec![0u8; 100];
        assert!(!validate_lsn(&page, PS as u32));
    }
}