Skip to main content

idb/innodb/
checksum.rs

1use byteorder::{BigEndian, ByteOrder};
2use crate::innodb::constants::*;
3
/// Page checksum schemes that can be reported for an InnoDB page.
///
/// The variant describes how the four-byte checksum stored at the start of
/// a page is to be interpreted.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ChecksumAlgorithm {
    /// CRC-32C (hardware accelerated; the default from MySQL 5.7.7 onwards).
    Crc32c,
    /// Legacy InnoDB fold-based checksum (equivalent of
    /// `buf_calc_page_new_checksum`).
    InnoDB,
    /// Checksumming disabled (`innodb_checksum_algorithm=none`).
    None,
}
14
15/// Validate a page's checksum.
16///
17/// Returns the detected algorithm and whether the checksum matches.
18pub fn validate_checksum(page_data: &[u8], page_size: u32) -> ChecksumResult {
19    let ps = page_size as usize;
20    if page_data.len() < ps {
21        return ChecksumResult {
22            algorithm: ChecksumAlgorithm::None,
23            valid: false,
24            stored_checksum: 0,
25            calculated_checksum: 0,
26        };
27    }
28
29    let stored_checksum = BigEndian::read_u32(&page_data[FIL_PAGE_SPACE_OR_CHKSUM..]);
30
31    // Check for "none" algorithm (stored checksum is BUF_NO_CHECKSUM_MAGIC = 0xDEADBEEF)
32    if stored_checksum == 0xDEADBEEF {
33        return ChecksumResult {
34            algorithm: ChecksumAlgorithm::None,
35            valid: true,
36            stored_checksum,
37            calculated_checksum: 0xDEADBEEF,
38        };
39    }
40
41    // All zeros page (freshly allocated) - valid with any algorithm
42    if stored_checksum == 0 {
43        let all_zero = page_data[..ps].iter().all(|&b| b == 0);
44        if all_zero {
45            return ChecksumResult {
46                algorithm: ChecksumAlgorithm::None,
47                valid: true,
48                stored_checksum: 0,
49                calculated_checksum: 0,
50            };
51        }
52    }
53
54    // Try CRC-32C first (most common for MySQL 8.0+)
55    let crc_checksum = calculate_crc32c(page_data, ps);
56    if stored_checksum == crc_checksum {
57        return ChecksumResult {
58            algorithm: ChecksumAlgorithm::Crc32c,
59            valid: true,
60            stored_checksum,
61            calculated_checksum: crc_checksum,
62        };
63    }
64
65    // Try legacy InnoDB checksum
66    let innodb_checksum = calculate_innodb_checksum(page_data, ps);
67    if stored_checksum == innodb_checksum {
68        return ChecksumResult {
69            algorithm: ChecksumAlgorithm::InnoDB,
70            valid: true,
71            stored_checksum,
72            calculated_checksum: innodb_checksum,
73        };
74    }
75
76    // Neither matched - report failure with CRC-32C as expected
77    ChecksumResult {
78        algorithm: ChecksumAlgorithm::Crc32c,
79        valid: false,
80        stored_checksum,
81        calculated_checksum: crc_checksum,
82    }
83}
84
85/// Result of a checksum validation.
86#[derive(Debug, Clone)]
87pub struct ChecksumResult {
88    pub algorithm: ChecksumAlgorithm,
89    pub valid: bool,
90    pub stored_checksum: u32,
91    pub calculated_checksum: u32,
92}
93
94/// Calculate CRC-32C checksum for an InnoDB page.
95///
96/// MySQL computes CRC-32C independently over two disjoint ranges and XORs
97/// the results (see buf_calc_page_crc32 in buf0checksum.cc). Skipped regions:
98/// - bytes 0-3 (stored checksum)
99/// - bytes 26-37 (flush LSN + space ID, written outside buffer pool)
100/// - last 8 bytes (trailer)
101///
102/// Range 1: bytes 4..26 (FIL_PAGE_OFFSET to FIL_PAGE_FILE_FLUSH_LSN)
103/// Range 2: bytes 38..(page_size-8) (FIL_PAGE_DATA to end before trailer)
104fn calculate_crc32c(page_data: &[u8], page_size: usize) -> u32 {
105    let end = page_size - SIZE_FIL_TRAILER;
106
107    // CRC-32C of range 1: bytes 4..26
108    let crc1 = crc32c::crc32c(&page_data[FIL_PAGE_OFFSET..FIL_PAGE_FILE_FLUSH_LSN]);
109
110    // CRC-32C of range 2: bytes 38..(page_size - 8)
111    let crc2 = crc32c::crc32c(&page_data[FIL_PAGE_DATA..end]);
112
113    // MySQL XORs the two CRC values (not chained/appended)
114    crc1 ^ crc2
115}
116
117/// InnoDB's ut_fold_ulint_pair — the core folding function.
118///
119/// All arithmetic is done in u32 with wrapping, matching the effective behavior
120/// of InnoDB's checksum as implemented by innodb_ruby and verified against real
121/// .ibd files from MySQL 5.0 through 5.6.
122#[inline]
123fn ut_fold_ulint_pair(n1: u32, n2: u32) -> u32 {
124    let step = n1 ^ n2 ^ UT_HASH_RANDOM_MASK2;
125    let step = (step << 8).wrapping_add(n1);
126    let step = step ^ UT_HASH_RANDOM_MASK;
127    step.wrapping_add(n2)
128}
129
130/// Fold a byte sequence using ut_fold_ulint_pair, one byte at a time.
131///
132/// This matches innodb_ruby's fold_enumerator implementation, which processes
133/// each byte individually through fold_pair. Verified against real .ibd files
134/// from MySQL 5.0 and 5.6.
135fn ut_fold_binary(data: &[u8]) -> u32 {
136    let mut fold: u32 = 0;
137    for &byte in data {
138        fold = ut_fold_ulint_pair(fold, byte as u32);
139    }
140    fold
141}
142
143/// Calculate the legacy InnoDB checksum (buf_calc_page_new_checksum).
144///
145/// Used by MySQL < 5.7.7 (innodb_checksum_algorithm=innodb).
146/// Folds two byte ranges and sums the results:
147/// 1. Bytes 4..26 (FIL_PAGE_OFFSET to FIL_PAGE_FILE_FLUSH_LSN)
148/// 2. Bytes 38..(page_size - 8) (FIL_PAGE_DATA to end before trailer)
149fn calculate_innodb_checksum(page_data: &[u8], page_size: usize) -> u32 {
150    let end = page_size - SIZE_FIL_TRAILER;
151
152    let fold1 = ut_fold_binary(&page_data[FIL_PAGE_OFFSET..FIL_PAGE_FILE_FLUSH_LSN]);
153    let fold2 = ut_fold_binary(&page_data[FIL_PAGE_DATA..end]);
154
155    fold1.wrapping_add(fold2)
156}
157
158/// Validate the LSN consistency between header and trailer.
159///
160/// The low 32 bits of the header LSN should match the trailer LSN field.
161pub fn validate_lsn(page_data: &[u8], page_size: u32) -> bool {
162    let ps = page_size as usize;
163    if page_data.len() < ps {
164        return false;
165    }
166    let header_lsn = BigEndian::read_u64(&page_data[FIL_PAGE_LSN..]);
167    let header_lsn_low32 = (header_lsn & 0xFFFFFFFF) as u32;
168
169    let trailer_offset = ps - SIZE_FIL_TRAILER;
170    let trailer_lsn_low32 = BigEndian::read_u32(&page_data[trailer_offset + 4..]);
171
172    header_lsn_low32 == trailer_lsn_low32
173}
174
#[cfg(test)]
mod tests {
    use super::*;

    /// Page size used by every test (the InnoDB default of 16 KiB).
    const PAGE_SIZE: usize = 16384;

    /// Build a deterministic page with non-zero content everywhere so the
    /// all-zero shortcut in `validate_checksum` is not taken.
    fn sample_page() -> Vec<u8> {
        (0..PAGE_SIZE).map(|i| (i % 251) as u8 + 1).collect()
    }

    #[test]
    fn test_all_zero_page_is_valid() {
        let page = vec![0u8; PAGE_SIZE];
        let result = validate_checksum(&page, PAGE_SIZE as u32);
        assert!(result.valid);
    }

    #[test]
    fn test_no_checksum_magic() {
        let mut page = vec![0u8; PAGE_SIZE];
        BigEndian::write_u32(&mut page[0..], 0xDEADBEEF);
        let result = validate_checksum(&page, PAGE_SIZE as u32);
        assert!(result.valid);
        assert_eq!(result.algorithm, ChecksumAlgorithm::None);
    }

    #[test]
    fn test_crc32c_round_trip() {
        // The checksum field is excluded from the checksummed ranges, so
        // writing the computed value back does not change the result.
        let mut page = sample_page();
        let crc = calculate_crc32c(&page, PAGE_SIZE);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], crc);

        let result = validate_checksum(&page, PAGE_SIZE as u32);
        assert!(result.valid);
        assert_eq!(result.algorithm, ChecksumAlgorithm::Crc32c);
        assert_eq!(result.stored_checksum, crc);
        assert_eq!(result.calculated_checksum, crc);
    }

    #[test]
    fn test_innodb_round_trip() {
        let mut page = sample_page();
        let folded = calculate_innodb_checksum(&page, PAGE_SIZE);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], folded);

        let result = validate_checksum(&page, PAGE_SIZE as u32);
        assert!(result.valid);
        assert_eq!(result.algorithm, ChecksumAlgorithm::InnoDB);
        assert_eq!(result.stored_checksum, folded);
        assert_eq!(result.calculated_checksum, folded);
    }

    #[test]
    fn test_corrupted_page_is_invalid() {
        let mut page = sample_page();
        let crc = calculate_crc32c(&page, PAGE_SIZE);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], crc);
        // Damage one byte inside the checksummed body range.
        page[FIL_PAGE_DATA + 10] ^= 0xFF;

        let result = validate_checksum(&page, PAGE_SIZE as u32);
        assert!(!result.valid);
        assert_eq!(result.algorithm, ChecksumAlgorithm::Crc32c);
        assert_eq!(result.stored_checksum, crc);
        assert_ne!(result.calculated_checksum, crc);
    }

    #[test]
    fn test_truncated_buffer_is_rejected() {
        let page = vec![0u8; 100];
        let result = validate_checksum(&page, PAGE_SIZE as u32);
        assert!(!result.valid);
        assert_eq!(result.algorithm, ChecksumAlgorithm::None);
        assert!(!validate_lsn(&page, PAGE_SIZE as u32));
    }

    #[test]
    fn test_lsn_validation_matching() {
        let mut page = vec![0u8; PAGE_SIZE];
        // Write LSN = 0x0000000012345678 into the header LSN field.
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 0x12345678);
        // Write the low 32 bits into the last 4 bytes of the page
        // (trailer + 4, offset 16380).
        BigEndian::write_u32(&mut page[PAGE_SIZE - 4..], 0x12345678);
        assert!(validate_lsn(&page, PAGE_SIZE as u32));
    }

    #[test]
    fn test_lsn_validation_mismatch() {
        let mut page = vec![0u8; PAGE_SIZE];
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 0x12345678);
        BigEndian::write_u32(&mut page[PAGE_SIZE - 4..], 0xAAAAAAAA);
        assert!(!validate_lsn(&page, PAGE_SIZE as u32));
    }
}