Skip to main content

idb/innodb/
checksum.rs

1use byteorder::{BigEndian, ByteOrder};
2
3use crate::innodb::constants::*;
4
/// Page checksum algorithms that InnoDB can use.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ChecksumAlgorithm {
    /// CRC-32C; hardware accelerated and the default since MySQL 5.7.7.
    Crc32c,
    /// The legacy InnoDB checksum, equivalent to `buf_calc_page_new_checksum`.
    InnoDB,
    /// Checksums disabled (`innodb_checksum_algorithm=none`).
    None,
}
15
16/// Validate a page's checksum.
17///
18/// Returns the detected algorithm and whether the checksum matches.
19pub fn validate_checksum(page_data: &[u8], page_size: u32) -> ChecksumResult {
20    let ps = page_size as usize;
21    if page_data.len() < ps {
22        return ChecksumResult {
23            algorithm: ChecksumAlgorithm::None,
24            valid: false,
25            stored_checksum: 0,
26            calculated_checksum: 0,
27        };
28    }
29
30    let stored_checksum = BigEndian::read_u32(&page_data[FIL_PAGE_SPACE_OR_CHKSUM..]);
31
32    // Check for "none" algorithm (stored checksum is BUF_NO_CHECKSUM_MAGIC = 0xDEADBEEF)
33    if stored_checksum == 0xDEADBEEF {
34        return ChecksumResult {
35            algorithm: ChecksumAlgorithm::None,
36            valid: true,
37            stored_checksum,
38            calculated_checksum: 0xDEADBEEF,
39        };
40    }
41
42    // All zeros page (freshly allocated) - valid with any algorithm
43    if stored_checksum == 0 {
44        let all_zero = page_data[..ps].iter().all(|&b| b == 0);
45        if all_zero {
46            return ChecksumResult {
47                algorithm: ChecksumAlgorithm::None,
48                valid: true,
49                stored_checksum: 0,
50                calculated_checksum: 0,
51            };
52        }
53    }
54
55    // Try CRC-32C first (most common for MySQL 8.0+)
56    let crc_checksum = calculate_crc32c(page_data, ps);
57    if stored_checksum == crc_checksum {
58        return ChecksumResult {
59            algorithm: ChecksumAlgorithm::Crc32c,
60            valid: true,
61            stored_checksum,
62            calculated_checksum: crc_checksum,
63        };
64    }
65
66    // Try legacy InnoDB checksum
67    let innodb_checksum = calculate_innodb_checksum(page_data, ps);
68    if stored_checksum == innodb_checksum {
69        return ChecksumResult {
70            algorithm: ChecksumAlgorithm::InnoDB,
71            valid: true,
72            stored_checksum,
73            calculated_checksum: innodb_checksum,
74        };
75    }
76
77    // Neither matched - report failure with CRC-32C as expected
78    ChecksumResult {
79        algorithm: ChecksumAlgorithm::Crc32c,
80        valid: false,
81        stored_checksum,
82        calculated_checksum: crc_checksum,
83    }
84}
85
86/// Result of a checksum validation.
87#[derive(Debug, Clone)]
88pub struct ChecksumResult {
89    pub algorithm: ChecksumAlgorithm,
90    pub valid: bool,
91    pub stored_checksum: u32,
92    pub calculated_checksum: u32,
93}
94
95/// Calculate CRC-32C checksum for an InnoDB page.
96///
97/// MySQL computes CRC-32C over two disjoint ranges, skipping:
98/// - bytes 0-3 (stored checksum)
99/// - bytes 26-37 (flush LSN + space ID, written outside buffer pool)
100/// - last 8 bytes (trailer)
101///
102/// Range 1: bytes 4..26 (FIL_PAGE_OFFSET to FIL_PAGE_FILE_FLUSH_LSN)
103/// Range 2: bytes 38..(page_size-8) (FIL_PAGE_DATA to end before trailer)
104fn calculate_crc32c(page_data: &[u8], page_size: usize) -> u32 {
105    let end = page_size - SIZE_FIL_TRAILER;
106
107    // CRC-32C of range 1: bytes 4..26
108    let crc = crc32c::crc32c(&page_data[FIL_PAGE_OFFSET..FIL_PAGE_FILE_FLUSH_LSN]);
109
110    // Continue CRC with range 2: bytes 38..(page_size - 8)
111    crc32c::crc32c_append(crc, &page_data[FIL_PAGE_DATA..end])
112}
113
114/// MySQL's ut_fold_ulint_pair — the core folding function.
115///
116/// Uses u64 to match MySQL's `ulint` (unsigned long) on LP64 platforms.
117/// The final result is masked to 32 bits by the caller.
118#[inline]
119fn ut_fold_ulint_pair(n1: u64, n2: u64) -> u64 {
120    let mask2 = UT_HASH_RANDOM_MASK2 as u64;
121    let mask = UT_HASH_RANDOM_MASK as u64;
122    ((((n1 ^ n2 ^ mask2) << 8).wrapping_add(n1)) ^ mask).wrapping_add(n2)
123}
124
125/// MySQL's ut_fold_binary — fold a byte sequence using ut_fold_ulint_pair.
126///
127/// Processes 8 bytes at a time (two u32 reads), then handles remainder.
128/// Returns u64 (matching MySQL's ulint) to be truncated by caller.
129fn ut_fold_binary(data: &[u8]) -> u64 {
130    let mut fold: u64 = 0;
131    let len = data.len();
132    let aligned_len = len & !7; // round down to multiple of 8
133
134    // Process 8 bytes at a time
135    let mut i = 0;
136    while i < aligned_len {
137        fold = ut_fold_ulint_pair(fold, BigEndian::read_u32(&data[i..]) as u64);
138        i += 4;
139        fold = ut_fold_ulint_pair(fold, BigEndian::read_u32(&data[i..]) as u64);
140        i += 4;
141    }
142
143    // Handle remaining bytes (matches MySQL's switch fallthrough)
144    let remainder = len & 7;
145    match remainder {
146        7 => {
147            fold = ut_fold_ulint_pair(fold, data[i] as u64);
148            i += 1;
149            fold = ut_fold_ulint_pair(fold, data[i] as u64);
150            i += 1;
151            fold = ut_fold_ulint_pair(fold, data[i] as u64);
152            i += 1;
153            fold = ut_fold_ulint_pair(fold, BigEndian::read_u32(&data[i..]) as u64);
154        }
155        6 => {
156            fold = ut_fold_ulint_pair(fold, data[i] as u64);
157            i += 1;
158            fold = ut_fold_ulint_pair(fold, data[i] as u64);
159            i += 1;
160            fold = ut_fold_ulint_pair(fold, BigEndian::read_u32(&data[i..]) as u64);
161        }
162        5 => {
163            fold = ut_fold_ulint_pair(fold, data[i] as u64);
164            i += 1;
165            fold = ut_fold_ulint_pair(fold, BigEndian::read_u32(&data[i..]) as u64);
166        }
167        4 => {
168            fold = ut_fold_ulint_pair(fold, BigEndian::read_u32(&data[i..]) as u64);
169        }
170        3 => {
171            fold = ut_fold_ulint_pair(fold, data[i] as u64);
172            i += 1;
173            fold = ut_fold_ulint_pair(fold, data[i] as u64);
174            i += 1;
175            fold = ut_fold_ulint_pair(fold, data[i] as u64);
176        }
177        2 => {
178            fold = ut_fold_ulint_pair(fold, data[i] as u64);
179            i += 1;
180            fold = ut_fold_ulint_pair(fold, data[i] as u64);
181        }
182        1 => {
183            fold = ut_fold_ulint_pair(fold, data[i] as u64);
184        }
185        _ => {}
186    }
187
188    fold
189}
190
191/// Calculate the legacy InnoDB checksum (buf_calc_page_new_checksum).
192///
193/// This matches the MySQL source exactly:
194/// 1. Fold bytes from FIL_PAGE_OFFSET (4) to FIL_PAGE_FILE_FLUSH_LSN (26)
195/// 2. Fold bytes from FIL_PAGE_DATA (38) to page_size - 8
196/// 3. Sum both results and mask to 32 bits
197fn calculate_innodb_checksum(page_data: &[u8], page_size: usize) -> u32 {
198    let end = page_size - SIZE_FIL_TRAILER;
199
200    // Range 1: bytes 4..26 (FIL_PAGE_OFFSET to FIL_PAGE_FILE_FLUSH_LSN)
201    let fold1 = ut_fold_binary(&page_data[FIL_PAGE_OFFSET..FIL_PAGE_FILE_FLUSH_LSN]);
202
203    // Range 2: bytes 38..(page_size - 8) (FIL_PAGE_DATA to end of user data)
204    let fold2 = ut_fold_binary(&page_data[FIL_PAGE_DATA..end]);
205
206    // Mask to 32 bits (matching MySQL: checksum = checksum & 0xFFFFFFFF)
207    fold1.wrapping_add(fold2) as u32
208}
209
210/// Validate the LSN consistency between header and trailer.
211///
212/// The low 32 bits of the header LSN should match the trailer LSN field.
213pub fn validate_lsn(page_data: &[u8], page_size: u32) -> bool {
214    let ps = page_size as usize;
215    if page_data.len() < ps {
216        return false;
217    }
218    let header_lsn = BigEndian::read_u64(&page_data[FIL_PAGE_LSN..]);
219    let header_lsn_low32 = (header_lsn & 0xFFFFFFFF) as u32;
220
221    let trailer_offset = ps - SIZE_FIL_TRAILER;
222    let trailer_lsn_low32 = BigEndian::read_u32(&page_data[trailer_offset + 4..]);
223
224    header_lsn_low32 == trailer_lsn_low32
225}
226
#[cfg(test)]
mod tests {
    use super::*;

    /// Page size, in bytes, shared by every test in this module.
    const PAGE_SIZE: u32 = 16384;

    /// Build a page-sized buffer filled with zero bytes.
    fn zeroed_page() -> Vec<u8> {
        vec![0u8; PAGE_SIZE as usize]
    }

    #[test]
    fn test_all_zero_page_is_valid() {
        let blank = zeroed_page();
        let outcome = validate_checksum(&blank, PAGE_SIZE);
        assert!(outcome.valid);
    }

    #[test]
    fn test_no_checksum_magic() {
        let mut buf = zeroed_page();
        // Store the "no checksum" magic in the checksum field at offset 0.
        BigEndian::write_u32(&mut buf[..], 0xDEADBEEF);
        let outcome = validate_checksum(&buf, PAGE_SIZE);
        assert!(outcome.valid);
        assert_eq!(outcome.algorithm, ChecksumAlgorithm::None);
    }

    #[test]
    fn test_lsn_validation_matching() {
        let mut buf = zeroed_page();
        // The trailer LSN field is the last 4 bytes of the page (offset 16380).
        let trailer_lsn_at = buf.len() - 4;
        // Header LSN = 0x0000000012345678; the trailer carries its low 32 bits.
        BigEndian::write_u64(&mut buf[FIL_PAGE_LSN..], 0x12345678);
        BigEndian::write_u32(&mut buf[trailer_lsn_at..], 0x12345678);
        assert!(validate_lsn(&buf, PAGE_SIZE));
    }

    #[test]
    fn test_lsn_validation_mismatch() {
        let mut buf = zeroed_page();
        let trailer_lsn_at = buf.len() - 4;
        BigEndian::write_u64(&mut buf[FIL_PAGE_LSN..], 0x12345678);
        // Deliberately different value in the trailer.
        BigEndian::write_u32(&mut buf[trailer_lsn_at..], 0xAAAAAAAA);
        assert!(!validate_lsn(&buf, PAGE_SIZE));
    }
}