Skip to main content

idb/innodb/
corruption.rs

1//! Corruption pattern classification for InnoDB pages.
2//!
3//! Classifies the type of damage on corrupt pages by analyzing the byte
4//! patterns in the data area. This helps DBAs understand the likely cause
5//! of corruption (e.g., disk bitrot, torn writes, zero-fill from firmware).
6
7use byteorder::{BigEndian, ByteOrder};
8use serde::Serialize;
9
10use crate::innodb::checksum::calculate_crc32c;
11use crate::innodb::constants::*;
12
13/// Classification of corruption damage patterns.
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
15#[serde(rename_all = "snake_case")]
16pub enum CorruptionPattern {
17    /// >90% of data area is zero bytes — firmware bug or partial write
18    ZeroFill,
19    /// Shannon entropy >7.5 bits/byte — random overwrite or uninitialized memory
20    RandomNoise,
21    /// Valid prefix then zeros/0xFF at sector boundary — interrupted I/O
22    TornWrite,
23    /// Only the FIL header area has invalid data — metadata corruption
24    HeaderOnly,
25    /// Small Hamming distance (<8 bits) between stored and calculated checksum — single-bit errors
26    Bitrot,
27    /// Does not match any known pattern
28    Unknown,
29}
30
31impl CorruptionPattern {
32    /// Human-readable name for the corruption pattern.
33    pub fn name(self) -> &'static str {
34        match self {
35            CorruptionPattern::ZeroFill => "zero-fill",
36            CorruptionPattern::RandomNoise => "random-noise",
37            CorruptionPattern::TornWrite => "torn-write",
38            CorruptionPattern::HeaderOnly => "header-only",
39            CorruptionPattern::Bitrot => "bitrot",
40            CorruptionPattern::Unknown => "unknown",
41        }
42    }
43
44    /// Short description of the likely cause.
45    pub fn description(self) -> &'static str {
46        match self {
47            CorruptionPattern::ZeroFill => {
48                "Data area mostly zeros — possible firmware bug or partial write"
49            }
50            CorruptionPattern::RandomNoise => {
51                "High entropy data — random overwrite or uninitialized memory"
52            }
53            CorruptionPattern::TornWrite => {
54                "Valid prefix followed by zeros at sector boundary — interrupted I/O"
55            }
56            CorruptionPattern::HeaderOnly => {
57                "Only FIL header damaged — metadata corruption, data likely intact"
58            }
59            CorruptionPattern::Bitrot => {
60                "Small number of bit flips — silent data corruption (bitrot)"
61            }
62            CorruptionPattern::Unknown => "Corruption pattern does not match any known category",
63        }
64    }
65}
66
67/// Compute Shannon entropy of a byte slice in bits per byte.
68fn shannon_entropy(data: &[u8]) -> f64 {
69    if data.is_empty() {
70        return 0.0;
71    }
72    let mut counts = [0u64; 256];
73    for &b in data {
74        counts[b as usize] += 1;
75    }
76    let len = data.len() as f64;
77    counts
78        .iter()
79        .filter(|&&c| c > 0)
80        .map(|&c| {
81            let p = c as f64 / len;
82            -p * p.log2()
83        })
84        .sum()
85}
86
87/// Count the number of differing bits between two u32 values.
88fn hamming_distance(a: u32, b: u32) -> u32 {
89    (a ^ b).count_ones()
90}
91
92/// Classify the corruption pattern of a page.
93///
94/// The page must already be known to have an invalid checksum. This function
95/// examines the byte patterns to determine the likely cause of corruption.
96///
97/// Detection heuristics are applied in priority order:
98/// 1. **ZeroFill** -- >90% of the data area is zero bytes
99/// 2. **TornWrite** -- valid prefix then all zeros/0xFF at a 512-byte sector boundary
100/// 3. **Bitrot** -- Hamming distance between stored and calculated checksum is <= 8
101/// 4. **HeaderOnly** -- data area CRC is valid but stored header checksum is wrong
102///    (with Hamming distance > 8, ruling out simple bitrot)
103/// 5. **RandomNoise** -- Shannon entropy of data area exceeds 7.5 bits/byte
104/// 6. **Unknown** -- fallback
105pub fn classify_corruption(page_data: &[u8], page_size: u32) -> CorruptionPattern {
106    let ps = page_size as usize;
107    if page_data.len() < ps || ps <= FIL_PAGE_DATA + SIZE_FIL_TRAILER {
108        return CorruptionPattern::Unknown;
109    }
110
111    let data_start = FIL_PAGE_DATA;
112    let data_end = ps - SIZE_FIL_TRAILER;
113    let data_area = &page_data[data_start..data_end];
114
115    // 1. ZeroFill: >90% of data area is zeros
116    let zero_count = data_area.iter().filter(|&&b| b == 0).count();
117    if zero_count as f64 / data_area.len() as f64 > 0.9 {
118        return CorruptionPattern::ZeroFill;
119    }
120
121    // 2. TornWrite: find a 512-byte sector boundary where valid data transitions
122    //    to all zeros or all 0xFF
123    let sector_size = 512;
124    if ps >= sector_size * 2 {
125        for sector_start in (sector_size..ps).step_by(sector_size) {
126            let sector_end = (sector_start + sector_size).min(ps);
127            let sector = &page_data[sector_start..sector_end];
128            let all_zero = sector.iter().all(|&b| b == 0);
129            let all_ff = sector.iter().all(|&b| b == 0xFF);
130            if all_zero || all_ff {
131                // Verify that earlier sectors have some non-trivial content
132                let prefix = &page_data[..sector_start];
133                let non_zero = prefix.iter().filter(|&&b| b != 0).count();
134                if non_zero > 10 {
135                    // Also verify that all remaining sectors are blank too
136                    let tail = &page_data[sector_start..ps];
137                    let tail_all_zero = tail.iter().all(|&b| b == 0);
138                    let tail_all_ff = tail.iter().all(|&b| b == 0xFF);
139                    if tail_all_zero || tail_all_ff {
140                        return CorruptionPattern::TornWrite;
141                    }
142                }
143            }
144        }
145    }
146
147    let stored = BigEndian::read_u32(&page_data[FIL_PAGE_SPACE_OR_CHKSUM..]);
148    let calculated = calculate_crc32c(page_data, ps);
149
150    // 3. Bitrot: small Hamming distance between stored and calculated checksum.
151    //    Checked before HeaderOnly so that single-bit flips in the checksum
152    //    field are classified as bitrot rather than header-only corruption.
153    if hamming_distance(stored, calculated) <= 8 {
154        return CorruptionPattern::Bitrot;
155    }
156
157    // 4. HeaderOnly: the stored checksum is wrong, but the rest of the page
158    //    is self-consistent. We verify this by checking that the trailer LSN
159    //    matches the header LSN (indicating the data area and trailer are
160    //    intact). We already ruled out small-distance bitrot above.
161    if stored != calculated {
162        let header_lsn = BigEndian::read_u64(&page_data[FIL_PAGE_LSN..]);
163        let header_lsn_low32 = (header_lsn & 0xFFFFFFFF) as u32;
164        let trailer_offset = ps - SIZE_FIL_TRAILER;
165        let trailer_lsn_low32 = BigEndian::read_u32(&page_data[trailer_offset + 4..]);
166        if header_lsn_low32 == trailer_lsn_low32 && header_lsn > 0 {
167            return CorruptionPattern::HeaderOnly;
168        }
169    }
170
171    // 5. RandomNoise: high Shannon entropy in data area
172    if shannon_entropy(data_area) > 7.5 {
173        return CorruptionPattern::RandomNoise;
174    }
175
176    // 6. Fallback
177    CorruptionPattern::Unknown
178}
179
180#[cfg(test)]
181mod tests {
182    use super::*;
183
184    /// Build a page fully populated with non-zero data in the data area,
185    /// with a correct CRC-32C checksum and matching trailer LSN.
186    fn build_valid_page(page_size: u32) -> Vec<u8> {
187        let ps = page_size as usize;
188        let mut page = vec![0u8; ps];
189
190        // FIL header
191        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
192        BigEndian::write_u32(&mut page[FIL_PAGE_PREV..], FIL_NULL);
193        BigEndian::write_u32(&mut page[FIL_PAGE_NEXT..], FIL_NULL);
194        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 5000);
195        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], 17855); // INDEX
196        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_ID..], 1);
197
198        // Fill the ENTIRE data area with non-zero, varied data so ZeroFill
199        // never triggers on test pages that should match other patterns
200        let data_end = ps - SIZE_FIL_TRAILER;
201        for i in FIL_PAGE_DATA..data_end {
202            page[i] = ((i.wrapping_mul(7).wrapping_add(13)) & 0xFF) as u8;
203        }
204
205        // Trailer LSN (low 32 bits of header LSN)
206        let trailer = ps - SIZE_FIL_TRAILER;
207        BigEndian::write_u32(&mut page[trailer + 4..], (5000u64 & 0xFFFFFFFF) as u32);
208
209        // Compute and store correct CRC-32C checksum
210        let crc = calculate_crc32c(&page, ps);
211        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], crc);
212
213        page
214    }
215
216    #[test]
217    fn test_classify_zero_fill() {
218        let ps = 16384u32;
219        let mut page = vec![0u8; ps as usize];
220        // Set a non-zero header so it's not treated as empty by the caller,
221        // but leave the data area as all zeros
222        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
223        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 100);
224        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], 17855);
225        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], 0xBAAD);
226
227        let pattern = classify_corruption(&page, ps);
228        assert_eq!(pattern, CorruptionPattern::ZeroFill);
229    }
230
231    #[test]
232    fn test_classify_random_noise() {
233        let ps = 16384u32;
234        let mut page = vec![0u8; ps as usize];
235
236        // Fill entire page with pseudo-random high-entropy data
237        let mut state: u64 = 0xDEADBEEF_CAFEBABE;
238        for byte in page.iter_mut() {
239            state = state
240                .wrapping_mul(6364136223846793005)
241                .wrapping_add(1442695040888963407);
242            *byte = (state >> 33) as u8;
243        }
244
245        // Ensure the stored checksum doesn't match any valid algorithm
246        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], 0x12345678);
247
248        // Ensure header LSN does NOT match trailer LSN, so HeaderOnly
249        // doesn't trigger before RandomNoise
250        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 0xAAAAAAAABBBBBBBB);
251        let trailer = ps as usize - SIZE_FIL_TRAILER;
252        BigEndian::write_u32(&mut page[trailer + 4..], 0xCCCCCCCC);
253
254        let pattern = classify_corruption(&page, ps);
255        assert_eq!(pattern, CorruptionPattern::RandomNoise);
256    }
257
258    #[test]
259    fn test_classify_torn_write() {
260        let ps = 16384u32;
261        let mut page = build_valid_page(ps);
262
263        // Simulate a torn write: keep the first ~75% of the page, zero out
264        // the rest at a 512-byte sector boundary. Using a late break point
265        // ensures the data area is NOT >90% zeros (which would trigger
266        // ZeroFill instead).
267        let break_point = 12288; // 24 sectors = 75% of 16384
268        for byte in page[break_point..].iter_mut() {
269            *byte = 0;
270        }
271
272        // The checksum is now wrong (data changed), and the page has a valid
273        // prefix followed by zeros at a sector boundary
274        let pattern = classify_corruption(&page, ps);
275        assert_eq!(pattern, CorruptionPattern::TornWrite);
276    }
277
278    #[test]
279    fn test_classify_header_only() {
280        let ps = 16384u32;
281        let mut page = build_valid_page(ps);
282
283        // Corrupt only the stored checksum in the header with a value that
284        // has >8 bit Hamming distance from the correct one (so bitrot doesn't
285        // trigger first)
286        let correct_crc = BigEndian::read_u32(&page[FIL_PAGE_SPACE_OR_CHKSUM..]);
287        // XOR with a value that flips many bits
288        BigEndian::write_u32(
289            &mut page[FIL_PAGE_SPACE_OR_CHKSUM..],
290            correct_crc ^ 0xFF00FF00,
291        );
292
293        // Verify precondition: Hamming distance > 8
294        let stored = BigEndian::read_u32(&page[FIL_PAGE_SPACE_OR_CHKSUM..]);
295        let calculated = calculate_crc32c(&page, ps as usize);
296        assert!(
297            hamming_distance(stored, calculated) > 8,
298            "Test setup: Hamming distance must be >8 to avoid bitrot classification"
299        );
300
301        let pattern = classify_corruption(&page, ps);
302        assert_eq!(pattern, CorruptionPattern::HeaderOnly);
303    }
304
305    #[test]
306    fn test_classify_bitrot() {
307        let ps = 16384u32;
308        let mut page = build_valid_page(ps);
309
310        // Flip exactly 1 bit in the stored checksum — Hamming distance = 1
311        let crc = BigEndian::read_u32(&page[FIL_PAGE_SPACE_OR_CHKSUM..]);
312        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], crc ^ 0x01);
313
314        let pattern = classify_corruption(&page, ps);
315        assert_eq!(pattern, CorruptionPattern::Bitrot);
316    }
317
318    #[test]
319    fn test_classify_bitrot_few_bits() {
320        let ps = 16384u32;
321        let mut page = build_valid_page(ps);
322
323        // Flip 4 bits in the stored checksum — still within bitrot threshold (<=8)
324        let crc = BigEndian::read_u32(&page[FIL_PAGE_SPACE_OR_CHKSUM..]);
325        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], crc ^ 0x0F);
326
327        let stored = BigEndian::read_u32(&page[FIL_PAGE_SPACE_OR_CHKSUM..]);
328        let calculated = calculate_crc32c(&page, ps as usize);
329        assert!(hamming_distance(stored, calculated) <= 8);
330
331        let pattern = classify_corruption(&page, ps);
332        assert_eq!(pattern, CorruptionPattern::Bitrot);
333    }
334
335    #[test]
336    fn test_classify_unknown() {
337        let ps = 16384u32;
338        // Fill with a uniform byte value — low entropy, not zeros, not random
339        let mut page = vec![0x42u8; ps as usize];
340        // Set a checksum with large Hamming distance from calculated
341        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], 0xBAADF00D);
342        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
343        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 100);
344        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], 17855);
345
346        // The data area is all 0x42 — only 1 unique byte, so HeaderOnly won't
347        // trigger (requires >4 unique bytes). Entropy is 0, so RandomNoise
348        // won't trigger. Not mostly zeros, not torn write.
349        let pattern = classify_corruption(&page, ps);
350        assert_eq!(pattern, CorruptionPattern::Unknown);
351    }
352
353    #[test]
354    fn test_pattern_name() {
355        assert_eq!(CorruptionPattern::ZeroFill.name(), "zero-fill");
356        assert_eq!(CorruptionPattern::RandomNoise.name(), "random-noise");
357        assert_eq!(CorruptionPattern::TornWrite.name(), "torn-write");
358        assert_eq!(CorruptionPattern::HeaderOnly.name(), "header-only");
359        assert_eq!(CorruptionPattern::Bitrot.name(), "bitrot");
360        assert_eq!(CorruptionPattern::Unknown.name(), "unknown");
361    }
362
363    #[test]
364    fn test_pattern_description() {
365        assert!(CorruptionPattern::ZeroFill.description().contains("zeros"));
366        assert!(CorruptionPattern::RandomNoise
367            .description()
368            .contains("entropy"));
369        assert!(CorruptionPattern::TornWrite
370            .description()
371            .contains("interrupted"));
372        assert!(CorruptionPattern::HeaderOnly
373            .description()
374            .contains("header"));
375        assert!(CorruptionPattern::Bitrot
376            .description()
377            .contains("bit flips"));
378        assert!(CorruptionPattern::Unknown
379            .description()
380            .contains("does not match"));
381    }
382
383    #[test]
384    fn test_shannon_entropy_uniform() {
385        // All same byte — entropy should be 0
386        let data = vec![0xAA; 1000];
387        assert_eq!(shannon_entropy(&data), 0.0);
388    }
389
390    #[test]
391    fn test_shannon_entropy_empty() {
392        assert_eq!(shannon_entropy(&[]), 0.0);
393    }
394
395    #[test]
396    fn test_shannon_entropy_two_values() {
397        // Exactly 50/50 split between two values — entropy should be 1.0
398        let mut data = vec![0u8; 1000];
399        for byte in data[..500].iter_mut() {
400            *byte = 1;
401        }
402        let entropy = shannon_entropy(&data);
403        assert!((entropy - 1.0).abs() < 0.001);
404    }
405
406    #[test]
407    fn test_hamming_distance_identical() {
408        assert_eq!(hamming_distance(0x12345678, 0x12345678), 0);
409    }
410
411    #[test]
412    fn test_hamming_distance_one_bit() {
413        assert_eq!(hamming_distance(0x00000000, 0x00000001), 1);
414    }
415
416    #[test]
417    fn test_hamming_distance_all_bits() {
418        assert_eq!(hamming_distance(0x00000000, 0xFFFFFFFF), 32);
419    }
420
421    #[test]
422    fn test_short_page_returns_unknown() {
423        let page = vec![0xFF; 10];
424        assert_eq!(
425            classify_corruption(&page, 16384),
426            CorruptionPattern::Unknown
427        );
428    }
429}