1use byteorder::{BigEndian, ByteOrder};
8use serde::Serialize;
9
10use crate::innodb::checksum::calculate_crc32c;
11use crate::innodb::constants::*;
12
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
15#[serde(rename_all = "snake_case")]
16pub enum CorruptionPattern {
17 ZeroFill,
19 RandomNoise,
21 TornWrite,
23 HeaderOnly,
25 Bitrot,
27 Unknown,
29}
30
31impl CorruptionPattern {
32 pub fn name(self) -> &'static str {
34 match self {
35 CorruptionPattern::ZeroFill => "zero-fill",
36 CorruptionPattern::RandomNoise => "random-noise",
37 CorruptionPattern::TornWrite => "torn-write",
38 CorruptionPattern::HeaderOnly => "header-only",
39 CorruptionPattern::Bitrot => "bitrot",
40 CorruptionPattern::Unknown => "unknown",
41 }
42 }
43
44 pub fn description(self) -> &'static str {
46 match self {
47 CorruptionPattern::ZeroFill => {
48 "Data area mostly zeros — possible firmware bug or partial write"
49 }
50 CorruptionPattern::RandomNoise => {
51 "High entropy data — random overwrite or uninitialized memory"
52 }
53 CorruptionPattern::TornWrite => {
54 "Valid prefix followed by zeros at sector boundary — interrupted I/O"
55 }
56 CorruptionPattern::HeaderOnly => {
57 "Only FIL header damaged — metadata corruption, data likely intact"
58 }
59 CorruptionPattern::Bitrot => {
60 "Small number of bit flips — silent data corruption (bitrot)"
61 }
62 CorruptionPattern::Unknown => "Corruption pattern does not match any known category",
63 }
64 }
65}
66
67fn shannon_entropy(data: &[u8]) -> f64 {
69 if data.is_empty() {
70 return 0.0;
71 }
72 let mut counts = [0u64; 256];
73 for &b in data {
74 counts[b as usize] += 1;
75 }
76 let len = data.len() as f64;
77 counts
78 .iter()
79 .filter(|&&c| c > 0)
80 .map(|&c| {
81 let p = c as f64 / len;
82 -p * p.log2()
83 })
84 .sum()
85}
86
87fn hamming_distance(a: u32, b: u32) -> u32 {
89 (a ^ b).count_ones()
90}
91
92pub fn classify_corruption(page_data: &[u8], page_size: u32) -> CorruptionPattern {
106 let ps = page_size as usize;
107 if page_data.len() < ps || ps <= FIL_PAGE_DATA + SIZE_FIL_TRAILER {
108 return CorruptionPattern::Unknown;
109 }
110
111 let data_start = FIL_PAGE_DATA;
112 let data_end = ps - SIZE_FIL_TRAILER;
113 let data_area = &page_data[data_start..data_end];
114
115 let zero_count = data_area.iter().filter(|&&b| b == 0).count();
117 if zero_count as f64 / data_area.len() as f64 > 0.9 {
118 return CorruptionPattern::ZeroFill;
119 }
120
121 let sector_size = 512;
124 if ps >= sector_size * 2 {
125 for sector_start in (sector_size..ps).step_by(sector_size) {
126 let sector_end = (sector_start + sector_size).min(ps);
127 let sector = &page_data[sector_start..sector_end];
128 let all_zero = sector.iter().all(|&b| b == 0);
129 let all_ff = sector.iter().all(|&b| b == 0xFF);
130 if all_zero || all_ff {
131 let prefix = &page_data[..sector_start];
133 let non_zero = prefix.iter().filter(|&&b| b != 0).count();
134 if non_zero > 10 {
135 let tail = &page_data[sector_start..ps];
137 let tail_all_zero = tail.iter().all(|&b| b == 0);
138 let tail_all_ff = tail.iter().all(|&b| b == 0xFF);
139 if tail_all_zero || tail_all_ff {
140 return CorruptionPattern::TornWrite;
141 }
142 }
143 }
144 }
145 }
146
147 let stored = BigEndian::read_u32(&page_data[FIL_PAGE_SPACE_OR_CHKSUM..]);
148 let calculated = calculate_crc32c(page_data, ps);
149
150 if hamming_distance(stored, calculated) <= 8 {
154 return CorruptionPattern::Bitrot;
155 }
156
157 if stored != calculated {
162 let header_lsn = BigEndian::read_u64(&page_data[FIL_PAGE_LSN..]);
163 let header_lsn_low32 = (header_lsn & 0xFFFFFFFF) as u32;
164 let trailer_offset = ps - SIZE_FIL_TRAILER;
165 let trailer_lsn_low32 = BigEndian::read_u32(&page_data[trailer_offset + 4..]);
166 if header_lsn_low32 == trailer_lsn_low32 && header_lsn > 0 {
167 return CorruptionPattern::HeaderOnly;
168 }
169 }
170
171 if shannon_entropy(data_area) > 7.5 {
173 return CorruptionPattern::RandomNoise;
174 }
175
176 CorruptionPattern::Unknown
178}
179
180#[cfg(test)]
181mod tests {
182 use super::*;
183
184 fn build_valid_page(page_size: u32) -> Vec<u8> {
187 let ps = page_size as usize;
188 let mut page = vec![0u8; ps];
189
190 BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
192 BigEndian::write_u32(&mut page[FIL_PAGE_PREV..], FIL_NULL);
193 BigEndian::write_u32(&mut page[FIL_PAGE_NEXT..], FIL_NULL);
194 BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 5000);
195 BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], 17855); BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_ID..], 1);
197
198 let data_end = ps - SIZE_FIL_TRAILER;
201 for i in FIL_PAGE_DATA..data_end {
202 page[i] = ((i.wrapping_mul(7).wrapping_add(13)) & 0xFF) as u8;
203 }
204
205 let trailer = ps - SIZE_FIL_TRAILER;
207 BigEndian::write_u32(&mut page[trailer + 4..], (5000u64 & 0xFFFFFFFF) as u32);
208
209 let crc = calculate_crc32c(&page, ps);
211 BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], crc);
212
213 page
214 }
215
216 #[test]
217 fn test_classify_zero_fill() {
218 let ps = 16384u32;
219 let mut page = vec![0u8; ps as usize];
220 BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
223 BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 100);
224 BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], 17855);
225 BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], 0xBAAD);
226
227 let pattern = classify_corruption(&page, ps);
228 assert_eq!(pattern, CorruptionPattern::ZeroFill);
229 }
230
231 #[test]
232 fn test_classify_random_noise() {
233 let ps = 16384u32;
234 let mut page = vec![0u8; ps as usize];
235
236 let mut state: u64 = 0xDEADBEEF_CAFEBABE;
238 for byte in page.iter_mut() {
239 state = state
240 .wrapping_mul(6364136223846793005)
241 .wrapping_add(1442695040888963407);
242 *byte = (state >> 33) as u8;
243 }
244
245 BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], 0x12345678);
247
248 BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 0xAAAAAAAABBBBBBBB);
251 let trailer = ps as usize - SIZE_FIL_TRAILER;
252 BigEndian::write_u32(&mut page[trailer + 4..], 0xCCCCCCCC);
253
254 let pattern = classify_corruption(&page, ps);
255 assert_eq!(pattern, CorruptionPattern::RandomNoise);
256 }
257
258 #[test]
259 fn test_classify_torn_write() {
260 let ps = 16384u32;
261 let mut page = build_valid_page(ps);
262
263 let break_point = 12288; for byte in page[break_point..].iter_mut() {
269 *byte = 0;
270 }
271
272 let pattern = classify_corruption(&page, ps);
275 assert_eq!(pattern, CorruptionPattern::TornWrite);
276 }
277
278 #[test]
279 fn test_classify_header_only() {
280 let ps = 16384u32;
281 let mut page = build_valid_page(ps);
282
283 let correct_crc = BigEndian::read_u32(&page[FIL_PAGE_SPACE_OR_CHKSUM..]);
287 BigEndian::write_u32(
289 &mut page[FIL_PAGE_SPACE_OR_CHKSUM..],
290 correct_crc ^ 0xFF00FF00,
291 );
292
293 let stored = BigEndian::read_u32(&page[FIL_PAGE_SPACE_OR_CHKSUM..]);
295 let calculated = calculate_crc32c(&page, ps as usize);
296 assert!(
297 hamming_distance(stored, calculated) > 8,
298 "Test setup: Hamming distance must be >8 to avoid bitrot classification"
299 );
300
301 let pattern = classify_corruption(&page, ps);
302 assert_eq!(pattern, CorruptionPattern::HeaderOnly);
303 }
304
305 #[test]
306 fn test_classify_bitrot() {
307 let ps = 16384u32;
308 let mut page = build_valid_page(ps);
309
310 let crc = BigEndian::read_u32(&page[FIL_PAGE_SPACE_OR_CHKSUM..]);
312 BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], crc ^ 0x01);
313
314 let pattern = classify_corruption(&page, ps);
315 assert_eq!(pattern, CorruptionPattern::Bitrot);
316 }
317
318 #[test]
319 fn test_classify_bitrot_few_bits() {
320 let ps = 16384u32;
321 let mut page = build_valid_page(ps);
322
323 let crc = BigEndian::read_u32(&page[FIL_PAGE_SPACE_OR_CHKSUM..]);
325 BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], crc ^ 0x0F);
326
327 let stored = BigEndian::read_u32(&page[FIL_PAGE_SPACE_OR_CHKSUM..]);
328 let calculated = calculate_crc32c(&page, ps as usize);
329 assert!(hamming_distance(stored, calculated) <= 8);
330
331 let pattern = classify_corruption(&page, ps);
332 assert_eq!(pattern, CorruptionPattern::Bitrot);
333 }
334
335 #[test]
336 fn test_classify_unknown() {
337 let ps = 16384u32;
338 let mut page = vec![0x42u8; ps as usize];
340 BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], 0xBAADF00D);
342 BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
343 BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 100);
344 BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], 17855);
345
346 let pattern = classify_corruption(&page, ps);
350 assert_eq!(pattern, CorruptionPattern::Unknown);
351 }
352
353 #[test]
354 fn test_pattern_name() {
355 assert_eq!(CorruptionPattern::ZeroFill.name(), "zero-fill");
356 assert_eq!(CorruptionPattern::RandomNoise.name(), "random-noise");
357 assert_eq!(CorruptionPattern::TornWrite.name(), "torn-write");
358 assert_eq!(CorruptionPattern::HeaderOnly.name(), "header-only");
359 assert_eq!(CorruptionPattern::Bitrot.name(), "bitrot");
360 assert_eq!(CorruptionPattern::Unknown.name(), "unknown");
361 }
362
363 #[test]
364 fn test_pattern_description() {
365 assert!(CorruptionPattern::ZeroFill.description().contains("zeros"));
366 assert!(CorruptionPattern::RandomNoise
367 .description()
368 .contains("entropy"));
369 assert!(CorruptionPattern::TornWrite
370 .description()
371 .contains("interrupted"));
372 assert!(CorruptionPattern::HeaderOnly
373 .description()
374 .contains("header"));
375 assert!(CorruptionPattern::Bitrot
376 .description()
377 .contains("bit flips"));
378 assert!(CorruptionPattern::Unknown
379 .description()
380 .contains("does not match"));
381 }
382
383 #[test]
384 fn test_shannon_entropy_uniform() {
385 let data = vec![0xAA; 1000];
387 assert_eq!(shannon_entropy(&data), 0.0);
388 }
389
390 #[test]
391 fn test_shannon_entropy_empty() {
392 assert_eq!(shannon_entropy(&[]), 0.0);
393 }
394
395 #[test]
396 fn test_shannon_entropy_two_values() {
397 let mut data = vec![0u8; 1000];
399 for byte in data[..500].iter_mut() {
400 *byte = 1;
401 }
402 let entropy = shannon_entropy(&data);
403 assert!((entropy - 1.0).abs() < 0.001);
404 }
405
406 #[test]
407 fn test_hamming_distance_identical() {
408 assert_eq!(hamming_distance(0x12345678, 0x12345678), 0);
409 }
410
411 #[test]
412 fn test_hamming_distance_one_bit() {
413 assert_eq!(hamming_distance(0x00000000, 0x00000001), 1);
414 }
415
416 #[test]
417 fn test_hamming_distance_all_bits() {
418 assert_eq!(hamming_distance(0x00000000, 0xFFFFFFFF), 32);
419 }
420
421 #[test]
422 fn test_short_page_returns_unknown() {
423 let page = vec![0xFF; 10];
424 assert_eq!(
425 classify_corruption(&page, 16384),
426 CorruptionPattern::Unknown
427 );
428 }
429}