1use std::io::Write;
2
3use colored::Colorize;
4use serde::Serialize;
5
6use crate::cli::{create_progress_bar, wprintln};
7use crate::innodb::checksum::{validate_checksum, validate_lsn};
8use crate::innodb::constants::*;
9use crate::innodb::page::FilHeader;
10use crate::innodb::page_types::PageType;
11use crate::innodb::record::walk_compact_records;
12use crate::innodb::tablespace::Tablespace;
13use crate::IdbError;
14
/// Options controlling a recovery analysis run (consumed by [`execute`]).
///
/// Derives `Debug` and `Clone` so callers can log the options or hand a copy
/// to another component without writing the boilerplate themselves.
#[derive(Debug, Clone)]
pub struct RecoverOptions {
    /// Path of the tablespace file to analyze.
    pub file: String,
    /// Analyze only this page number; `None` scans every page.
    pub page: Option<u64>,
    /// Emit per-page details in addition to the summary.
    pub verbose: bool,
    /// Emit the report as JSON instead of plain text.
    pub json: bool,
    /// Also walk records on INDEX pages that failed validation.
    pub force: bool,
    /// Explicit page size in bytes; `None` lets the tablespace be auto-detected.
    pub page_size: Option<u32>,
}
30
/// Health classification assigned to each scanned page.
///
/// Serialized with lowercase variant names (`rename_all = "lowercase"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "lowercase")]
enum PageStatus {
    /// Both the checksum and the LSN validation passed.
    Intact,
    /// The FIL header parsed, but checksum or LSN validation failed.
    Corrupt,
    /// Every byte of the page is zero.
    Empty,
    /// The page could not be read, or its FIL header could not be parsed.
    Unreadable,
}
40
41impl PageStatus {
42 fn label(self) -> &'static str {
43 match self {
44 PageStatus::Intact => "intact",
45 PageStatus::Corrupt => "CORRUPT",
46 PageStatus::Empty => "empty",
47 PageStatus::Unreadable => "UNREADABLE",
48 }
49 }
50}
51
/// Top-level document serialized by `output_json`.
#[derive(Serialize)]
struct RecoverReport {
    /// Path of the analyzed file, copied from the options.
    file: String,
    /// File size in bytes as reported by the opened tablespace.
    file_size: u64,
    /// Page size in bytes used for the scan.
    page_size: u32,
    /// How the page size was chosen when it was not auto-detected
    /// (for example `"user-specified"`); omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    page_size_source: Option<String>,
    /// Number of pages covered by this scan.
    total_pages: u64,
    /// Page counts per status.
    summary: RecoverSummary,
    /// Records counted on intact INDEX pages, plus those counted on corrupt
    /// INDEX pages when `force` is set.
    recoverable_records: u64,
    /// Records counted on corrupt pages that were not included because
    /// `force` is off; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    force_recoverable_records: Option<u64>,
    /// Per-page details; only populated in verbose mode and omitted from the
    /// JSON when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pages: Vec<PageRecoveryInfo>,
}
68
/// Number of scanned pages in each [`PageStatus`] category.
#[derive(Serialize)]
struct RecoverSummary {
    /// Pages that passed checksum and LSN validation.
    intact: u64,
    /// Pages that failed checksum or LSN validation.
    corrupt: u64,
    /// All-zero pages.
    empty: u64,
    /// Pages that could not be read or parsed.
    unreadable: u64,
}
77
/// JSON representation of a single page's analysis (verbose mode only).
#[derive(Serialize)]
struct PageRecoveryInfo {
    /// Page number within the tablespace.
    page_number: u64,
    /// Classification of the page.
    status: PageStatus,
    /// Display name of the page type.
    page_type: String,
    /// Whether checksum validation passed.
    checksum_valid: bool,
    /// Whether LSN validation passed.
    lsn_valid: bool,
    /// LSN taken from the page header (0 for empty or unreadable pages).
    lsn: u64,
    /// Number of records found when the page's records were walked; omitted
    /// from the JSON when the page was not walked.
    #[serde(skip_serializing_if = "Option::is_none")]
    record_count: Option<usize>,
    /// Extracted record payloads; omitted from the JSON when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    records: Vec<RecoveredRecord>,
}
92
/// A single record extracted from an INDEX page.
#[derive(Serialize)]
struct RecoveredRecord {
    /// Byte offset of the record within the page.
    offset: usize,
    /// `heap_no` field of the record header.
    heap_no: u16,
    /// `delete_mark` flag of the record header.
    delete_mark: bool,
    /// Lowercase hex dump of the bytes attributed to this record.
    data_hex: String,
}
101
/// Internal result of analyzing one page, before it is rendered as text or JSON.
struct PageAnalysis {
    /// Page number within the tablespace.
    page_number: u64,
    /// Classification of the page.
    status: PageStatus,
    /// Page type from the FIL header (`Allocated` for all-zero pages,
    /// `Unknown` for unreadable ones).
    page_type: PageType,
    /// Whether checksum validation passed.
    checksum_valid: bool,
    /// Whether LSN validation passed.
    lsn_valid: bool,
    /// LSN from the FIL header, or 0 when no header was parsed.
    lsn: u64,
    /// Number of records walked on the page; `None` when the page was not walked.
    record_count: Option<usize>,
    /// Extracted record payloads (only filled for verbose JSON output).
    records: Vec<RecoveredRecord>,
}
113
114fn open_tablespace(
116 file: &str,
117 page_size_override: Option<u32>,
118 writer: &mut dyn Write,
119) -> Result<(Tablespace, Option<String>), IdbError> {
120 if let Some(ps) = page_size_override {
121 let ts = Tablespace::open_with_page_size(file, ps)?;
122 return Ok((ts, Some("user-specified".to_string())));
123 }
124
125 match Tablespace::open(file) {
126 Ok(ts) => Ok((ts, None)),
127 Err(_) => {
128 let candidates = [
130 SIZE_PAGE_16K,
131 SIZE_PAGE_8K,
132 SIZE_PAGE_4K,
133 SIZE_PAGE_32K,
134 SIZE_PAGE_64K,
135 ];
136
137 let file_size = std::fs::metadata(file)
138 .map_err(|e| IdbError::Io(format!("Cannot stat {}: {}", file, e)))?
139 .len();
140
141 for &ps in &candidates {
142 if file_size >= ps as u64 && file_size % ps as u64 == 0 {
143 if let Ok(ts) = Tablespace::open_with_page_size(file, ps) {
144 let _ = wprintln!(
145 writer,
146 "Warning: auto-detect failed, using page size {} (file size divisible)",
147 ps
148 );
149 return Ok((ts, Some(format!("fallback ({})", ps))));
150 }
151 }
152 }
153
154 let ts = Tablespace::open_with_page_size(file, SIZE_PAGE_DEFAULT)?;
156 let _ = wprintln!(
157 writer,
158 "Warning: using default page size {} (no size divides evenly)",
159 SIZE_PAGE_DEFAULT
160 );
161 Ok((ts, Some("default-fallback".to_string())))
162 }
163 }
164}
165
166fn analyze_page(
168 page_data: &[u8],
169 page_num: u64,
170 page_size: u32,
171 force: bool,
172 verbose_json: bool,
173) -> PageAnalysis {
174 if page_data.iter().all(|&b| b == 0) {
176 return PageAnalysis {
177 page_number: page_num,
178 status: PageStatus::Empty,
179 page_type: PageType::Allocated,
180 checksum_valid: true,
181 lsn_valid: true,
182 lsn: 0,
183 record_count: None,
184 records: Vec::new(),
185 };
186 }
187
188 let header = match FilHeader::parse(page_data) {
190 Some(h) => h,
191 None => {
192 return PageAnalysis {
193 page_number: page_num,
194 status: PageStatus::Unreadable,
195 page_type: PageType::Unknown,
196 checksum_valid: false,
197 lsn_valid: false,
198 lsn: 0,
199 record_count: None,
200 records: Vec::new(),
201 };
202 }
203 };
204
205 let csum_result = validate_checksum(page_data, page_size);
206 let lsn_valid = validate_lsn(page_data, page_size);
207 let status = if csum_result.valid && lsn_valid {
208 PageStatus::Intact
209 } else {
210 PageStatus::Corrupt
211 };
212
213 let (record_count, records) = if header.page_type == PageType::Index
215 && (status == PageStatus::Intact || force)
216 {
217 let recs = walk_compact_records(page_data);
218 let count = recs.len();
219 let recovered = if verbose_json {
220 extract_records(page_data, &recs, page_size)
221 } else {
222 Vec::new()
223 };
224 (Some(count), recovered)
225 } else {
226 (None, Vec::new())
227 };
228
229 PageAnalysis {
230 page_number: page_num,
231 status,
232 page_type: header.page_type,
233 checksum_valid: csum_result.valid,
234 lsn_valid,
235 lsn: header.lsn,
236 record_count,
237 records,
238 }
239}
240
/// Encodes `data` as a lowercase hexadecimal string, two digits per byte.
fn to_hex(data: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut out = String::with_capacity(data.len() * 2);
    for &byte in data {
        // High nibble first, then low nibble.
        out.push(DIGITS[usize::from(byte >> 4)] as char);
        out.push(DIGITS[usize::from(byte & 0x0f)] as char);
    }
    out
}
250
251fn extract_records(
253 page_data: &[u8],
254 recs: &[crate::innodb::record::RecordInfo],
255 page_size: u32,
256) -> Vec<RecoveredRecord> {
257 let ps = page_size as usize;
258 let data_end = ps - SIZE_FIL_TRAILER;
259
260 recs.iter()
261 .enumerate()
262 .map(|(i, rec)| {
263 let start = rec.offset;
264 let end = if i + 1 < recs.len() {
265 recs[i + 1].offset.saturating_sub(REC_N_NEW_EXTRA_BYTES)
267 } else {
268 data_end
270 };
271
272 let end = end.min(data_end);
273 let data = if start < end && end <= page_data.len() {
274 &page_data[start..end]
275 } else {
276 &[]
277 };
278
279 RecoveredRecord {
280 offset: rec.offset,
281 heap_no: rec.header.heap_no,
282 delete_mark: rec.header.delete_mark,
283 data_hex: to_hex(data),
284 }
285 })
286 .collect()
287}
288
289pub fn execute(opts: &RecoverOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
291 let (mut ts, page_size_source) = open_tablespace(&opts.file, opts.page_size, writer)?;
292 let page_size = ts.page_size();
293 let page_count = ts.page_count();
294 let file_size = ts.file_size();
295
296 let verbose_json = opts.verbose && opts.json;
297
298 let (start_page, end_page) = match opts.page {
300 Some(p) => {
301 if p >= page_count {
302 return Err(IdbError::Parse(format!(
303 "Page {} out of range (tablespace has {} pages)",
304 p, page_count
305 )));
306 }
307 (p, p + 1)
308 }
309 None => (0, page_count),
310 };
311 let scan_count = end_page - start_page;
312
313 let mut analyses = Vec::with_capacity(scan_count as usize);
315 let pb = if !opts.json && scan_count > 1 {
316 Some(create_progress_bar(scan_count, "pages"))
317 } else {
318 None
319 };
320
321 for page_num in start_page..end_page {
322 if let Some(ref pb) = pb {
323 pb.inc(1);
324 }
325
326 let page_data = match ts.read_page(page_num) {
327 Ok(data) => data,
328 Err(_) => {
329 analyses.push(PageAnalysis {
330 page_number: page_num,
331 status: PageStatus::Unreadable,
332 page_type: PageType::Unknown,
333 checksum_valid: false,
334 lsn_valid: false,
335 lsn: 0,
336 record_count: None,
337 records: Vec::new(),
338 });
339 continue;
340 }
341 };
342
343 analyses.push(analyze_page(
344 &page_data,
345 page_num,
346 page_size,
347 opts.force,
348 verbose_json,
349 ));
350 }
351
352 if let Some(pb) = pb {
353 pb.finish_and_clear();
354 }
355
356 let mut intact = 0u64;
358 let mut corrupt = 0u64;
359 let mut empty = 0u64;
360 let mut unreadable = 0u64;
361 let mut total_records = 0u64;
362 let mut corrupt_records = 0u64;
363 let mut corrupt_page_numbers = Vec::new();
364 let mut index_pages_total = 0u64;
365 let mut index_pages_recoverable = 0u64;
366
367 for a in &analyses {
368 match a.status {
369 PageStatus::Intact => intact += 1,
370 PageStatus::Corrupt => {
371 corrupt += 1;
372 corrupt_page_numbers.push(a.page_number);
373 }
374 PageStatus::Empty => empty += 1,
375 PageStatus::Unreadable => unreadable += 1,
376 }
377
378 if a.page_type == PageType::Index {
379 index_pages_total += 1;
380 if a.status == PageStatus::Intact {
381 index_pages_recoverable += 1;
382 }
383 if let Some(count) = a.record_count {
384 if a.status == PageStatus::Intact {
385 total_records += count as u64;
386 } else {
387 corrupt_records += count as u64;
388 }
389 }
390 }
391 }
392
393 if opts.force {
395 for a in &analyses {
396 if a.page_type == PageType::Index
397 && a.status == PageStatus::Corrupt
398 && a.record_count.is_some()
399 {
400 index_pages_recoverable += 1;
401 }
402 }
403 }
404
405 if opts.json {
406 output_json(
407 opts,
408 &analyses,
409 file_size,
410 page_size,
411 page_size_source,
412 scan_count,
413 intact,
414 corrupt,
415 empty,
416 unreadable,
417 total_records,
418 corrupt_records,
419 writer,
420 )
421 } else {
422 output_text(
423 opts,
424 &analyses,
425 file_size,
426 page_size,
427 page_size_source,
428 scan_count,
429 intact,
430 corrupt,
431 empty,
432 unreadable,
433 total_records,
434 corrupt_records,
435 &corrupt_page_numbers,
436 index_pages_total,
437 index_pages_recoverable,
438 writer,
439 )
440 }
441}
442
443#[allow(clippy::too_many_arguments)]
444fn output_text(
445 opts: &RecoverOptions,
446 analyses: &[PageAnalysis],
447 file_size: u64,
448 page_size: u32,
449 page_size_source: Option<String>,
450 scan_count: u64,
451 intact: u64,
452 corrupt: u64,
453 empty: u64,
454 unreadable: u64,
455 total_records: u64,
456 corrupt_records: u64,
457 corrupt_page_numbers: &[u64],
458 index_pages_total: u64,
459 index_pages_recoverable: u64,
460 writer: &mut dyn Write,
461) -> Result<(), IdbError> {
462 wprintln!(writer, "Recovery Analysis: {}", opts.file)?;
463 wprintln!(
464 writer,
465 "File size: {} bytes ({} pages x {} bytes)",
466 file_size, scan_count, page_size
467 )?;
468
469 let source_note = match &page_size_source {
470 Some(s) => format!(" ({})", s),
471 None => " (auto-detected)".to_string(),
472 };
473 wprintln!(writer, "Page size: {}{}", page_size, source_note)?;
474 wprintln!(writer)?;
475
476 if opts.verbose {
478 for a in analyses {
479 let status_str = match a.status {
480 PageStatus::Intact => a.status.label().to_string(),
481 PageStatus::Corrupt => format!("{}", a.status.label().red()),
482 PageStatus::Empty => a.status.label().to_string(),
483 PageStatus::Unreadable => format!("{}", a.status.label().red()),
484 };
485
486 let mut line = format!(
487 "Page {:>4}: {:<14} {:<12} LSN={}",
488 a.page_number,
489 a.page_type.name(),
490 status_str,
491 a.lsn,
492 );
493
494 if let Some(count) = a.record_count {
495 line.push_str(&format!(" records={}", count));
496 }
497
498 if a.status == PageStatus::Corrupt {
499 if !a.checksum_valid {
500 line.push_str(" checksum mismatch");
501 }
502 if !a.lsn_valid {
503 line.push_str(" LSN mismatch");
504 }
505 }
506
507 wprintln!(writer, "{}", line)?;
508 }
509 wprintln!(writer)?;
510 }
511
512 wprintln!(writer, "Page Status Summary:")?;
514 wprintln!(writer, " Intact: {:>4} pages", intact)?;
515 if corrupt > 0 {
516 let pages_str = if corrupt_page_numbers.len() <= 10 {
517 let nums: Vec<String> = corrupt_page_numbers.iter().map(|n| n.to_string()).collect();
518 format!(" (pages {})", nums.join(", "))
519 } else {
520 format!(" ({} pages)", corrupt)
521 };
522 wprintln!(
523 writer,
524 " Corrupt: {:>4} pages{}",
525 format!("{}", corrupt).red(),
526 pages_str
527 )?;
528 } else {
529 wprintln!(writer, " Corrupt: {:>4} pages", corrupt)?;
530 }
531 wprintln!(writer, " Empty: {:>4} pages", empty)?;
532 if unreadable > 0 {
533 wprintln!(
534 writer,
535 " Unreadable: {:>4} pages",
536 format!("{}", unreadable).red()
537 )?;
538 } else {
539 wprintln!(writer, " Unreadable: {:>4} pages", unreadable)?;
540 }
541 wprintln!(writer, " Total: {:>4} pages", scan_count)?;
542 wprintln!(writer)?;
543
544 if index_pages_total > 0 {
545 wprintln!(
546 writer,
547 "Recoverable INDEX Pages: {} of {}",
548 index_pages_recoverable, index_pages_total
549 )?;
550 wprintln!(writer, " Total user records: {}", total_records)?;
551 if corrupt_records > 0 && !opts.force {
552 wprintln!(
553 writer,
554 " Records on corrupt pages: {} (use --force to include)",
555 corrupt_records
556 )?;
557 } else if corrupt_records > 0 {
558 wprintln!(
559 writer,
560 " Records on corrupt pages: {} (included with --force)",
561 corrupt_records
562 )?;
563 }
564 wprintln!(writer)?;
565 }
566
567 let total_non_empty = intact + corrupt + unreadable;
568 if total_non_empty > 0 {
569 let pct = (intact as f64 / total_non_empty as f64) * 100.0;
570 wprintln!(writer, "Overall: {:.1}% of pages intact", pct)?;
571 }
572
573 Ok(())
574}
575
576#[allow(clippy::too_many_arguments)]
577fn output_json(
578 opts: &RecoverOptions,
579 analyses: &[PageAnalysis],
580 file_size: u64,
581 page_size: u32,
582 page_size_source: Option<String>,
583 scan_count: u64,
584 intact: u64,
585 corrupt: u64,
586 empty: u64,
587 unreadable: u64,
588 total_records: u64,
589 corrupt_records: u64,
590 writer: &mut dyn Write,
591) -> Result<(), IdbError> {
592 let all_records = total_records + if opts.force { corrupt_records } else { 0 };
593
594 let pages: Vec<PageRecoveryInfo> = if opts.verbose {
595 analyses
596 .iter()
597 .map(|a| PageRecoveryInfo {
598 page_number: a.page_number,
599 status: a.status,
600 page_type: a.page_type.name().to_string(),
601 checksum_valid: a.checksum_valid,
602 lsn_valid: a.lsn_valid,
603 lsn: a.lsn,
604 record_count: a.record_count,
605 records: a
606 .records
607 .iter()
608 .map(|r| RecoveredRecord {
609 offset: r.offset,
610 heap_no: r.heap_no,
611 delete_mark: r.delete_mark,
612 data_hex: r.data_hex.clone(),
613 })
614 .collect(),
615 })
616 .collect()
617 } else {
618 Vec::new()
619 };
620
621 let force_recs = if corrupt_records > 0 && !opts.force {
622 Some(corrupt_records)
623 } else {
624 None
625 };
626
627 let report = RecoverReport {
628 file: opts.file.clone(),
629 file_size,
630 page_size,
631 page_size_source,
632 total_pages: scan_count,
633 summary: RecoverSummary {
634 intact,
635 corrupt,
636 empty,
637 unreadable,
638 },
639 recoverable_records: all_records,
640 force_recoverable_records: force_recs,
641 pages,
642 };
643
644 let json = serde_json::to_string_pretty(&report)
645 .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
646 wprintln!(writer, "{}", json)?;
647
648 Ok(())
649}
650
#[cfg(test)]
mod tests {
    use super::*;
    use byteorder::{BigEndian, ByteOrder};

    /// Page size used by every fixture in this module.
    const PAGE_SIZE: u32 = 16384;
    /// Page type code that these tests expect to be parsed as `PageType::Index`.
    const INDEX_TYPE_CODE: u16 = 17855;
    /// LSN written into the fixture headers.
    const TEST_LSN: u64 = 5000;

    /// Returns a zero-filled buffer of one page.
    fn blank_page() -> Vec<u8> {
        vec![0u8; PAGE_SIZE as usize]
    }

    /// Builds an INDEX page whose stored checksum is deliberately wrong.
    fn index_page_with_bad_checksum() -> Vec<u8> {
        let mut page = blank_page();
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], TEST_LSN);
        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], INDEX_TYPE_CODE);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_ID..], 1);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], 0xDEAD);
        page
    }

    #[test]
    fn test_page_status_label() {
        let expected = [
            (PageStatus::Intact, "intact"),
            (PageStatus::Corrupt, "CORRUPT"),
            (PageStatus::Empty, "empty"),
            (PageStatus::Unreadable, "UNREADABLE"),
        ];
        for (status, label) in expected.iter() {
            assert_eq!(status.label(), *label);
        }
    }

    #[test]
    fn test_analyze_empty_page() {
        let page = blank_page();
        let result = analyze_page(&page, 0, PAGE_SIZE, false, false);
        assert_eq!(result.status, PageStatus::Empty);
        assert_eq!(result.page_type, PageType::Allocated);
    }

    #[test]
    fn test_analyze_short_page_is_unreadable() {
        // Ten non-zero bytes: not blank, and too short for a FIL header.
        let page = vec![0xFF; 10];
        let result = analyze_page(&page, 0, PAGE_SIZE, false, false);
        assert_eq!(result.status, PageStatus::Unreadable);
    }

    #[test]
    fn test_analyze_valid_index_page() {
        let mut page = blank_page();
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
        BigEndian::write_u32(&mut page[FIL_PAGE_PREV..], FIL_NULL);
        BigEndian::write_u32(&mut page[FIL_PAGE_NEXT..], FIL_NULL);
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], TEST_LSN);
        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], INDEX_TYPE_CODE);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_ID..], 1);

        // The trailer repeats the low 32 bits of the header LSN.
        let trailer_start = PAGE_SIZE as usize - SIZE_FIL_TRAILER;
        BigEndian::write_u32(
            &mut page[trailer_start + 4..],
            (TEST_LSN & 0xFFFFFFFF) as u32,
        );

        // Store a CRC-32C checksum: header part XOR body part.
        let header_crc = crc32c::crc32c(&page[FIL_PAGE_OFFSET..FIL_PAGE_FILE_FLUSH_LSN]);
        let body_crc = crc32c::crc32c(&page[FIL_PAGE_DATA..trailer_start]);
        BigEndian::write_u32(
            &mut page[FIL_PAGE_SPACE_OR_CHKSUM..],
            header_crc ^ body_crc,
        );

        let result = analyze_page(&page, 1, PAGE_SIZE, false, false);
        assert_eq!(result.status, PageStatus::Intact);
        assert_eq!(result.page_type, PageType::Index);
        assert!(result.record_count.is_some());
    }

    #[test]
    fn test_analyze_corrupt_page() {
        let page = index_page_with_bad_checksum();

        let result = analyze_page(&page, 1, PAGE_SIZE, false, false);
        assert_eq!(result.status, PageStatus::Corrupt);
        // Without force, records on a corrupt page are not walked.
        assert!(result.record_count.is_none());
    }

    #[test]
    fn test_analyze_corrupt_page_with_force() {
        let page = index_page_with_bad_checksum();

        let result = analyze_page(&page, 1, PAGE_SIZE, true, false);
        assert_eq!(result.status, PageStatus::Corrupt);
        // With force, records are walked even though validation failed.
        assert!(result.record_count.is_some());
    }
}