use std::io::Write;

use colored::Colorize;
use serde::Serialize;

use crate::cli::{create_progress_bar, wprintln};
use crate::innodb::checksum::{validate_checksum, validate_lsn};
use crate::innodb::constants::*;
use crate::innodb::page::FilHeader;
use crate::innodb::page_types::PageType;
use crate::innodb::record::walk_compact_records;
use crate::innodb::tablespace::Tablespace;
use crate::IdbError;

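/// Options for the `recover` command.
///
/// `page` limits the scan to a single page; `force` also walks records on
/// pages whose checksum or LSN validation failed; `page_size` overrides
/// page-size auto-detection.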
pub struct RecoverOptions {
    pub file: String,
    pub page: Option<u64>,
    pub verbose: bool,
    pub json: bool,
    pub force: bool,
    pub page_size: Option<u32>,
}

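/// Classification assigned to each scanned page: all-zero pages are `Empty`,
/// pages whose FIL header cannot be parsed (or cannot be read) are
/// `Unreadable`, and the rest are `Intact` or `Corrupt` depending on
/// checksum and LSN validation.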
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "lowercase")]
enum PageStatus {
    Intact,
    Corrupt,
    Empty,
    Unreadable,
}

impl PageStatus {
    fn label(self) -> &'static str {
        match self {
            PageStatus::Intact => "intact",
            PageStatus::Corrupt => "CORRUPT",
            PageStatus::Empty => "empty",
            PageStatus::Unreadable => "UNREADABLE",
        }
    }
}

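/// Top-level JSON report emitted by `output_json`. Per-page details in
/// `pages` are only populated when `--verbose` is set.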
#[derive(Serialize)]
struct RecoverReport {
    file: String,
    file_size: u64,
    page_size: u32,
    #[serde(skip_serializing_if = "Option::is_none")]
    page_size_source: Option<String>,
    total_pages: u64,
    summary: RecoverSummary,
    recoverable_records: u64,
    #[serde(skip_serializing_if = "Option::is_none")]
    force_recoverable_records: Option<u64>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pages: Vec<PageRecoveryInfo>,
}

#[derive(Serialize)]
struct RecoverSummary {
    intact: u64,
    corrupt: u64,
    empty: u64,
    unreadable: u64,
}

#[derive(Serialize)]
struct PageRecoveryInfo {
    page_number: u64,
    status: PageStatus,
    page_type: String,
    checksum_valid: bool,
    lsn_valid: bool,
    lsn: u64,
    #[serde(skip_serializing_if = "Option::is_none")]
    record_count: Option<usize>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    records: Vec<RecoveredRecord>,
}

#[derive(Serialize)]
struct RecoveredRecord {
    offset: usize,
    heap_no: u16,
    delete_mark: bool,
    data_hex: String,
}

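/// Counters and per-page aggregates accumulated during the scan, shared by
/// the text and JSON output paths.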
struct RecoverStats {
    file_size: u64,
    page_size: u32,
    page_size_source: Option<String>,
    scan_count: u64,
    intact: u64,
    corrupt: u64,
    empty: u64,
    unreadable: u64,
    total_records: u64,
    corrupt_records: u64,
    corrupt_page_numbers: Vec<u64>,
    index_pages_total: u64,
    index_pages_recoverable: u64,
}

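/// Result of analyzing a single page; `record_count` and `records` are only
/// filled in for INDEX pages (see `analyze_page`).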
struct PageAnalysis {
    page_number: u64,
    status: PageStatus,
    page_type: PageType,
    checksum_valid: bool,
    lsn_valid: bool,
    lsn: u64,
    record_count: Option<usize>,
    records: Vec<RecoveredRecord>,
}

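/// Opens the tablespace, falling back through three strategies: an explicit
/// page-size override, normal auto-detection via `Tablespace::open`, and
/// finally a set of candidate sizes that divide the file size evenly (with
/// `SIZE_PAGE_DEFAULT` as the last resort). Returns the tablespace plus a
/// note about which strategy was used (`None` means auto-detected).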
fn open_tablespace(
    file: &str,
    page_size_override: Option<u32>,
    writer: &mut dyn Write,
) -> Result<(Tablespace, Option<String>), IdbError> {
    if let Some(ps) = page_size_override {
        let ts = Tablespace::open_with_page_size(file, ps)?;
        return Ok((ts, Some("user-specified".to_string())));
    }

    match Tablespace::open(file) {
        Ok(ts) => Ok((ts, None)),
        Err(_) => {
            let candidates = [
                SIZE_PAGE_16K,
                SIZE_PAGE_8K,
                SIZE_PAGE_4K,
                SIZE_PAGE_32K,
                SIZE_PAGE_64K,
            ];

            let file_size = std::fs::metadata(file)
                .map_err(|e| IdbError::Io(format!("Cannot stat {}: {}", file, e)))?
                .len();

            for &ps in &candidates {
                if file_size >= ps as u64 && file_size % ps as u64 == 0 {
                    if let Ok(ts) = Tablespace::open_with_page_size(file, ps) {
                        let _ = wprintln!(
                            writer,
                            "Warning: auto-detect failed, using page size {} (file size divisible)",
                            ps
                        );
                        return Ok((ts, Some(format!("fallback ({})", ps))));
                    }
                }
            }

            let ts = Tablespace::open_with_page_size(file, SIZE_PAGE_DEFAULT)?;
            let _ = wprintln!(
                writer,
                "Warning: using default page size {} (no size divides evenly)",
                SIZE_PAGE_DEFAULT
            );
            Ok((ts, Some("default-fallback".to_string())))
        }
    }
}

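/// Classifies a single page and, for INDEX pages that are intact (or when
/// `force` is set), walks its compact-format record list. Full record bodies
/// are extracted only when both `--verbose` and `--json` are in effect
/// (`verbose_json`), since they are only surfaced in the JSON report.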
fn analyze_page(
    page_data: &[u8],
    page_num: u64,
    page_size: u32,
    force: bool,
    verbose_json: bool,
) -> PageAnalysis {
    if page_data.iter().all(|&b| b == 0) {
        return PageAnalysis {
            page_number: page_num,
            status: PageStatus::Empty,
            page_type: PageType::Allocated,
            checksum_valid: true,
            lsn_valid: true,
            lsn: 0,
            record_count: None,
            records: Vec::new(),
        };
    }

    let header = match FilHeader::parse(page_data) {
        Some(h) => h,
        None => {
            return PageAnalysis {
                page_number: page_num,
                status: PageStatus::Unreadable,
                page_type: PageType::Unknown,
                checksum_valid: false,
                lsn_valid: false,
                lsn: 0,
                record_count: None,
                records: Vec::new(),
            };
        }
    };

    let csum_result = validate_checksum(page_data, page_size);
    let lsn_valid = validate_lsn(page_data, page_size);
    let status = if csum_result.valid && lsn_valid {
        PageStatus::Intact
    } else {
        PageStatus::Corrupt
    };

    let (record_count, records) =
        if header.page_type == PageType::Index && (status == PageStatus::Intact || force) {
            let recs = walk_compact_records(page_data);
            let count = recs.len();
            let recovered = if verbose_json {
                extract_records(page_data, &recs, page_size)
            } else {
                Vec::new()
            };
            (Some(count), recovered)
        } else {
            (None, Vec::new())
        };

    PageAnalysis {
        page_number: page_num,
        status,
        page_type: header.page_type,
        checksum_valid: csum_result.valid,
        lsn_valid,
        lsn: header.lsn,
        record_count,
        records,
    }
}

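/// Lowercase hex-encodes a byte slice (two characters per byte).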
fn to_hex(data: &[u8]) -> String {
    let mut s = String::with_capacity(data.len() * 2);
    for &b in data {
        use std::fmt::Write;
        let _ = write!(s, "{:02x}", b);
    }
    s
}

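/// Builds a `RecoveredRecord` for each record returned by
/// `walk_compact_records`. Each record's data runs from its offset up to the
/// start of the next record's extra bytes (or to the FIL trailer for the
/// last one), clamped to the page bounds. The true end of a compact record
/// cannot be known without the table definition, so this is a best-effort
/// slice.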
fn extract_records(
    page_data: &[u8],
    recs: &[crate::innodb::record::RecordInfo],
    page_size: u32,
) -> Vec<RecoveredRecord> {
    let ps = page_size as usize;
    let data_end = ps - SIZE_FIL_TRAILER;

    recs.iter()
        .enumerate()
        .map(|(i, rec)| {
            let start = rec.offset;
            let end = if i + 1 < recs.len() {
                recs[i + 1].offset.saturating_sub(REC_N_NEW_EXTRA_BYTES)
            } else {
                data_end
            };

            let end = end.min(data_end);
            let data = if start < end && end <= page_data.len() {
                &page_data[start..end]
            } else {
                &[]
            };

            RecoveredRecord {
                offset: rec.offset,
                heap_no: rec.header.heap_no,
                delete_mark: rec.header.delete_mark,
                data_hex: to_hex(data),
            }
        })
        .collect()
}

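/// Entry point for the `recover` command: opens the tablespace, scans the
/// requested page range, aggregates per-page results into `RecoverStats`,
/// and writes either a text or JSON report to `writer`.
///
/// A minimal usage sketch (illustrative only; the file name is a placeholder):
///
/// ```ignore
/// let opts = RecoverOptions {
///     file: "table.ibd".to_string(), // hypothetical path
///     page: None,                    // scan every page
///     verbose: false,
///     json: true,
///     force: false,
///     page_size: None,               // auto-detect
/// };
/// execute(&opts, &mut std::io::stdout())?;
/// ```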
pub fn execute(opts: &RecoverOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
    let (mut ts, page_size_source) = open_tablespace(&opts.file, opts.page_size, writer)?;
    let page_size = ts.page_size();
    let page_count = ts.page_count();
    let file_size = ts.file_size();

    let verbose_json = opts.verbose && opts.json;

    let (start_page, end_page) = match opts.page {
        Some(p) => {
            if p >= page_count {
                return Err(IdbError::Parse(format!(
                    "Page {} out of range (tablespace has {} pages)",
                    p, page_count
                )));
            }
            (p, p + 1)
        }
        None => (0, page_count),
    };
    let scan_count = end_page - start_page;

    let mut analyses = Vec::with_capacity(scan_count as usize);
    let pb = if !opts.json && scan_count > 1 {
        Some(create_progress_bar(scan_count, "pages"))
    } else {
        None
    };

    for page_num in start_page..end_page {
        if let Some(ref pb) = pb {
            pb.inc(1);
        }

        let page_data = match ts.read_page(page_num) {
            Ok(data) => data,
            Err(_) => {
                analyses.push(PageAnalysis {
                    page_number: page_num,
                    status: PageStatus::Unreadable,
                    page_type: PageType::Unknown,
                    checksum_valid: false,
                    lsn_valid: false,
                    lsn: 0,
                    record_count: None,
                    records: Vec::new(),
                });
                continue;
            }
        };

        analyses.push(analyze_page(
            &page_data,
            page_num,
            page_size,
            opts.force,
            verbose_json,
        ));
    }

    if let Some(pb) = pb {
        pb.finish_and_clear();
    }

    let mut intact = 0u64;
    let mut corrupt = 0u64;
    let mut empty = 0u64;
    let mut unreadable = 0u64;
    let mut total_records = 0u64;
    let mut corrupt_records = 0u64;
    let mut corrupt_page_numbers = Vec::new();
    let mut index_pages_total = 0u64;
    let mut index_pages_recoverable = 0u64;

    for a in &analyses {
        match a.status {
            PageStatus::Intact => intact += 1,
            PageStatus::Corrupt => {
                corrupt += 1;
                corrupt_page_numbers.push(a.page_number);
            }
            PageStatus::Empty => empty += 1,
            PageStatus::Unreadable => unreadable += 1,
        }

        if a.page_type == PageType::Index {
            index_pages_total += 1;
            if a.status == PageStatus::Intact {
                index_pages_recoverable += 1;
            }
            if let Some(count) = a.record_count {
                if a.status == PageStatus::Intact {
                    total_records += count as u64;
                } else {
                    corrupt_records += count as u64;
                }
            }
        }
    }

    if opts.force {
        for a in &analyses {
            if a.page_type == PageType::Index
                && a.status == PageStatus::Corrupt
                && a.record_count.is_some()
            {
                index_pages_recoverable += 1;
            }
        }
    }

    let stats = RecoverStats {
        file_size,
        page_size,
        page_size_source,
        scan_count,
        intact,
        corrupt,
        empty,
        unreadable,
        total_records,
        corrupt_records,
        corrupt_page_numbers,
        index_pages_total,
        index_pages_recoverable,
    };

    if opts.json {
        output_json(opts, &analyses, &stats, writer)
    } else {
        output_text(opts, &analyses, &stats, writer)
    }
}

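/// Renders the human-readable report: header, optional per-page detail lines
/// (with `--verbose`), a page-status summary, INDEX-page recoverability
/// counts, and an overall intact percentage over non-empty pages.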
fn output_text(
    opts: &RecoverOptions,
    analyses: &[PageAnalysis],
    stats: &RecoverStats,
    writer: &mut dyn Write,
) -> Result<(), IdbError> {
    wprintln!(writer, "Recovery Analysis: {}", opts.file)?;
    wprintln!(
        writer,
        "File size: {} bytes ({} pages x {} bytes)",
        stats.file_size,
        stats.file_size / stats.page_size as u64,
        stats.page_size
    )?;

    let source_note = match &stats.page_size_source {
        Some(s) => format!(" ({})", s),
        None => " (auto-detected)".to_string(),
    };
    wprintln!(writer, "Page size: {}{}", stats.page_size, source_note)?;
    wprintln!(writer)?;

    if opts.verbose {
        for a in analyses {
            let status_str = match a.status {
                PageStatus::Intact => a.status.label().to_string(),
                PageStatus::Corrupt => format!("{}", a.status.label().red()),
                PageStatus::Empty => a.status.label().to_string(),
                PageStatus::Unreadable => format!("{}", a.status.label().red()),
            };

            let mut line = format!(
                "Page {:>4}: {:<14} {:<12} LSN={}",
                a.page_number,
                a.page_type.name(),
                status_str,
                a.lsn,
            );

            if let Some(count) = a.record_count {
                line.push_str(&format!(" records={}", count));
            }

            if a.status == PageStatus::Corrupt {
                if !a.checksum_valid {
                    line.push_str(" checksum mismatch");
                }
                if !a.lsn_valid {
                    line.push_str(" LSN mismatch");
                }
            }

            wprintln!(writer, "{}", line)?;
        }
        wprintln!(writer)?;
    }

    wprintln!(writer, "Page Status Summary:")?;
    wprintln!(writer, " Intact: {:>4} pages", stats.intact)?;
    if stats.corrupt > 0 {
        let pages_str = if stats.corrupt_page_numbers.len() <= 10 {
            let nums: Vec<String> = stats
                .corrupt_page_numbers
                .iter()
                .map(|n| n.to_string())
                .collect();
            format!(" (pages {})", nums.join(", "))
        } else {
            format!(" ({} pages)", stats.corrupt)
        };
        wprintln!(
            writer,
            " Corrupt: {:>4} pages{}",
            format!("{}", stats.corrupt).red(),
            pages_str
        )?;
    } else {
        wprintln!(writer, " Corrupt: {:>4} pages", stats.corrupt)?;
    }
    wprintln!(writer, " Empty: {:>4} pages", stats.empty)?;
    if stats.unreadable > 0 {
        wprintln!(
            writer,
            " Unreadable: {:>4} pages",
            format!("{}", stats.unreadable).red()
        )?;
    } else {
        wprintln!(writer, " Unreadable: {:>4} pages", stats.unreadable)?;
    }
    wprintln!(writer, " Total: {:>4} pages", stats.scan_count)?;
    wprintln!(writer)?;

    if stats.index_pages_total > 0 {
        wprintln!(
            writer,
            "Recoverable INDEX Pages: {} of {}",
            stats.index_pages_recoverable,
            stats.index_pages_total
        )?;
        wprintln!(writer, " Total user records: {}", stats.total_records)?;
        if stats.corrupt_records > 0 && !opts.force {
            wprintln!(
                writer,
                " Records on corrupt pages: {} (use --force to include)",
                stats.corrupt_records
            )?;
        } else if stats.corrupt_records > 0 {
            wprintln!(
                writer,
                " Records on corrupt pages: {} (included with --force)",
                stats.corrupt_records
            )?;
        }
        wprintln!(writer)?;
    }

    let total_non_empty = stats.intact + stats.corrupt + stats.unreadable;
    if total_non_empty > 0 {
        let pct = (stats.intact as f64 / total_non_empty as f64) * 100.0;
        wprintln!(writer, "Overall: {:.1}% of pages intact", pct)?;
    }

    Ok(())
}

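/// Serializes the scan results as pretty-printed JSON. `recoverable_records`
/// includes records on corrupt pages only when `--force` is set; otherwise
/// that count is reported separately as `force_recoverable_records`.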
fn output_json(
    opts: &RecoverOptions,
    analyses: &[PageAnalysis],
    stats: &RecoverStats,
    writer: &mut dyn Write,
) -> Result<(), IdbError> {
    let all_records = stats.total_records + if opts.force { stats.corrupt_records } else { 0 };

    let pages: Vec<PageRecoveryInfo> = if opts.verbose {
        analyses
            .iter()
            .map(|a| PageRecoveryInfo {
                page_number: a.page_number,
                status: a.status,
                page_type: a.page_type.name().to_string(),
                checksum_valid: a.checksum_valid,
                lsn_valid: a.lsn_valid,
                lsn: a.lsn,
                record_count: a.record_count,
                records: a
                    .records
                    .iter()
                    .map(|r| RecoveredRecord {
                        offset: r.offset,
                        heap_no: r.heap_no,
                        delete_mark: r.delete_mark,
                        data_hex: r.data_hex.clone(),
                    })
                    .collect(),
            })
            .collect()
    } else {
        Vec::new()
    };

    let force_recs = if stats.corrupt_records > 0 && !opts.force {
        Some(stats.corrupt_records)
    } else {
        None
    };

    let report = RecoverReport {
        file: opts.file.clone(),
        file_size: stats.file_size,
        page_size: stats.page_size,
        page_size_source: stats.page_size_source.clone(),
        total_pages: stats.scan_count,
        summary: RecoverSummary {
            intact: stats.intact,
            corrupt: stats.corrupt,
            empty: stats.empty,
            unreadable: stats.unreadable,
        },
        recoverable_records: all_records,
        force_recoverable_records: force_recs,
        pages,
    };

    let json = serde_json::to_string_pretty(&report)
        .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
    wprintln!(writer, "{}", json)?;

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_page_status_label() {
        assert_eq!(PageStatus::Intact.label(), "intact");
        assert_eq!(PageStatus::Corrupt.label(), "CORRUPT");
        assert_eq!(PageStatus::Empty.label(), "empty");
        assert_eq!(PageStatus::Unreadable.label(), "UNREADABLE");
    }

    #[test]
    fn test_analyze_empty_page() {
        let page = vec![0u8; 16384];
        let result = analyze_page(&page, 0, 16384, false, false);
        assert_eq!(result.status, PageStatus::Empty);
        assert_eq!(result.page_type, PageType::Allocated);
    }

    #[test]
    fn test_analyze_short_page_is_unreadable() {
        let page = vec![0xFF; 10];
        let result = analyze_page(&page, 0, 16384, false, false);
        assert_eq!(result.status, PageStatus::Unreadable);
    }

    #[test]
    fn test_analyze_valid_index_page() {
        use byteorder::{BigEndian, ByteOrder};

        let mut page = vec![0u8; 16384];
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
        BigEndian::write_u32(&mut page[FIL_PAGE_PREV..], FIL_NULL);
        BigEndian::write_u32(&mut page[FIL_PAGE_NEXT..], FIL_NULL);
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 5000);
        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], 17855);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_ID..], 1);

        let trailer = 16384 - SIZE_FIL_TRAILER;
        BigEndian::write_u32(&mut page[trailer + 4..], (5000u64 & 0xFFFFFFFF) as u32);

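        // The trailer word written above holds the low 32 bits of the LSN,
        // which the LSN check is expected to compare against the header LSN.
        // Below, build a checksum the validator should accept: crc32c over
        // the header fields between the checksum slot and the flush-LSN
        // field, XORed with crc32c over the page body before the trailer
        // (the standard InnoDB crc32c scheme). 17855 above is the raw
        // on-disk value for FIL_PAGE_INDEX.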
        let end = 16384 - SIZE_FIL_TRAILER;
        let crc1 = crc32c::crc32c(&page[FIL_PAGE_OFFSET..FIL_PAGE_FILE_FLUSH_LSN]);
        let crc2 = crc32c::crc32c(&page[FIL_PAGE_DATA..end]);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], crc1 ^ crc2);

        let result = analyze_page(&page, 1, 16384, false, false);
        assert_eq!(result.status, PageStatus::Intact);
        assert_eq!(result.page_type, PageType::Index);
        assert!(result.record_count.is_some());
    }

    #[test]
    fn test_analyze_corrupt_page() {
        use byteorder::{BigEndian, ByteOrder};

        let mut page = vec![0u8; 16384];
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 5000);
        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], 17855);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_ID..], 1);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], 0xDEAD);

        let result = analyze_page(&page, 1, 16384, false, false);
        assert_eq!(result.status, PageStatus::Corrupt);
        assert!(result.record_count.is_none());
    }

    #[test]
    fn test_analyze_corrupt_page_with_force() {
        use byteorder::{BigEndian, ByteOrder};

        let mut page = vec![0u8; 16384];
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 5000);
        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], 17855);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_ID..], 1);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], 0xDEAD);

        let result = analyze_page(&page, 1, 16384, true, false);
        assert_eq!(result.status, PageStatus::Corrupt);
        assert!(result.record_count.is_some());
    }
}