use std::io::Write;

use colored::Colorize;
use serde::Serialize;

use crate::cli::{create_progress_bar, wprintln};
use crate::innodb::checksum::{validate_checksum, validate_lsn};
use crate::innodb::constants::*;
use crate::innodb::page::FilHeader;
use crate::innodb::page_types::PageType;
use crate::innodb::record::walk_compact_records;
use crate::innodb::tablespace::Tablespace;
use crate::IdbError;

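/// CLI options for the `recover` command: the file to scan, an optional single
/// page to restrict the scan to, output flags, and overrides for page size and
/// keyring-based decryption.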
pub struct RecoverOptions {
    pub file: String,
    pub page: Option<u64>,
    pub verbose: bool,
    pub json: bool,
    pub force: bool,
    pub page_size: Option<u32>,
    pub keyring: Option<String>,
}

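/// Classification of a single page after analysis.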
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "lowercase")]
enum PageStatus {
    Intact,
    Corrupt,
    Empty,
    Unreadable,
}

impl PageStatus {
    fn label(self) -> &'static str {
        match self {
            PageStatus::Intact => "intact",
            PageStatus::Corrupt => "CORRUPT",
            PageStatus::Empty => "empty",
            PageStatus::Unreadable => "UNREADABLE",
        }
    }
}

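/// Top-level report emitted when `--json` output is requested.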
#[derive(Serialize)]
struct RecoverReport {
    file: String,
    file_size: u64,
    page_size: u32,
    #[serde(skip_serializing_if = "Option::is_none")]
    page_size_source: Option<String>,
    total_pages: u64,
    summary: RecoverSummary,
    recoverable_records: u64,
    #[serde(skip_serializing_if = "Option::is_none")]
    force_recoverable_records: Option<u64>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pages: Vec<PageRecoveryInfo>,
}

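/// Per-status page counts for the JSON report.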
#[derive(Serialize)]
struct RecoverSummary {
    intact: u64,
    corrupt: u64,
    empty: u64,
    unreadable: u64,
}

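/// Per-page detail included in the JSON report when `--verbose` is set.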
#[derive(Serialize)]
struct PageRecoveryInfo {
    page_number: u64,
    status: PageStatus,
    page_type: String,
    checksum_valid: bool,
    lsn_valid: bool,
    lsn: u64,
    #[serde(skip_serializing_if = "Option::is_none")]
    record_count: Option<usize>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    records: Vec<RecoveredRecord>,
}

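/// A single user record extracted from an INDEX page, with its raw bytes as hex.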
#[derive(Serialize)]
struct RecoveredRecord {
    offset: usize,
    heap_no: u16,
    delete_mark: bool,
    data_hex: String,
}

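/// Aggregated counters collected during the scan, shared by the text and JSON outputs.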
struct RecoverStats {
    file_size: u64,
    page_size: u32,
    page_size_source: Option<String>,
    scan_count: u64,
    intact: u64,
    corrupt: u64,
    empty: u64,
    unreadable: u64,
    total_records: u64,
    corrupt_records: u64,
    corrupt_page_numbers: Vec<u64>,
    index_pages_total: u64,
    index_pages_recoverable: u64,
}

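/// Result of analyzing one page: its status, key FIL header fields, and any
/// records extracted from it.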
struct PageAnalysis {
    page_number: u64,
    status: PageStatus,
    page_type: PageType,
    checksum_valid: bool,
    lsn_valid: bool,
    lsn: u64,
    record_count: Option<usize>,
    records: Vec<RecoveredRecord>,
}

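/// Opens the tablespace, preferring an explicit `--page-size` override, then
/// auto-detection, then a fallback scan over common page sizes that divide the
/// file size evenly, and finally the default page size. Returns the tablespace
/// together with a label describing how the page size was chosen (`None` means
/// auto-detected).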
fn open_tablespace(
    file: &str,
    page_size_override: Option<u32>,
    writer: &mut dyn Write,
) -> Result<(Tablespace, Option<String>), IdbError> {
    if let Some(ps) = page_size_override {
        let ts = Tablespace::open_with_page_size(file, ps)?;
        return Ok((ts, Some("user-specified".to_string())));
    }

    match Tablespace::open(file) {
        Ok(ts) => Ok((ts, None)),
        Err(_) => {
            let candidates = [
                SIZE_PAGE_16K,
                SIZE_PAGE_8K,
                SIZE_PAGE_4K,
                SIZE_PAGE_32K,
                SIZE_PAGE_64K,
            ];

            let file_size = std::fs::metadata(file)
                .map_err(|e| IdbError::Io(format!("Cannot stat {}: {}", file, e)))?
                .len();

            for &ps in &candidates {
                if file_size >= ps as u64 && file_size % ps as u64 == 0 {
                    if let Ok(ts) = Tablespace::open_with_page_size(file, ps) {
                        let _ = wprintln!(
                            writer,
                            "Warning: auto-detect failed, using page size {} (file size divisible)",
                            ps
                        );
                        return Ok((ts, Some(format!("fallback ({})", ps))));
                    }
                }
            }

            let ts = Tablespace::open_with_page_size(file, SIZE_PAGE_DEFAULT)?;
            let _ = wprintln!(
                writer,
                "Warning: using default page size {} (no size divides evenly)",
                SIZE_PAGE_DEFAULT
            );
            Ok((ts, Some("default-fallback".to_string())))
        }
    }
}

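/// Analyzes one page buffer: all-zero pages are `Empty`, pages whose FIL header
/// cannot be parsed are `Unreadable`, and otherwise the checksum and LSN checks
/// decide between `Intact` and `Corrupt`. Records are walked on INDEX pages when
/// the page is intact (or when `force` is set), and their bytes are extracted
/// only for verbose JSON output.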
fn analyze_page(
    page_data: &[u8],
    page_num: u64,
    page_size: u32,
    force: bool,
    verbose_json: bool,
) -> PageAnalysis {
    if page_data.iter().all(|&b| b == 0) {
        return PageAnalysis {
            page_number: page_num,
            status: PageStatus::Empty,
            page_type: PageType::Allocated,
            checksum_valid: true,
            lsn_valid: true,
            lsn: 0,
            record_count: None,
            records: Vec::new(),
        };
    }

    let header = match FilHeader::parse(page_data) {
        Some(h) => h,
        None => {
            return PageAnalysis {
                page_number: page_num,
                status: PageStatus::Unreadable,
                page_type: PageType::Unknown,
                checksum_valid: false,
                lsn_valid: false,
                lsn: 0,
                record_count: None,
                records: Vec::new(),
            };
        }
    };

    let csum_result = validate_checksum(page_data, page_size, None);
    let lsn_valid = validate_lsn(page_data, page_size);
    let status = if csum_result.valid && lsn_valid {
        PageStatus::Intact
    } else {
        PageStatus::Corrupt
    };

    let (record_count, records) =
        if header.page_type == PageType::Index && (status == PageStatus::Intact || force) {
            let recs = walk_compact_records(page_data);
            let count = recs.len();
            let recovered = if verbose_json {
                extract_records(page_data, &recs, page_size)
            } else {
                Vec::new()
            };
            (Some(count), recovered)
        } else {
            (None, Vec::new())
        };

    PageAnalysis {
        page_number: page_num,
        status,
        page_type: header.page_type,
        checksum_valid: csum_result.valid,
        lsn_valid,
        lsn: header.lsn,
        record_count,
        records,
    }
}

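/// Encodes a byte slice as a lowercase hex string.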
fn to_hex(data: &[u8]) -> String {
    let mut s = String::with_capacity(data.len() * 2);
    for &b in data {
        use std::fmt::Write;
        let _ = write!(s, "{:02x}", b);
    }
    s
}

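/// Builds `RecoveredRecord`s from the walked record headers. Each record's data
/// runs from its offset up to the next record's extra bytes (or the start of
/// the FIL trailer for the last record), clamped to the page bounds.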
fn extract_records(
    page_data: &[u8],
    recs: &[crate::innodb::record::RecordInfo],
    page_size: u32,
) -> Vec<RecoveredRecord> {
    let ps = page_size as usize;
    let data_end = ps - SIZE_FIL_TRAILER;

    recs.iter()
        .enumerate()
        .map(|(i, rec)| {
            let start = rec.offset;
            let end = if i + 1 < recs.len() {
                recs[i + 1].offset.saturating_sub(REC_N_NEW_EXTRA_BYTES)
            } else {
                data_end
            };

            let end = end.min(data_end);
            let data = if start < end && end <= page_data.len() {
                &page_data[start..end]
            } else {
                &[]
            };

            RecoveredRecord {
                offset: rec.offset,
                heap_no: rec.header.heap_no,
                delete_mark: rec.header.delete_mark,
                data_hex: to_hex(data),
            }
        })
        .collect()
}

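/// Entry point for the `recover` command: opens the tablespace, scans the
/// requested page range, aggregates per-page statistics, and renders either a
/// text or JSON report to `writer`.
///
/// A minimal usage sketch (not compiled as a doctest; the `.ibd` path is a
/// placeholder):
///
/// ```ignore
/// let opts = RecoverOptions {
///     file: "table.ibd".to_string(),
///     page: None,
///     verbose: false,
///     json: false,
///     force: false,
///     page_size: None,
///     keyring: None,
/// };
/// let mut out = std::io::stdout();
/// execute(&opts, &mut out)?;
/// ```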
pub fn execute(opts: &RecoverOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
    let (mut ts, page_size_source) = open_tablespace(&opts.file, opts.page_size, writer)?;

    if let Some(ref keyring_path) = opts.keyring {
        crate::cli::setup_decryption(&mut ts, keyring_path)?;
    }

    let page_size = ts.page_size();
    let page_count = ts.page_count();
    let file_size = ts.file_size();

    let verbose_json = opts.verbose && opts.json;

    let (start_page, end_page) = match opts.page {
        Some(p) => {
            if p >= page_count {
                return Err(IdbError::Parse(format!(
                    "Page {} out of range (tablespace has {} pages)",
                    p, page_count
                )));
            }
            (p, p + 1)
        }
        None => (0, page_count),
    };
    let scan_count = end_page - start_page;

    let mut analyses = Vec::with_capacity(scan_count as usize);
    let pb = if !opts.json && scan_count > 1 {
        Some(create_progress_bar(scan_count, "pages"))
    } else {
        None
    };

    for page_num in start_page..end_page {
        if let Some(ref pb) = pb {
            pb.inc(1);
        }

        let page_data = match ts.read_page(page_num) {
            Ok(data) => data,
            Err(_) => {
                analyses.push(PageAnalysis {
                    page_number: page_num,
                    status: PageStatus::Unreadable,
                    page_type: PageType::Unknown,
                    checksum_valid: false,
                    lsn_valid: false,
                    lsn: 0,
                    record_count: None,
                    records: Vec::new(),
                });
                continue;
            }
        };

        analyses.push(analyze_page(
            &page_data,
            page_num,
            page_size,
            opts.force,
            verbose_json,
        ));
    }

    if let Some(pb) = pb {
        pb.finish_and_clear();
    }

    let mut intact = 0u64;
    let mut corrupt = 0u64;
    let mut empty = 0u64;
    let mut unreadable = 0u64;
    let mut total_records = 0u64;
    let mut corrupt_records = 0u64;
    let mut corrupt_page_numbers = Vec::new();
    let mut index_pages_total = 0u64;
    let mut index_pages_recoverable = 0u64;

    for a in &analyses {
        match a.status {
            PageStatus::Intact => intact += 1,
            PageStatus::Corrupt => {
                corrupt += 1;
                corrupt_page_numbers.push(a.page_number);
            }
            PageStatus::Empty => empty += 1,
            PageStatus::Unreadable => unreadable += 1,
        }

        if a.page_type == PageType::Index {
            index_pages_total += 1;
            if a.status == PageStatus::Intact {
                index_pages_recoverable += 1;
            }
            if let Some(count) = a.record_count {
                if a.status == PageStatus::Intact {
                    total_records += count as u64;
                } else {
                    corrupt_records += count as u64;
                }
            }
        }
    }

    if opts.force {
        for a in &analyses {
            if a.page_type == PageType::Index
                && a.status == PageStatus::Corrupt
                && a.record_count.is_some()
            {
                index_pages_recoverable += 1;
            }
        }
    }

    let stats = RecoverStats {
        file_size,
        page_size,
        page_size_source,
        scan_count,
        intact,
        corrupt,
        empty,
        unreadable,
        total_records,
        corrupt_records,
        corrupt_page_numbers,
        index_pages_total,
        index_pages_recoverable,
    };

    if opts.json {
        output_json(opts, &analyses, &stats, writer)
    } else {
        output_text(opts, &analyses, &stats, writer)
    }
}

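/// Renders the human-readable report: file header, optional per-page lines in
/// verbose mode, a page-status summary, INDEX-page recoverability figures, and
/// the overall percentage of intact pages.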
fn output_text(
    opts: &RecoverOptions,
    analyses: &[PageAnalysis],
    stats: &RecoverStats,
    writer: &mut dyn Write,
) -> Result<(), IdbError> {
    wprintln!(writer, "Recovery Analysis: {}", opts.file)?;
    wprintln!(
        writer,
        "File size: {} bytes ({} pages x {} bytes)",
        stats.file_size,
        stats.scan_count,
        stats.page_size
    )?;

    let source_note = match &stats.page_size_source {
        Some(s) => format!(" ({})", s),
        None => " (auto-detected)".to_string(),
    };
    wprintln!(writer, "Page size: {}{}", stats.page_size, source_note)?;
    wprintln!(writer)?;

    if opts.verbose {
        for a in analyses {
            let status_str = match a.status {
                PageStatus::Intact => a.status.label().to_string(),
                PageStatus::Corrupt => format!("{}", a.status.label().red()),
                PageStatus::Empty => a.status.label().to_string(),
                PageStatus::Unreadable => format!("{}", a.status.label().red()),
            };

            let mut line = format!(
                "Page {:>4}: {:<14} {:<12} LSN={}",
                a.page_number,
                a.page_type.name(),
                status_str,
                a.lsn,
            );

            if let Some(count) = a.record_count {
                line.push_str(&format!(" records={}", count));
            }

            if a.status == PageStatus::Corrupt {
                if !a.checksum_valid {
                    line.push_str(" checksum mismatch");
                }
                if !a.lsn_valid {
                    line.push_str(" LSN mismatch");
                }
            }

            wprintln!(writer, "{}", line)?;
        }
        wprintln!(writer)?;
    }

    wprintln!(writer, "Page Status Summary:")?;
    wprintln!(writer, " Intact: {:>4} pages", stats.intact)?;
    if stats.corrupt > 0 {
        let pages_str = if stats.corrupt_page_numbers.len() <= 10 {
            let nums: Vec<String> = stats
                .corrupt_page_numbers
                .iter()
                .map(|n| n.to_string())
                .collect();
            format!(" (pages {})", nums.join(", "))
        } else {
            format!(" ({} pages)", stats.corrupt)
        };
        wprintln!(
            writer,
            " Corrupt: {:>4} pages{}",
            format!("{}", stats.corrupt).red(),
            pages_str
        )?;
    } else {
        wprintln!(writer, " Corrupt: {:>4} pages", stats.corrupt)?;
    }
    wprintln!(writer, " Empty: {:>4} pages", stats.empty)?;
    if stats.unreadable > 0 {
        wprintln!(
            writer,
            " Unreadable: {:>4} pages",
            format!("{}", stats.unreadable).red()
        )?;
    } else {
        wprintln!(writer, " Unreadable: {:>4} pages", stats.unreadable)?;
    }
    wprintln!(writer, " Total: {:>4} pages", stats.scan_count)?;
    wprintln!(writer)?;

    if stats.index_pages_total > 0 {
        wprintln!(
            writer,
            "Recoverable INDEX Pages: {} of {}",
            stats.index_pages_recoverable,
            stats.index_pages_total
        )?;
        wprintln!(writer, " Total user records: {}", stats.total_records)?;
        if stats.corrupt_records > 0 && !opts.force {
            wprintln!(
                writer,
                " Records on corrupt pages: {} (use --force to include)",
                stats.corrupt_records
            )?;
        } else if stats.corrupt_records > 0 {
            wprintln!(
                writer,
                " Records on corrupt pages: {} (included with --force)",
                stats.corrupt_records
            )?;
        }
        wprintln!(writer)?;
    }

    let total_non_empty = stats.intact + stats.corrupt + stats.unreadable;
    if total_non_empty > 0 {
        let pct = (stats.intact as f64 / total_non_empty as f64) * 100.0;
        wprintln!(writer, "Overall: {:.1}% of pages intact", pct)?;
    }

    Ok(())
}

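/// Serializes the report as pretty-printed JSON. Per-page detail is included
/// only in verbose mode, and `force_recoverable_records` is reported when
/// records sit on corrupt pages but `--force` was not given.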
fn output_json(
    opts: &RecoverOptions,
    analyses: &[PageAnalysis],
    stats: &RecoverStats,
    writer: &mut dyn Write,
) -> Result<(), IdbError> {
    let all_records = stats.total_records + if opts.force { stats.corrupt_records } else { 0 };

    let pages: Vec<PageRecoveryInfo> = if opts.verbose {
        analyses
            .iter()
            .map(|a| PageRecoveryInfo {
                page_number: a.page_number,
                status: a.status,
                page_type: a.page_type.name().to_string(),
                checksum_valid: a.checksum_valid,
                lsn_valid: a.lsn_valid,
                lsn: a.lsn,
                record_count: a.record_count,
                records: a
                    .records
                    .iter()
                    .map(|r| RecoveredRecord {
                        offset: r.offset,
                        heap_no: r.heap_no,
                        delete_mark: r.delete_mark,
                        data_hex: r.data_hex.clone(),
                    })
                    .collect(),
            })
            .collect()
    } else {
        Vec::new()
    };

    let force_recs = if stats.corrupt_records > 0 && !opts.force {
        Some(stats.corrupt_records)
    } else {
        None
    };

    let report = RecoverReport {
        file: opts.file.clone(),
        file_size: stats.file_size,
        page_size: stats.page_size,
        page_size_source: stats.page_size_source.clone(),
        total_pages: stats.scan_count,
        summary: RecoverSummary {
            intact: stats.intact,
            corrupt: stats.corrupt,
            empty: stats.empty,
            unreadable: stats.unreadable,
        },
        recoverable_records: all_records,
        force_recoverable_records: force_recs,
        pages,
    };

    let json = serde_json::to_string_pretty(&report)
        .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
    wprintln!(writer, "{}", json)?;

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_page_status_label() {
        assert_eq!(PageStatus::Intact.label(), "intact");
        assert_eq!(PageStatus::Corrupt.label(), "CORRUPT");
        assert_eq!(PageStatus::Empty.label(), "empty");
        assert_eq!(PageStatus::Unreadable.label(), "UNREADABLE");
    }

    #[test]
    fn test_analyze_empty_page() {
        let page = vec![0u8; 16384];
        let result = analyze_page(&page, 0, 16384, false, false);
        assert_eq!(result.status, PageStatus::Empty);
        assert_eq!(result.page_type, PageType::Allocated);
    }

    #[test]
    fn test_analyze_short_page_is_unreadable() {
        let page = vec![0xFF; 10];
        let result = analyze_page(&page, 0, 16384, false, false);
        assert_eq!(result.status, PageStatus::Unreadable);
    }

    #[test]
    fn test_analyze_valid_index_page() {
        use byteorder::{BigEndian, ByteOrder};

        let mut page = vec![0u8; 16384];
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
        BigEndian::write_u32(&mut page[FIL_PAGE_PREV..], FIL_NULL);
        BigEndian::write_u32(&mut page[FIL_PAGE_NEXT..], FIL_NULL);
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 5000);
        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], 17855); // FIL_PAGE_INDEX
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_ID..], 1);

        // The FIL trailer carries the low 32 bits of the LSN so the LSN check passes.
        let trailer = 16384 - SIZE_FIL_TRAILER;
        BigEndian::write_u32(&mut page[trailer + 4..], (5000u64 & 0xFFFFFFFF) as u32);

        // CRC32C page checksum: header range XOR body range, stored in the first field.
        let end = 16384 - SIZE_FIL_TRAILER;
        let crc1 = crc32c::crc32c(&page[FIL_PAGE_OFFSET..FIL_PAGE_FILE_FLUSH_LSN]);
        let crc2 = crc32c::crc32c(&page[FIL_PAGE_DATA..end]);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], crc1 ^ crc2);

        let result = analyze_page(&page, 1, 16384, false, false);
        assert_eq!(result.status, PageStatus::Intact);
        assert_eq!(result.page_type, PageType::Index);
        assert!(result.record_count.is_some());
    }

    #[test]
    fn test_analyze_corrupt_page() {
        use byteorder::{BigEndian, ByteOrder};

        let mut page = vec![0u8; 16384];
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 5000);
        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], 17855);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_ID..], 1);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], 0xDEAD);

        let result = analyze_page(&page, 1, 16384, false, false);
        assert_eq!(result.status, PageStatus::Corrupt);
        assert!(result.record_count.is_none());
    }

    #[test]
    fn test_analyze_corrupt_page_with_force() {
        use byteorder::{BigEndian, ByteOrder};

        let mut page = vec![0u8; 16384];
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 5000);
        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], 17855);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_ID..], 1);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], 0xDEAD);

        let result = analyze_page(&page, 1, 16384, true, false);
        assert_eq!(result.status, PageStatus::Corrupt);
        assert!(result.record_count.is_some());
    }
}