Skip to main content

idb/cli/
recover.rs

1use std::io::Write;
2
3use colored::Colorize;
4use serde::Serialize;
5
6use crate::cli::{create_progress_bar, wprintln};
7use crate::innodb::checksum::{validate_checksum, validate_lsn};
8use crate::innodb::constants::*;
9use crate::innodb::page::FilHeader;
10use crate::innodb::page_types::PageType;
11use crate::innodb::record::walk_compact_records;
12use crate::innodb::tablespace::Tablespace;
13use crate::IdbError;
14
/// Options for the `inno recover` subcommand.
///
/// Derives `Debug` and `Clone` so callers can log the effective options and
/// hand out copies without writing the boilerplate themselves.
#[derive(Debug, Clone)]
pub struct RecoverOptions {
    /// Path to the InnoDB tablespace file (.ibd).
    pub file: String,
    /// Analyze a single page instead of full scan. A page number at or
    /// beyond the tablespace page count is rejected with an error.
    pub page: Option<u64>,
    /// Show per-page details. Combined with `json`, recovered record bytes
    /// are included as hex.
    pub verbose: bool,
    /// Emit output as JSON.
    pub json: bool,
    /// Extract records from corrupt pages with valid headers.
    pub force: bool,
    /// Override the auto-detected page size. When set, page-size detection
    /// and the fallback guesses are skipped.
    pub page_size: Option<u32>,
}
30
31/// Page integrity status.
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
33#[serde(rename_all = "lowercase")]
34enum PageStatus {
35    Intact,
36    Corrupt,
37    Empty,
38    Unreadable,
39}
40
41impl PageStatus {
42    fn label(self) -> &'static str {
43        match self {
44            PageStatus::Intact => "intact",
45            PageStatus::Corrupt => "CORRUPT",
46            PageStatus::Empty => "empty",
47            PageStatus::Unreadable => "UNREADABLE",
48        }
49    }
50}
51
52/// Top-level JSON output for the recovery report.
53#[derive(Serialize)]
54struct RecoverReport {
55    file: String,
56    file_size: u64,
57    page_size: u32,
58    #[serde(skip_serializing_if = "Option::is_none")]
59    page_size_source: Option<String>,
60    total_pages: u64,
61    summary: RecoverSummary,
62    recoverable_records: u64,
63    #[serde(skip_serializing_if = "Option::is_none")]
64    force_recoverable_records: Option<u64>,
65    #[serde(skip_serializing_if = "Vec::is_empty")]
66    pages: Vec<PageRecoveryInfo>,
67}
68
69/// Status counts by category.
70#[derive(Serialize)]
71struct RecoverSummary {
72    intact: u64,
73    corrupt: u64,
74    empty: u64,
75    unreadable: u64,
76}
77
78/// Per-page recovery info for JSON output.
79#[derive(Serialize)]
80struct PageRecoveryInfo {
81    page_number: u64,
82    status: PageStatus,
83    page_type: String,
84    checksum_valid: bool,
85    lsn_valid: bool,
86    lsn: u64,
87    #[serde(skip_serializing_if = "Option::is_none")]
88    record_count: Option<usize>,
89    #[serde(skip_serializing_if = "Vec::is_empty")]
90    records: Vec<RecoveredRecord>,
91}
92
93/// A single recovered record for verbose JSON output.
94#[derive(Serialize)]
95struct RecoveredRecord {
96    offset: usize,
97    heap_no: u16,
98    delete_mark: bool,
99    data_hex: String,
100}
101
/// Aggregated results of the page scan, shared by the text and JSON writers.
struct RecoverStats {
    /// Size of the tablespace file in bytes.
    file_size: u64,
    /// Page size used for the analysis, in bytes.
    page_size: u32,
    /// How the page size was chosen; `None` when auto-detection succeeded.
    page_size_source: Option<String>,
    /// Number of pages that were analyzed.
    scan_count: u64,
    /// Pages classified as intact.
    intact: u64,
    /// Pages classified as corrupt.
    corrupt: u64,
    /// Pages classified as empty.
    empty: u64,
    /// Pages classified as unreadable.
    unreadable: u64,
    /// Records counted on intact INDEX pages.
    total_records: u64,
    /// Records counted on INDEX pages that are not intact.
    corrupt_records: u64,
    /// Page numbers of all corrupt pages, in scan order.
    corrupt_page_numbers: Vec<u64>,
    /// Number of analyzed pages whose type is INDEX.
    index_pages_total: u64,
    /// INDEX pages that are intact, plus corrupt INDEX pages with a record
    /// count when `--force` is active.
    index_pages_recoverable: u64,
}
118
119/// Internal per-page analysis result.
120struct PageAnalysis {
121    page_number: u64,
122    status: PageStatus,
123    page_type: PageType,
124    checksum_valid: bool,
125    lsn_valid: bool,
126    lsn: u64,
127    record_count: Option<usize>,
128    records: Vec<RecoveredRecord>,
129}
130
131/// Try to open the tablespace, with smart page size fallback when page 0 is damaged.
132fn open_tablespace(
133    file: &str,
134    page_size_override: Option<u32>,
135    writer: &mut dyn Write,
136) -> Result<(Tablespace, Option<String>), IdbError> {
137    if let Some(ps) = page_size_override {
138        let ts = Tablespace::open_with_page_size(file, ps)?;
139        return Ok((ts, Some("user-specified".to_string())));
140    }
141
142    match Tablespace::open(file) {
143        Ok(ts) => Ok((ts, None)),
144        Err(_) => {
145            // Page 0 may be corrupt — try common page sizes
146            let candidates = [
147                SIZE_PAGE_16K,
148                SIZE_PAGE_8K,
149                SIZE_PAGE_4K,
150                SIZE_PAGE_32K,
151                SIZE_PAGE_64K,
152            ];
153
154            let file_size = std::fs::metadata(file)
155                .map_err(|e| IdbError::Io(format!("Cannot stat {}: {}", file, e)))?
156                .len();
157
158            for &ps in &candidates {
159                if file_size >= ps as u64 && file_size % ps as u64 == 0 {
160                    if let Ok(ts) = Tablespace::open_with_page_size(file, ps) {
161                        let _ = wprintln!(
162                            writer,
163                            "Warning: auto-detect failed, using page size {} (file size divisible)",
164                            ps
165                        );
166                        return Ok((ts, Some(format!("fallback ({})", ps))));
167                    }
168                }
169            }
170
171            // Last resort: default 16K
172            let ts = Tablespace::open_with_page_size(file, SIZE_PAGE_DEFAULT)?;
173            let _ = wprintln!(
174                writer,
175                "Warning: using default page size {} (no size divides evenly)",
176                SIZE_PAGE_DEFAULT
177            );
178            Ok((ts, Some("default-fallback".to_string())))
179        }
180    }
181}
182
183/// Analyze a single page and return its status and recovery info.
184fn analyze_page(
185    page_data: &[u8],
186    page_num: u64,
187    page_size: u32,
188    force: bool,
189    verbose_json: bool,
190) -> PageAnalysis {
191    // Check all-zeros (empty/allocated page)
192    if page_data.iter().all(|&b| b == 0) {
193        return PageAnalysis {
194            page_number: page_num,
195            status: PageStatus::Empty,
196            page_type: PageType::Allocated,
197            checksum_valid: true,
198            lsn_valid: true,
199            lsn: 0,
200            record_count: None,
201            records: Vec::new(),
202        };
203    }
204
205    // Parse FIL header
206    let header = match FilHeader::parse(page_data) {
207        Some(h) => h,
208        None => {
209            return PageAnalysis {
210                page_number: page_num,
211                status: PageStatus::Unreadable,
212                page_type: PageType::Unknown,
213                checksum_valid: false,
214                lsn_valid: false,
215                lsn: 0,
216                record_count: None,
217                records: Vec::new(),
218            };
219        }
220    };
221
222    let csum_result = validate_checksum(page_data, page_size);
223    let lsn_valid = validate_lsn(page_data, page_size);
224    let status = if csum_result.valid && lsn_valid {
225        PageStatus::Intact
226    } else {
227        PageStatus::Corrupt
228    };
229
230    // Count records on INDEX pages
231    let (record_count, records) =
232        if header.page_type == PageType::Index && (status == PageStatus::Intact || force) {
233            let recs = walk_compact_records(page_data);
234            let count = recs.len();
235            let recovered = if verbose_json {
236                extract_records(page_data, &recs, page_size)
237            } else {
238                Vec::new()
239            };
240            (Some(count), recovered)
241        } else {
242            (None, Vec::new())
243        };
244
245    PageAnalysis {
246        page_number: page_num,
247        status,
248        page_type: header.page_type,
249        checksum_valid: csum_result.valid,
250        lsn_valid,
251        lsn: header.lsn,
252        record_count,
253        records,
254    }
255}
256
/// Render `data` as lowercase hexadecimal, two characters per byte.
fn to_hex(data: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut encoded = String::with_capacity(data.len() * 2);
    for &byte in data {
        // High nibble first, then low nibble.
        encoded.push(DIGITS[usize::from(byte >> 4)] as char);
        encoded.push(DIGITS[usize::from(byte & 0x0f)] as char);
    }
    encoded
}
266
267/// Extract raw record bytes as hex from an INDEX page.
268fn extract_records(
269    page_data: &[u8],
270    recs: &[crate::innodb::record::RecordInfo],
271    page_size: u32,
272) -> Vec<RecoveredRecord> {
273    let ps = page_size as usize;
274    let data_end = ps - SIZE_FIL_TRAILER;
275
276    recs.iter()
277        .enumerate()
278        .map(|(i, rec)| {
279            let start = rec.offset;
280            let end = if i + 1 < recs.len() {
281                // Next record's origin minus its extra header
282                recs[i + 1].offset.saturating_sub(REC_N_NEW_EXTRA_BYTES)
283            } else {
284                // Use heap top or end of data area
285                data_end
286            };
287
288            let end = end.min(data_end);
289            let data = if start < end && end <= page_data.len() {
290                &page_data[start..end]
291            } else {
292                &[]
293            };
294
295            RecoveredRecord {
296                offset: rec.offset,
297                heap_no: rec.header.heap_no,
298                delete_mark: rec.header.delete_mark,
299                data_hex: to_hex(data),
300            }
301        })
302        .collect()
303}
304
305/// Run the recovery analysis and output results.
306pub fn execute(opts: &RecoverOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
307    let (mut ts, page_size_source) = open_tablespace(&opts.file, opts.page_size, writer)?;
308    let page_size = ts.page_size();
309    let page_count = ts.page_count();
310    let file_size = ts.file_size();
311
312    let verbose_json = opts.verbose && opts.json;
313
314    // Determine which pages to analyze
315    let (start_page, end_page) = match opts.page {
316        Some(p) => {
317            if p >= page_count {
318                return Err(IdbError::Parse(format!(
319                    "Page {} out of range (tablespace has {} pages)",
320                    p, page_count
321                )));
322            }
323            (p, p + 1)
324        }
325        None => (0, page_count),
326    };
327    let scan_count = end_page - start_page;
328
329    // Analyze pages
330    let mut analyses = Vec::with_capacity(scan_count as usize);
331    let pb = if !opts.json && scan_count > 1 {
332        Some(create_progress_bar(scan_count, "pages"))
333    } else {
334        None
335    };
336
337    for page_num in start_page..end_page {
338        if let Some(ref pb) = pb {
339            pb.inc(1);
340        }
341
342        let page_data = match ts.read_page(page_num) {
343            Ok(data) => data,
344            Err(_) => {
345                analyses.push(PageAnalysis {
346                    page_number: page_num,
347                    status: PageStatus::Unreadable,
348                    page_type: PageType::Unknown,
349                    checksum_valid: false,
350                    lsn_valid: false,
351                    lsn: 0,
352                    record_count: None,
353                    records: Vec::new(),
354                });
355                continue;
356            }
357        };
358
359        analyses.push(analyze_page(
360            &page_data,
361            page_num,
362            page_size,
363            opts.force,
364            verbose_json,
365        ));
366    }
367
368    if let Some(pb) = pb {
369        pb.finish_and_clear();
370    }
371
372    // Compute summary
373    let mut intact = 0u64;
374    let mut corrupt = 0u64;
375    let mut empty = 0u64;
376    let mut unreadable = 0u64;
377    let mut total_records = 0u64;
378    let mut corrupt_records = 0u64;
379    let mut corrupt_page_numbers = Vec::new();
380    let mut index_pages_total = 0u64;
381    let mut index_pages_recoverable = 0u64;
382
383    for a in &analyses {
384        match a.status {
385            PageStatus::Intact => intact += 1,
386            PageStatus::Corrupt => {
387                corrupt += 1;
388                corrupt_page_numbers.push(a.page_number);
389            }
390            PageStatus::Empty => empty += 1,
391            PageStatus::Unreadable => unreadable += 1,
392        }
393
394        if a.page_type == PageType::Index {
395            index_pages_total += 1;
396            if a.status == PageStatus::Intact {
397                index_pages_recoverable += 1;
398            }
399            if let Some(count) = a.record_count {
400                if a.status == PageStatus::Intact {
401                    total_records += count as u64;
402                } else {
403                    corrupt_records += count as u64;
404                }
405            }
406        }
407    }
408
409    // If --force, corrupt INDEX pages with records are also recoverable
410    if opts.force {
411        for a in &analyses {
412            if a.page_type == PageType::Index
413                && a.status == PageStatus::Corrupt
414                && a.record_count.is_some()
415            {
416                index_pages_recoverable += 1;
417            }
418        }
419    }
420
421    let stats = RecoverStats {
422        file_size,
423        page_size,
424        page_size_source,
425        scan_count,
426        intact,
427        corrupt,
428        empty,
429        unreadable,
430        total_records,
431        corrupt_records,
432        corrupt_page_numbers,
433        index_pages_total,
434        index_pages_recoverable,
435    };
436
437    if opts.json {
438        output_json(opts, &analyses, &stats, writer)
439    } else {
440        output_text(opts, &analyses, &stats, writer)
441    }
442}
443
444fn output_text(
445    opts: &RecoverOptions,
446    analyses: &[PageAnalysis],
447    stats: &RecoverStats,
448    writer: &mut dyn Write,
449) -> Result<(), IdbError> {
450    wprintln!(writer, "Recovery Analysis: {}", opts.file)?;
451    wprintln!(
452        writer,
453        "File size: {} bytes ({} pages x {} bytes)",
454        stats.file_size,
455        stats.scan_count,
456        stats.page_size
457    )?;
458
459    let source_note = match &stats.page_size_source {
460        Some(s) => format!(" ({})", s),
461        None => " (auto-detected)".to_string(),
462    };
463    wprintln!(writer, "Page size: {}{}", stats.page_size, source_note)?;
464    wprintln!(writer)?;
465
466    // Verbose: per-page detail
467    if opts.verbose {
468        for a in analyses {
469            let status_str = match a.status {
470                PageStatus::Intact => a.status.label().to_string(),
471                PageStatus::Corrupt => format!("{}", a.status.label().red()),
472                PageStatus::Empty => a.status.label().to_string(),
473                PageStatus::Unreadable => format!("{}", a.status.label().red()),
474            };
475
476            let mut line = format!(
477                "Page {:>4}: {:<14} {:<12} LSN={}",
478                a.page_number,
479                a.page_type.name(),
480                status_str,
481                a.lsn,
482            );
483
484            if let Some(count) = a.record_count {
485                line.push_str(&format!("  records={}", count));
486            }
487
488            if a.status == PageStatus::Corrupt {
489                if !a.checksum_valid {
490                    line.push_str("  checksum mismatch");
491                }
492                if !a.lsn_valid {
493                    line.push_str("  LSN mismatch");
494                }
495            }
496
497            wprintln!(writer, "{}", line)?;
498        }
499        wprintln!(writer)?;
500    }
501
502    // Summary
503    wprintln!(writer, "Page Status Summary:")?;
504    wprintln!(writer, "  Intact:      {:>4} pages", stats.intact)?;
505    if stats.corrupt > 0 {
506        let pages_str = if stats.corrupt_page_numbers.len() <= 10 {
507            let nums: Vec<String> = stats
508                .corrupt_page_numbers
509                .iter()
510                .map(|n| n.to_string())
511                .collect();
512            format!(" (pages {})", nums.join(", "))
513        } else {
514            format!(" ({} pages)", stats.corrupt)
515        };
516        wprintln!(
517            writer,
518            "  Corrupt:     {:>4} pages{}",
519            format!("{}", stats.corrupt).red(),
520            pages_str
521        )?;
522    } else {
523        wprintln!(writer, "  Corrupt:     {:>4} pages", stats.corrupt)?;
524    }
525    wprintln!(writer, "  Empty:       {:>4} pages", stats.empty)?;
526    if stats.unreadable > 0 {
527        wprintln!(
528            writer,
529            "  Unreadable:  {:>4} pages",
530            format!("{}", stats.unreadable).red()
531        )?;
532    } else {
533        wprintln!(writer, "  Unreadable:  {:>4} pages", stats.unreadable)?;
534    }
535    wprintln!(writer, "  Total:       {:>4} pages", stats.scan_count)?;
536    wprintln!(writer)?;
537
538    if stats.index_pages_total > 0 {
539        wprintln!(
540            writer,
541            "Recoverable INDEX Pages: {} of {}",
542            stats.index_pages_recoverable,
543            stats.index_pages_total
544        )?;
545        wprintln!(writer, "  Total user records: {}", stats.total_records)?;
546        if stats.corrupt_records > 0 && !opts.force {
547            wprintln!(
548                writer,
549                "  Records on corrupt pages: {} (use --force to include)",
550                stats.corrupt_records
551            )?;
552        } else if stats.corrupt_records > 0 {
553            wprintln!(
554                writer,
555                "  Records on corrupt pages: {} (included with --force)",
556                stats.corrupt_records
557            )?;
558        }
559        wprintln!(writer)?;
560    }
561
562    let total_non_empty = stats.intact + stats.corrupt + stats.unreadable;
563    if total_non_empty > 0 {
564        let pct = (stats.intact as f64 / total_non_empty as f64) * 100.0;
565        wprintln!(writer, "Overall: {:.1}% of pages intact", pct)?;
566    }
567
568    Ok(())
569}
570
571fn output_json(
572    opts: &RecoverOptions,
573    analyses: &[PageAnalysis],
574    stats: &RecoverStats,
575    writer: &mut dyn Write,
576) -> Result<(), IdbError> {
577    let all_records = stats.total_records + if opts.force { stats.corrupt_records } else { 0 };
578
579    let pages: Vec<PageRecoveryInfo> = if opts.verbose {
580        analyses
581            .iter()
582            .map(|a| PageRecoveryInfo {
583                page_number: a.page_number,
584                status: a.status,
585                page_type: a.page_type.name().to_string(),
586                checksum_valid: a.checksum_valid,
587                lsn_valid: a.lsn_valid,
588                lsn: a.lsn,
589                record_count: a.record_count,
590                records: a
591                    .records
592                    .iter()
593                    .map(|r| RecoveredRecord {
594                        offset: r.offset,
595                        heap_no: r.heap_no,
596                        delete_mark: r.delete_mark,
597                        data_hex: r.data_hex.clone(),
598                    })
599                    .collect(),
600            })
601            .collect()
602    } else {
603        Vec::new()
604    };
605
606    let force_recs = if stats.corrupt_records > 0 && !opts.force {
607        Some(stats.corrupt_records)
608    } else {
609        None
610    };
611
612    let report = RecoverReport {
613        file: opts.file.clone(),
614        file_size: stats.file_size,
615        page_size: stats.page_size,
616        page_size_source: stats.page_size_source.clone(),
617        total_pages: stats.scan_count,
618        summary: RecoverSummary {
619            intact: stats.intact,
620            corrupt: stats.corrupt,
621            empty: stats.empty,
622            unreadable: stats.unreadable,
623        },
624        recoverable_records: all_records,
625        force_recoverable_records: force_recs,
626        pages,
627    };
628
629    let json = serde_json::to_string_pretty(&report)
630        .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
631    wprintln!(writer, "{}", json)?;
632
633    Ok(())
634}
635
#[cfg(test)]
mod tests {
    use super::*;
    use byteorder::{BigEndian, ByteOrder};

    /// Page size used by every synthetic page in these tests.
    const PAGE_SIZE: u32 = 16384;
    /// On-disk page type code written for INDEX pages.
    const INDEX_TYPE_CODE: u16 = 17855;
    /// LSN written into the FIL header of synthetic pages.
    const TEST_LSN: u64 = 5000;

    /// Build a zeroed page carrying only the FIL header fields shared by all
    /// INDEX page tests (page number, LSN, page type, space id).
    fn index_page_skeleton() -> Vec<u8> {
        let mut page = vec![0u8; PAGE_SIZE as usize];
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], TEST_LSN);
        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], INDEX_TYPE_CODE);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_ID..], 1);
        page
    }

    /// Same as `index_page_skeleton`, with a checksum field that cannot
    /// match the page contents.
    fn index_page_with_bad_checksum() -> Vec<u8> {
        let mut page = index_page_skeleton();
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], 0xDEAD);
        page
    }

    #[test]
    fn test_page_status_label() {
        let expected = [
            (PageStatus::Intact, "intact"),
            (PageStatus::Corrupt, "CORRUPT"),
            (PageStatus::Empty, "empty"),
            (PageStatus::Unreadable, "UNREADABLE"),
        ];
        for (status, label) in expected.iter() {
            assert_eq!(status.label(), *label);
        }
    }

    #[test]
    fn test_analyze_empty_page() {
        let page = vec![0u8; PAGE_SIZE as usize];
        let result = analyze_page(&page, 0, PAGE_SIZE, false, false);
        assert_eq!(result.status, PageStatus::Empty);
        assert_eq!(result.page_type, PageType::Allocated);
    }

    #[test]
    fn test_analyze_short_page_is_unreadable() {
        let page = vec![0xFF; 10];
        let result = analyze_page(&page, 0, PAGE_SIZE, false, false);
        assert_eq!(result.status, PageStatus::Unreadable);
    }

    #[test]
    fn test_analyze_valid_index_page() {
        let mut page = index_page_skeleton();
        BigEndian::write_u32(&mut page[FIL_PAGE_PREV..], FIL_NULL);
        BigEndian::write_u32(&mut page[FIL_PAGE_NEXT..], FIL_NULL);

        // Trailer: low 32 bits of the header LSN.
        let data_end = PAGE_SIZE as usize - SIZE_FIL_TRAILER;
        BigEndian::write_u32(&mut page[data_end + 4..], (TEST_LSN & 0xFFFFFFFF) as u32);

        // CRC-32C checksum over the header part and the page body.
        let crc1 = crc32c::crc32c(&page[FIL_PAGE_OFFSET..FIL_PAGE_FILE_FLUSH_LSN]);
        let crc2 = crc32c::crc32c(&page[FIL_PAGE_DATA..data_end]);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], crc1 ^ crc2);

        let result = analyze_page(&page, 1, PAGE_SIZE, false, false);
        assert_eq!(result.status, PageStatus::Intact);
        assert_eq!(result.page_type, PageType::Index);
        assert!(result.record_count.is_some());
    }

    #[test]
    fn test_analyze_corrupt_page() {
        // Bad checksum: an arbitrary stored value that does not match the
        // page contents.
        let page = index_page_with_bad_checksum();

        let result = analyze_page(&page, 1, PAGE_SIZE, false, false);
        assert_eq!(result.status, PageStatus::Corrupt);
        // Without --force, no record count on corrupt pages
        assert!(result.record_count.is_none());
    }

    #[test]
    fn test_analyze_corrupt_page_with_force() {
        let page = index_page_with_bad_checksum();

        let result = analyze_page(&page, 1, PAGE_SIZE, true, false);
        assert_eq!(result.status, PageStatus::Corrupt);
        // With --force, records are counted even on corrupt pages
        assert!(result.record_count.is_some());
    }
}
725}