//! idb/cli/recover.rs: implementation of the `inno recover` subcommand.

use std::io::Write;

use colored::Colorize;
use serde::Serialize;

use crate::cli::{create_progress_bar, wprintln};
use crate::innodb::checksum::{validate_checksum, validate_lsn};
use crate::innodb::constants::*;
use crate::innodb::page::FilHeader;
use crate::innodb::page_types::PageType;
use crate::innodb::record::walk_compact_records;
use crate::innodb::tablespace::Tablespace;
use crate::IdbError;

/// Options for the `inno recover` subcommand.
pub struct RecoverOptions {
    /// Path to the InnoDB tablespace file (.ibd).
    pub file: String,
    /// Analyze a single page instead of a full scan.
    pub page: Option<u64>,
    /// Show per-page details.
    pub verbose: bool,
    /// Emit output as JSON.
    pub json: bool,
    /// Extract records from corrupt pages with valid headers.
    pub force: bool,
    /// Override the auto-detected page size.
    pub page_size: Option<u32>,
}

/// Page integrity status.
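///
/// `Empty` means the page is all zeros (allocated but unused); `Unreadable`
/// means the page could not be read or its FIL header could not be parsed.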
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "lowercase")]
enum PageStatus {
    Intact,
    Corrupt,
    Empty,
    Unreadable,
}

impl PageStatus {
    fn label(self) -> &'static str {
        match self {
            PageStatus::Intact => "intact",
            PageStatus::Corrupt => "CORRUPT",
            PageStatus::Empty => "empty",
            PageStatus::Unreadable => "UNREADABLE",
        }
    }
}

/// Top-level JSON output for the recovery report.
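///
/// Serialized shape (condensed; values are illustrative and optional fields
/// are omitted when empty):
///
/// ```text
/// {
///   "file": "t.ibd",
///   "file_size": 65536,
///   "page_size": 16384,
///   "total_pages": 4,
///   "summary": { "intact": 3, "corrupt": 0, "empty": 1, "unreadable": 0 },
///   "recoverable_records": 12
/// }
/// ```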
#[derive(Serialize)]
struct RecoverReport {
    file: String,
    file_size: u64,
    page_size: u32,
    #[serde(skip_serializing_if = "Option::is_none")]
    page_size_source: Option<String>,
    total_pages: u64,
    summary: RecoverSummary,
    recoverable_records: u64,
    #[serde(skip_serializing_if = "Option::is_none")]
    force_recoverable_records: Option<u64>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pages: Vec<PageRecoveryInfo>,
}

/// Status counts by category.
#[derive(Serialize)]
struct RecoverSummary {
    intact: u64,
    corrupt: u64,
    empty: u64,
    unreadable: u64,
}

/// Per-page recovery info for JSON output.
#[derive(Serialize)]
struct PageRecoveryInfo {
    page_number: u64,
    status: PageStatus,
    page_type: String,
    checksum_valid: bool,
    lsn_valid: bool,
    lsn: u64,
    #[serde(skip_serializing_if = "Option::is_none")]
    record_count: Option<usize>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    records: Vec<RecoveredRecord>,
}

/// A single recovered record for verbose JSON output.
#[derive(Serialize)]
struct RecoveredRecord {
    offset: usize,
    heap_no: u16,
    delete_mark: bool,
    data_hex: String,
}

/// Internal per-page analysis result.
struct PageAnalysis {
    page_number: u64,
    status: PageStatus,
    page_type: PageType,
    checksum_valid: bool,
    lsn_valid: bool,
    lsn: u64,
    record_count: Option<usize>,
    records: Vec<RecoveredRecord>,
}

/// Try to open the tablespace, with smart page size fallback when page 0 is damaged.
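///
/// Fallback order: an explicit `page_size_override` wins; otherwise normal
/// auto-detection is tried, then any common page size that divides the file
/// size evenly, and finally `SIZE_PAGE_DEFAULT` as a last resort.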
fn open_tablespace(
    file: &str,
    page_size_override: Option<u32>,
    writer: &mut dyn Write,
) -> Result<(Tablespace, Option<String>), IdbError> {
    if let Some(ps) = page_size_override {
        let ts = Tablespace::open_with_page_size(file, ps)?;
        return Ok((ts, Some("user-specified".to_string())));
    }

    match Tablespace::open(file) {
        Ok(ts) => Ok((ts, None)),
        Err(_) => {
            // Page 0 may be corrupt; try common page sizes
            let candidates = [
                SIZE_PAGE_16K,
                SIZE_PAGE_8K,
                SIZE_PAGE_4K,
                SIZE_PAGE_32K,
                SIZE_PAGE_64K,
            ];

            let file_size = std::fs::metadata(file)
                .map_err(|e| IdbError::Io(format!("Cannot stat {}: {}", file, e)))?
                .len();

            for &ps in &candidates {
                if file_size >= ps as u64 && file_size % ps as u64 == 0 {
                    if let Ok(ts) = Tablespace::open_with_page_size(file, ps) {
                        let _ = wprintln!(
                            writer,
                            "Warning: auto-detect failed, using page size {} (file size divisible)",
                            ps
                        );
                        return Ok((ts, Some(format!("fallback ({})", ps))));
                    }
                }
            }

            // Last resort: default 16K
            let ts = Tablespace::open_with_page_size(file, SIZE_PAGE_DEFAULT)?;
            let _ = wprintln!(
                writer,
                "Warning: using default page size {} (no size divides evenly)",
                SIZE_PAGE_DEFAULT
            );
            Ok((ts, Some("default-fallback".to_string())))
        }
    }
}

/// Analyze a single page and return its status and recovery info.
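///
/// Records on INDEX pages are only walked when the page is intact, or when
/// `force` is set (records from corrupt pages are then counted as well).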
fn analyze_page(
    page_data: &[u8],
    page_num: u64,
    page_size: u32,
    force: bool,
    verbose_json: bool,
) -> PageAnalysis {
    // Check all-zeros (empty/allocated page)
    if page_data.iter().all(|&b| b == 0) {
        return PageAnalysis {
            page_number: page_num,
            status: PageStatus::Empty,
            page_type: PageType::Allocated,
            checksum_valid: true,
            lsn_valid: true,
            lsn: 0,
            record_count: None,
            records: Vec::new(),
        };
    }

    // Parse FIL header
    let header = match FilHeader::parse(page_data) {
        Some(h) => h,
        None => {
            return PageAnalysis {
                page_number: page_num,
                status: PageStatus::Unreadable,
                page_type: PageType::Unknown,
                checksum_valid: false,
                lsn_valid: false,
                lsn: 0,
                record_count: None,
                records: Vec::new(),
            };
        }
    };

    let csum_result = validate_checksum(page_data, page_size);
    let lsn_valid = validate_lsn(page_data, page_size);
    let status = if csum_result.valid && lsn_valid {
        PageStatus::Intact
    } else {
        PageStatus::Corrupt
    };

    // Count records on INDEX pages
    let (record_count, records) = if header.page_type == PageType::Index
        && (status == PageStatus::Intact || force)
    {
        let recs = walk_compact_records(page_data);
        let count = recs.len();
        let recovered = if verbose_json {
            extract_records(page_data, &recs, page_size)
        } else {
            Vec::new()
        };
        (Some(count), recovered)
    } else {
        (None, Vec::new())
    };

    PageAnalysis {
        page_number: page_num,
        status,
        page_type: header.page_type,
        checksum_valid: csum_result.valid,
        lsn_valid,
        lsn: header.lsn,
        record_count,
        records,
    }
}

/// Encode bytes as a lowercase hex string.
fn to_hex(data: &[u8]) -> String {
    let mut s = String::with_capacity(data.len() * 2);
    for &b in data {
        use std::fmt::Write;
        let _ = write!(s, "{:02x}", b);
    }
    s
}

/// Extract raw record bytes as hex from an INDEX page.
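///
/// Record boundaries are best-effort: each record is assumed to end where the
/// next record's extra header begins, and the last record is truncated at the
/// end of the page's data area (just before the FIL trailer).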
fn extract_records(
    page_data: &[u8],
    recs: &[crate::innodb::record::RecordInfo],
    page_size: u32,
) -> Vec<RecoveredRecord> {
    let ps = page_size as usize;
    let data_end = ps - SIZE_FIL_TRAILER;

    recs.iter()
        .enumerate()
        .map(|(i, rec)| {
            let start = rec.offset;
            let end = if i + 1 < recs.len() {
                // Next record's origin minus its extra header
                recs[i + 1].offset.saturating_sub(REC_N_NEW_EXTRA_BYTES)
            } else {
                // Last record: fall back to the end of the data area
                data_end
            };

            let end = end.min(data_end);
            let data = if start < end && end <= page_data.len() {
                &page_data[start..end]
            } else {
                &[]
            };

            RecoveredRecord {
                offset: rec.offset,
                heap_no: rec.header.heap_no,
                delete_mark: rec.header.delete_mark,
                data_hex: to_hex(data),
            }
        })
        .collect()
}

/// Run the recovery analysis and output results.
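///
/// # Example
///
/// A minimal usage sketch (the file path and option values are illustrative):
///
/// ```ignore
/// let opts = RecoverOptions {
///     file: "table.ibd".to_string(),
///     page: None,
///     verbose: false,
///     json: true,
///     force: false,
///     page_size: None,
/// };
/// let mut out = Vec::new();
/// execute(&opts, &mut out)?;
/// ```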
pub fn execute(opts: &RecoverOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
    let (mut ts, page_size_source) = open_tablespace(&opts.file, opts.page_size, writer)?;
    let page_size = ts.page_size();
    let page_count = ts.page_count();
    let file_size = ts.file_size();

    let verbose_json = opts.verbose && opts.json;

    // Determine which pages to analyze
    let (start_page, end_page) = match opts.page {
        Some(p) => {
            if p >= page_count {
                return Err(IdbError::Parse(format!(
                    "Page {} out of range (tablespace has {} pages)",
                    p, page_count
                )));
            }
            (p, p + 1)
        }
        None => (0, page_count),
    };
    let scan_count = end_page - start_page;

    // Analyze pages
    let mut analyses = Vec::with_capacity(scan_count as usize);
    let pb = if !opts.json && scan_count > 1 {
        Some(create_progress_bar(scan_count, "pages"))
    } else {
        None
    };

    for page_num in start_page..end_page {
        if let Some(ref pb) = pb {
            pb.inc(1);
        }

        let page_data = match ts.read_page(page_num) {
            Ok(data) => data,
            Err(_) => {
                analyses.push(PageAnalysis {
                    page_number: page_num,
                    status: PageStatus::Unreadable,
                    page_type: PageType::Unknown,
                    checksum_valid: false,
                    lsn_valid: false,
                    lsn: 0,
                    record_count: None,
                    records: Vec::new(),
                });
                continue;
            }
        };

        analyses.push(analyze_page(
            &page_data,
            page_num,
            page_size,
            opts.force,
            verbose_json,
        ));
    }

    if let Some(pb) = pb {
        pb.finish_and_clear();
    }

    // Compute summary
    let mut intact = 0u64;
    let mut corrupt = 0u64;
    let mut empty = 0u64;
    let mut unreadable = 0u64;
    let mut total_records = 0u64;
    let mut corrupt_records = 0u64;
    let mut corrupt_page_numbers = Vec::new();
    let mut index_pages_total = 0u64;
    let mut index_pages_recoverable = 0u64;

    for a in &analyses {
        match a.status {
            PageStatus::Intact => intact += 1,
            PageStatus::Corrupt => {
                corrupt += 1;
                corrupt_page_numbers.push(a.page_number);
            }
            PageStatus::Empty => empty += 1,
            PageStatus::Unreadable => unreadable += 1,
        }

        if a.page_type == PageType::Index {
            index_pages_total += 1;
            if a.status == PageStatus::Intact {
                index_pages_recoverable += 1;
            }
            if let Some(count) = a.record_count {
                if a.status == PageStatus::Intact {
                    total_records += count as u64;
                } else {
                    corrupt_records += count as u64;
                }
            }
        }
    }

    // If --force, corrupt INDEX pages with records are also recoverable
    if opts.force {
        for a in &analyses {
            if a.page_type == PageType::Index
                && a.status == PageStatus::Corrupt
                && a.record_count.is_some()
            {
                index_pages_recoverable += 1;
            }
        }
    }

    if opts.json {
        output_json(
            opts,
            &analyses,
            file_size,
            page_size,
            page_size_source,
            scan_count,
            intact,
            corrupt,
            empty,
            unreadable,
            total_records,
            corrupt_records,
            writer,
        )
    } else {
        output_text(
            opts,
            &analyses,
            file_size,
            page_size,
            page_size_source,
            scan_count,
            intact,
            corrupt,
            empty,
            unreadable,
            total_records,
            corrupt_records,
            &corrupt_page_numbers,
            index_pages_total,
            index_pages_recoverable,
            writer,
        )
    }
}

#[allow(clippy::too_many_arguments)]
fn output_text(
    opts: &RecoverOptions,
    analyses: &[PageAnalysis],
    file_size: u64,
    page_size: u32,
    page_size_source: Option<String>,
    scan_count: u64,
    intact: u64,
    corrupt: u64,
    empty: u64,
    unreadable: u64,
    total_records: u64,
    corrupt_records: u64,
    corrupt_page_numbers: &[u64],
    index_pages_total: u64,
    index_pages_recoverable: u64,
    writer: &mut dyn Write,
) -> Result<(), IdbError> {
    wprintln!(writer, "Recovery Analysis: {}", opts.file)?;
    wprintln!(
        writer,
        "File size: {} bytes ({} pages x {} bytes)",
        file_size, scan_count, page_size
    )?;

    let source_note = match &page_size_source {
        Some(s) => format!(" ({})", s),
        None => " (auto-detected)".to_string(),
    };
    wprintln!(writer, "Page size: {}{}", page_size, source_note)?;
    wprintln!(writer)?;

    // Verbose: per-page detail
    if opts.verbose {
        for a in analyses {
            let status_str = match a.status {
                PageStatus::Intact => a.status.label().to_string(),
                PageStatus::Corrupt => format!("{}", a.status.label().red()),
                PageStatus::Empty => a.status.label().to_string(),
                PageStatus::Unreadable => format!("{}", a.status.label().red()),
            };

            let mut line = format!(
                "Page {:>4}: {:<14} {:<12} LSN={}",
                a.page_number,
                a.page_type.name(),
                status_str,
                a.lsn,
            );

            if let Some(count) = a.record_count {
                line.push_str(&format!("  records={}", count));
            }

            if a.status == PageStatus::Corrupt {
                if !a.checksum_valid {
                    line.push_str("  checksum mismatch");
                }
                if !a.lsn_valid {
                    line.push_str("  LSN mismatch");
                }
            }

            wprintln!(writer, "{}", line)?;
        }
        wprintln!(writer)?;
    }

    // Summary
    wprintln!(writer, "Page Status Summary:")?;
    wprintln!(writer, "  Intact:      {:>4} pages", intact)?;
    if corrupt > 0 {
        let pages_str = if corrupt_page_numbers.len() <= 10 {
            let nums: Vec<String> = corrupt_page_numbers.iter().map(|n| n.to_string()).collect();
            format!(" (pages {})", nums.join(", "))
        } else {
            format!(" ({} pages)", corrupt)
        };
        wprintln!(
            writer,
            "  Corrupt:     {:>4} pages{}",
            format!("{}", corrupt).red(),
            pages_str
        )?;
    } else {
        wprintln!(writer, "  Corrupt:     {:>4} pages", corrupt)?;
    }
    wprintln!(writer, "  Empty:       {:>4} pages", empty)?;
    if unreadable > 0 {
        wprintln!(
            writer,
            "  Unreadable:  {:>4} pages",
            format!("{}", unreadable).red()
        )?;
    } else {
        wprintln!(writer, "  Unreadable:  {:>4} pages", unreadable)?;
    }
    wprintln!(writer, "  Total:       {:>4} pages", scan_count)?;
    wprintln!(writer)?;

    if index_pages_total > 0 {
        wprintln!(
            writer,
            "Recoverable INDEX Pages: {} of {}",
            index_pages_recoverable, index_pages_total
        )?;
        wprintln!(writer, "  Total user records: {}", total_records)?;
        if corrupt_records > 0 && !opts.force {
            wprintln!(
                writer,
                "  Records on corrupt pages: {} (use --force to include)",
                corrupt_records
            )?;
        } else if corrupt_records > 0 {
            wprintln!(
                writer,
                "  Records on corrupt pages: {} (included with --force)",
                corrupt_records
            )?;
        }
        wprintln!(writer)?;
    }

    let total_non_empty = intact + corrupt + unreadable;
    if total_non_empty > 0 {
        let pct = (intact as f64 / total_non_empty as f64) * 100.0;
        wprintln!(writer, "Overall: {:.1}% of pages intact", pct)?;
    }

    Ok(())
}

#[allow(clippy::too_many_arguments)]
fn output_json(
    opts: &RecoverOptions,
    analyses: &[PageAnalysis],
    file_size: u64,
    page_size: u32,
    page_size_source: Option<String>,
    scan_count: u64,
    intact: u64,
    corrupt: u64,
    empty: u64,
    unreadable: u64,
    total_records: u64,
    corrupt_records: u64,
    writer: &mut dyn Write,
) -> Result<(), IdbError> {
    let all_records = total_records + if opts.force { corrupt_records } else { 0 };

    let pages: Vec<PageRecoveryInfo> = if opts.verbose {
        analyses
            .iter()
            .map(|a| PageRecoveryInfo {
                page_number: a.page_number,
                status: a.status,
                page_type: a.page_type.name().to_string(),
                checksum_valid: a.checksum_valid,
                lsn_valid: a.lsn_valid,
                lsn: a.lsn,
                record_count: a.record_count,
                records: a
                    .records
                    .iter()
                    .map(|r| RecoveredRecord {
                        offset: r.offset,
                        heap_no: r.heap_no,
                        delete_mark: r.delete_mark,
                        data_hex: r.data_hex.clone(),
                    })
                    .collect(),
            })
            .collect()
    } else {
        Vec::new()
    };

    let force_recs = if corrupt_records > 0 && !opts.force {
        Some(corrupt_records)
    } else {
        None
    };

    let report = RecoverReport {
        file: opts.file.clone(),
        file_size,
        page_size,
        page_size_source,
        total_pages: scan_count,
        summary: RecoverSummary {
            intact,
            corrupt,
            empty,
            unreadable,
        },
        recoverable_records: all_records,
        force_recoverable_records: force_recs,
        pages,
    };

    let json = serde_json::to_string_pretty(&report)
        .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
    wprintln!(writer, "{}", json)?;

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_page_status_label() {
        assert_eq!(PageStatus::Intact.label(), "intact");
        assert_eq!(PageStatus::Corrupt.label(), "CORRUPT");
        assert_eq!(PageStatus::Empty.label(), "empty");
        assert_eq!(PageStatus::Unreadable.label(), "UNREADABLE");
    }

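    // Added check (small sketch): exercises the `to_hex` helper defined above.
    #[test]
    fn test_to_hex_lowercase() {
        assert_eq!(to_hex(&[]), "");
        assert_eq!(to_hex(&[0x00, 0xAB, 0xFF]), "00abff");
    }
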
    #[test]
    fn test_analyze_empty_page() {
        let page = vec![0u8; 16384];
        let result = analyze_page(&page, 0, 16384, false, false);
        assert_eq!(result.status, PageStatus::Empty);
        assert_eq!(result.page_type, PageType::Allocated);
    }

    #[test]
    fn test_analyze_short_page_is_unreadable() {
        let page = vec![0xFF; 10];
        let result = analyze_page(&page, 0, 16384, false, false);
        assert_eq!(result.status, PageStatus::Unreadable);
    }

    #[test]
    fn test_analyze_valid_index_page() {
        use byteorder::{BigEndian, ByteOrder};

        let mut page = vec![0u8; 16384];
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
        BigEndian::write_u32(&mut page[FIL_PAGE_PREV..], FIL_NULL);
        BigEndian::write_u32(&mut page[FIL_PAGE_NEXT..], FIL_NULL);
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 5000);
        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], 17855); // INDEX
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_ID..], 1);

        // Trailer
        let trailer = 16384 - SIZE_FIL_TRAILER;
        BigEndian::write_u32(&mut page[trailer + 4..], (5000u64 & 0xFFFFFFFF) as u32);

        // CRC-32C checksum
        let end = 16384 - SIZE_FIL_TRAILER;
        let crc1 = crc32c::crc32c(&page[FIL_PAGE_OFFSET..FIL_PAGE_FILE_FLUSH_LSN]);
        let crc2 = crc32c::crc32c(&page[FIL_PAGE_DATA..end]);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], crc1 ^ crc2);

        let result = analyze_page(&page, 1, 16384, false, false);
        assert_eq!(result.status, PageStatus::Intact);
        assert_eq!(result.page_type, PageType::Index);
        assert!(result.record_count.is_some());
    }

    #[test]
    fn test_analyze_corrupt_page() {
        use byteorder::{BigEndian, ByteOrder};

        let mut page = vec![0u8; 16384];
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 5000);
        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], 17855);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_ID..], 1);
        // Write a bogus checksum that cannot match the page contents
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], 0xDEAD);

        let result = analyze_page(&page, 1, 16384, false, false);
        assert_eq!(result.status, PageStatus::Corrupt);
        // Without --force, no record count on corrupt pages
        assert!(result.record_count.is_none());
    }

    #[test]
    fn test_analyze_corrupt_page_with_force() {
        use byteorder::{BigEndian, ByteOrder};

        let mut page = vec![0u8; 16384];
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 5000);
        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], 17855);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_ID..], 1);
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], 0xDEAD);

        let result = analyze_page(&page, 1, 16384, true, false);
        assert_eq!(result.status, PageStatus::Corrupt);
        // With --force, records are counted even on corrupt pages
        assert!(result.record_count.is_some());
    }
}