Skip to main content

idb/cli/
recover.rs

1use std::io::Write;
2
3use colored::Colorize;
4use serde::Serialize;
5
6use crate::cli::{create_progress_bar, wprintln};
7use crate::innodb::checksum::{validate_checksum, validate_lsn};
8use crate::innodb::constants::*;
9use crate::innodb::page::FilHeader;
10use crate::innodb::page_types::PageType;
11use crate::innodb::record::walk_compact_records;
12use crate::innodb::tablespace::Tablespace;
13use crate::IdbError;
14
/// Options for the `inno recover` subcommand.
///
/// Derives `Debug` and `Clone` so callers can log the effective options or
/// reuse them across invocations; all fields are plain standard-library types.
#[derive(Debug, Clone)]
pub struct RecoverOptions {
    /// Path to the InnoDB tablespace file (.ibd).
    pub file: String,
    /// Analyze a single page instead of full scan.
    pub page: Option<u64>,
    /// Show per-page details.
    pub verbose: bool,
    /// Emit output as JSON.
    pub json: bool,
    /// Extract records from corrupt pages with valid headers.
    pub force: bool,
    /// Override the auto-detected page size.
    pub page_size: Option<u32>,
    /// Path to MySQL keyring file for decrypting encrypted tablespaces.
    pub keyring: Option<String>,
}
32
/// Page integrity status.
///
/// Serialized by serde using the lowercase variant name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "lowercase")]
enum PageStatus {
    /// Checksum and LSN validation both passed.
    Intact,
    /// The FIL header parsed, but checksum and/or LSN validation failed.
    Corrupt,
    /// Every byte of the page is zero.
    Empty,
    /// The page could not be read, or its FIL header could not be parsed.
    Unreadable,
}
42
43impl PageStatus {
44    fn label(self) -> &'static str {
45        match self {
46            PageStatus::Intact => "intact",
47            PageStatus::Corrupt => "CORRUPT",
48            PageStatus::Empty => "empty",
49            PageStatus::Unreadable => "UNREADABLE",
50        }
51    }
52}
53
/// Top-level JSON output for the recovery report.
///
/// Built by `output_json`; field order here is the key order in the emitted JSON.
#[derive(Serialize)]
struct RecoverReport {
    /// Tablespace path as given in the options.
    file: String,
    /// Size of the tablespace file in bytes.
    file_size: u64,
    /// Page size used for the analysis.
    page_size: u32,
    /// How the page size was chosen when it was not auto-detected; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    page_size_source: Option<String>,
    /// Number of pages that were analyzed (the scan count).
    total_pages: u64,
    /// Page counts per status.
    summary: RecoverSummary,
    /// Records on intact INDEX pages, plus records on corrupt pages when `--force` is set.
    recoverable_records: u64,
    /// Records on corrupt pages that `--force` would add; only set when that
    /// count is non-zero and `--force` was not given, omitted otherwise.
    #[serde(skip_serializing_if = "Option::is_none")]
    force_recoverable_records: Option<u64>,
    /// Per-page details; filled only in verbose mode and omitted when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pages: Vec<PageRecoveryInfo>,
}
70
/// Status counts by category.
///
/// Each field is the number of analyzed pages that ended up with the
/// corresponding `PageStatus`.
#[derive(Serialize)]
struct RecoverSummary {
    /// Pages whose status is `Intact`.
    intact: u64,
    /// Pages whose status is `Corrupt`.
    corrupt: u64,
    /// Pages whose status is `Empty`.
    empty: u64,
    /// Pages whose status is `Unreadable`.
    unreadable: u64,
}
79
/// Per-page recovery info for JSON output.
///
/// Populated from a `PageAnalysis` by `output_json` in verbose mode.
#[derive(Serialize)]
struct PageRecoveryInfo {
    /// Page number within the tablespace.
    page_number: u64,
    /// Integrity classification of the page.
    status: PageStatus,
    /// Page type name as returned by `PageType::name`.
    page_type: String,
    /// Whether checksum validation passed.
    checksum_valid: bool,
    /// Whether LSN validation passed.
    lsn_valid: bool,
    /// LSN taken from the FIL header (0 for empty or unreadable pages).
    lsn: u64,
    /// Number of records walked on the page; omitted when records were not counted.
    #[serde(skip_serializing_if = "Option::is_none")]
    record_count: Option<usize>,
    /// Extracted records; omitted when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    records: Vec<RecoveredRecord>,
}
94
/// A single recovered record for verbose JSON output.
#[derive(Serialize)]
struct RecoveredRecord {
    /// Offset of the record within the page, as reported by the record walker.
    offset: usize,
    /// Heap number from the record header.
    heap_no: u16,
    /// Delete-mark flag from the record header.
    delete_mark: bool,
    /// Raw record bytes encoded as lowercase hex (empty when no byte range could be determined).
    data_hex: String,
}
103
/// Computed statistics from page analysis, used by output functions.
///
/// Filled in by `execute` and handed to `output_text` / `output_json`.
struct RecoverStats {
    /// Size of the tablespace file in bytes.
    file_size: u64,
    /// Page size used for the analysis.
    page_size: u32,
    /// How the page size was chosen when it was not auto-detected.
    page_size_source: Option<String>,
    /// Number of pages analyzed (1 in single-page mode, otherwise the page count).
    scan_count: u64,
    /// Pages classified as intact.
    intact: u64,
    /// Pages classified as corrupt.
    corrupt: u64,
    /// Pages classified as empty.
    empty: u64,
    /// Pages classified as unreadable.
    unreadable: u64,
    /// Records counted on intact INDEX pages.
    total_records: u64,
    /// Records counted on INDEX pages that are not intact.
    corrupt_records: u64,
    /// Page numbers of all corrupt pages, in scan order.
    corrupt_page_numbers: Vec<u64>,
    /// Number of analyzed pages whose type is INDEX.
    index_pages_total: u64,
    /// INDEX pages considered recoverable: intact ones, plus corrupt ones
    /// with a record count when `--force` is set.
    index_pages_recoverable: u64,
}
120
/// Internal per-page analysis result.
///
/// Produced by `analyze_page` (or directly by `execute` when a page cannot be read).
struct PageAnalysis {
    /// Page number within the tablespace.
    page_number: u64,
    /// Integrity classification of the page.
    status: PageStatus,
    /// Page type from the FIL header (`Allocated` for all-zero pages,
    /// `Unknown` when the header could not be obtained).
    page_type: PageType,
    /// Whether checksum validation passed.
    checksum_valid: bool,
    /// Whether LSN validation passed.
    lsn_valid: bool,
    /// LSN from the FIL header, or 0 when no header was parsed.
    lsn: u64,
    /// Number of records walked; `Some` only for INDEX pages that are intact
    /// or analyzed with `force`.
    record_count: Option<usize>,
    /// Extracted records; only filled when verbose JSON output was requested.
    records: Vec<RecoveredRecord>,
}
132
133/// Try to open the tablespace, with smart page size fallback when page 0 is damaged.
134fn open_tablespace(
135    file: &str,
136    page_size_override: Option<u32>,
137    writer: &mut dyn Write,
138) -> Result<(Tablespace, Option<String>), IdbError> {
139    if let Some(ps) = page_size_override {
140        let ts = Tablespace::open_with_page_size(file, ps)?;
141        return Ok((ts, Some("user-specified".to_string())));
142    }
143
144    match Tablespace::open(file) {
145        Ok(ts) => Ok((ts, None)),
146        Err(_) => {
147            // Page 0 may be corrupt — try common page sizes
148            let candidates = [
149                SIZE_PAGE_16K,
150                SIZE_PAGE_8K,
151                SIZE_PAGE_4K,
152                SIZE_PAGE_32K,
153                SIZE_PAGE_64K,
154            ];
155
156            let file_size = std::fs::metadata(file)
157                .map_err(|e| IdbError::Io(format!("Cannot stat {}: {}", file, e)))?
158                .len();
159
160            for &ps in &candidates {
161                if file_size >= ps as u64 && file_size % ps as u64 == 0 {
162                    if let Ok(ts) = Tablespace::open_with_page_size(file, ps) {
163                        let _ = wprintln!(
164                            writer,
165                            "Warning: auto-detect failed, using page size {} (file size divisible)",
166                            ps
167                        );
168                        return Ok((ts, Some(format!("fallback ({})", ps))));
169                    }
170                }
171            }
172
173            // Last resort: default 16K
174            let ts = Tablespace::open_with_page_size(file, SIZE_PAGE_DEFAULT)?;
175            let _ = wprintln!(
176                writer,
177                "Warning: using default page size {} (no size divides evenly)",
178                SIZE_PAGE_DEFAULT
179            );
180            Ok((ts, Some("default-fallback".to_string())))
181        }
182    }
183}
184
185/// Analyze a single page and return its status and recovery info.
186fn analyze_page(
187    page_data: &[u8],
188    page_num: u64,
189    page_size: u32,
190    force: bool,
191    verbose_json: bool,
192) -> PageAnalysis {
193    // Check all-zeros (empty/allocated page)
194    if page_data.iter().all(|&b| b == 0) {
195        return PageAnalysis {
196            page_number: page_num,
197            status: PageStatus::Empty,
198            page_type: PageType::Allocated,
199            checksum_valid: true,
200            lsn_valid: true,
201            lsn: 0,
202            record_count: None,
203            records: Vec::new(),
204        };
205    }
206
207    // Parse FIL header
208    let header = match FilHeader::parse(page_data) {
209        Some(h) => h,
210        None => {
211            return PageAnalysis {
212                page_number: page_num,
213                status: PageStatus::Unreadable,
214                page_type: PageType::Unknown,
215                checksum_valid: false,
216                lsn_valid: false,
217                lsn: 0,
218                record_count: None,
219                records: Vec::new(),
220            };
221        }
222    };
223
224    let csum_result = validate_checksum(page_data, page_size, None);
225    let lsn_valid = validate_lsn(page_data, page_size);
226    let status = if csum_result.valid && lsn_valid {
227        PageStatus::Intact
228    } else {
229        PageStatus::Corrupt
230    };
231
232    // Count records on INDEX pages
233    let (record_count, records) =
234        if header.page_type == PageType::Index && (status == PageStatus::Intact || force) {
235            let recs = walk_compact_records(page_data);
236            let count = recs.len();
237            let recovered = if verbose_json {
238                extract_records(page_data, &recs, page_size)
239            } else {
240                Vec::new()
241            };
242            (Some(count), recovered)
243        } else {
244            (None, Vec::new())
245        };
246
247    PageAnalysis {
248        page_number: page_num,
249        status,
250        page_type: header.page_type,
251        checksum_valid: csum_result.valid,
252        lsn_valid,
253        lsn: header.lsn,
254        record_count,
255        records,
256    }
257}
258
/// Render a byte slice as lowercase hexadecimal, two digits per byte.
///
/// An empty slice yields an empty string.
fn to_hex(data: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut out = String::with_capacity(data.len() * 2);
    for &byte in data {
        // High nibble first, then low nibble.
        out.push(DIGITS[(byte >> 4) as usize] as char);
        out.push(DIGITS[(byte & 0x0f) as usize] as char);
    }
    out
}
268
269/// Extract raw record bytes as hex from an INDEX page.
270fn extract_records(
271    page_data: &[u8],
272    recs: &[crate::innodb::record::RecordInfo],
273    page_size: u32,
274) -> Vec<RecoveredRecord> {
275    let ps = page_size as usize;
276    let data_end = ps - SIZE_FIL_TRAILER;
277
278    recs.iter()
279        .enumerate()
280        .map(|(i, rec)| {
281            let start = rec.offset;
282            let end = if i + 1 < recs.len() {
283                // Next record's origin minus its extra header
284                recs[i + 1].offset.saturating_sub(REC_N_NEW_EXTRA_BYTES)
285            } else {
286                // Use heap top or end of data area
287                data_end
288            };
289
290            let end = end.min(data_end);
291            let data = if start < end && end <= page_data.len() {
292                &page_data[start..end]
293            } else {
294                &[]
295            };
296
297            RecoveredRecord {
298                offset: rec.offset,
299                heap_no: rec.header.heap_no,
300                delete_mark: rec.header.delete_mark,
301                data_hex: to_hex(data),
302            }
303        })
304        .collect()
305}
306
307/// Run the recovery analysis and output results.
308pub fn execute(opts: &RecoverOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
309    let (mut ts, page_size_source) = open_tablespace(&opts.file, opts.page_size, writer)?;
310
311    if let Some(ref keyring_path) = opts.keyring {
312        crate::cli::setup_decryption(&mut ts, keyring_path)?;
313    }
314
315    let page_size = ts.page_size();
316    let page_count = ts.page_count();
317    let file_size = ts.file_size();
318
319    let verbose_json = opts.verbose && opts.json;
320
321    // Determine which pages to analyze
322    let (start_page, end_page) = match opts.page {
323        Some(p) => {
324            if p >= page_count {
325                return Err(IdbError::Parse(format!(
326                    "Page {} out of range (tablespace has {} pages)",
327                    p, page_count
328                )));
329            }
330            (p, p + 1)
331        }
332        None => (0, page_count),
333    };
334    let scan_count = end_page - start_page;
335
336    // Analyze pages
337    let mut analyses = Vec::with_capacity(scan_count as usize);
338    let pb = if !opts.json && scan_count > 1 {
339        Some(create_progress_bar(scan_count, "pages"))
340    } else {
341        None
342    };
343
344    for page_num in start_page..end_page {
345        if let Some(ref pb) = pb {
346            pb.inc(1);
347        }
348
349        let page_data = match ts.read_page(page_num) {
350            Ok(data) => data,
351            Err(_) => {
352                analyses.push(PageAnalysis {
353                    page_number: page_num,
354                    status: PageStatus::Unreadable,
355                    page_type: PageType::Unknown,
356                    checksum_valid: false,
357                    lsn_valid: false,
358                    lsn: 0,
359                    record_count: None,
360                    records: Vec::new(),
361                });
362                continue;
363            }
364        };
365
366        analyses.push(analyze_page(
367            &page_data,
368            page_num,
369            page_size,
370            opts.force,
371            verbose_json,
372        ));
373    }
374
375    if let Some(pb) = pb {
376        pb.finish_and_clear();
377    }
378
379    // Compute summary
380    let mut intact = 0u64;
381    let mut corrupt = 0u64;
382    let mut empty = 0u64;
383    let mut unreadable = 0u64;
384    let mut total_records = 0u64;
385    let mut corrupt_records = 0u64;
386    let mut corrupt_page_numbers = Vec::new();
387    let mut index_pages_total = 0u64;
388    let mut index_pages_recoverable = 0u64;
389
390    for a in &analyses {
391        match a.status {
392            PageStatus::Intact => intact += 1,
393            PageStatus::Corrupt => {
394                corrupt += 1;
395                corrupt_page_numbers.push(a.page_number);
396            }
397            PageStatus::Empty => empty += 1,
398            PageStatus::Unreadable => unreadable += 1,
399        }
400
401        if a.page_type == PageType::Index {
402            index_pages_total += 1;
403            if a.status == PageStatus::Intact {
404                index_pages_recoverable += 1;
405            }
406            if let Some(count) = a.record_count {
407                if a.status == PageStatus::Intact {
408                    total_records += count as u64;
409                } else {
410                    corrupt_records += count as u64;
411                }
412            }
413        }
414    }
415
416    // If --force, corrupt INDEX pages with records are also recoverable
417    if opts.force {
418        for a in &analyses {
419            if a.page_type == PageType::Index
420                && a.status == PageStatus::Corrupt
421                && a.record_count.is_some()
422            {
423                index_pages_recoverable += 1;
424            }
425        }
426    }
427
428    let stats = RecoverStats {
429        file_size,
430        page_size,
431        page_size_source,
432        scan_count,
433        intact,
434        corrupt,
435        empty,
436        unreadable,
437        total_records,
438        corrupt_records,
439        corrupt_page_numbers,
440        index_pages_total,
441        index_pages_recoverable,
442    };
443
444    if opts.json {
445        output_json(opts, &analyses, &stats, writer)
446    } else {
447        output_text(opts, &analyses, &stats, writer)
448    }
449}
450
451fn output_text(
452    opts: &RecoverOptions,
453    analyses: &[PageAnalysis],
454    stats: &RecoverStats,
455    writer: &mut dyn Write,
456) -> Result<(), IdbError> {
457    wprintln!(writer, "Recovery Analysis: {}", opts.file)?;
458    wprintln!(
459        writer,
460        "File size: {} bytes ({} pages x {} bytes)",
461        stats.file_size,
462        stats.scan_count,
463        stats.page_size
464    )?;
465
466    let source_note = match &stats.page_size_source {
467        Some(s) => format!(" ({})", s),
468        None => " (auto-detected)".to_string(),
469    };
470    wprintln!(writer, "Page size: {}{}", stats.page_size, source_note)?;
471    wprintln!(writer)?;
472
473    // Verbose: per-page detail
474    if opts.verbose {
475        for a in analyses {
476            let status_str = match a.status {
477                PageStatus::Intact => a.status.label().to_string(),
478                PageStatus::Corrupt => format!("{}", a.status.label().red()),
479                PageStatus::Empty => a.status.label().to_string(),
480                PageStatus::Unreadable => format!("{}", a.status.label().red()),
481            };
482
483            let mut line = format!(
484                "Page {:>4}: {:<14} {:<12} LSN={}",
485                a.page_number,
486                a.page_type.name(),
487                status_str,
488                a.lsn,
489            );
490
491            if let Some(count) = a.record_count {
492                line.push_str(&format!("  records={}", count));
493            }
494
495            if a.status == PageStatus::Corrupt {
496                if !a.checksum_valid {
497                    line.push_str("  checksum mismatch");
498                }
499                if !a.lsn_valid {
500                    line.push_str("  LSN mismatch");
501                }
502            }
503
504            wprintln!(writer, "{}", line)?;
505        }
506        wprintln!(writer)?;
507    }
508
509    // Summary
510    wprintln!(writer, "Page Status Summary:")?;
511    wprintln!(writer, "  Intact:      {:>4} pages", stats.intact)?;
512    if stats.corrupt > 0 {
513        let pages_str = if stats.corrupt_page_numbers.len() <= 10 {
514            let nums: Vec<String> = stats
515                .corrupt_page_numbers
516                .iter()
517                .map(|n| n.to_string())
518                .collect();
519            format!(" (pages {})", nums.join(", "))
520        } else {
521            format!(" ({} pages)", stats.corrupt)
522        };
523        wprintln!(
524            writer,
525            "  Corrupt:     {:>4} pages{}",
526            format!("{}", stats.corrupt).red(),
527            pages_str
528        )?;
529    } else {
530        wprintln!(writer, "  Corrupt:     {:>4} pages", stats.corrupt)?;
531    }
532    wprintln!(writer, "  Empty:       {:>4} pages", stats.empty)?;
533    if stats.unreadable > 0 {
534        wprintln!(
535            writer,
536            "  Unreadable:  {:>4} pages",
537            format!("{}", stats.unreadable).red()
538        )?;
539    } else {
540        wprintln!(writer, "  Unreadable:  {:>4} pages", stats.unreadable)?;
541    }
542    wprintln!(writer, "  Total:       {:>4} pages", stats.scan_count)?;
543    wprintln!(writer)?;
544
545    if stats.index_pages_total > 0 {
546        wprintln!(
547            writer,
548            "Recoverable INDEX Pages: {} of {}",
549            stats.index_pages_recoverable,
550            stats.index_pages_total
551        )?;
552        wprintln!(writer, "  Total user records: {}", stats.total_records)?;
553        if stats.corrupt_records > 0 && !opts.force {
554            wprintln!(
555                writer,
556                "  Records on corrupt pages: {} (use --force to include)",
557                stats.corrupt_records
558            )?;
559        } else if stats.corrupt_records > 0 {
560            wprintln!(
561                writer,
562                "  Records on corrupt pages: {} (included with --force)",
563                stats.corrupt_records
564            )?;
565        }
566        wprintln!(writer)?;
567    }
568
569    let total_non_empty = stats.intact + stats.corrupt + stats.unreadable;
570    if total_non_empty > 0 {
571        let pct = (stats.intact as f64 / total_non_empty as f64) * 100.0;
572        wprintln!(writer, "Overall: {:.1}% of pages intact", pct)?;
573    }
574
575    Ok(())
576}
577
578fn output_json(
579    opts: &RecoverOptions,
580    analyses: &[PageAnalysis],
581    stats: &RecoverStats,
582    writer: &mut dyn Write,
583) -> Result<(), IdbError> {
584    let all_records = stats.total_records + if opts.force { stats.corrupt_records } else { 0 };
585
586    let pages: Vec<PageRecoveryInfo> = if opts.verbose {
587        analyses
588            .iter()
589            .map(|a| PageRecoveryInfo {
590                page_number: a.page_number,
591                status: a.status,
592                page_type: a.page_type.name().to_string(),
593                checksum_valid: a.checksum_valid,
594                lsn_valid: a.lsn_valid,
595                lsn: a.lsn,
596                record_count: a.record_count,
597                records: a
598                    .records
599                    .iter()
600                    .map(|r| RecoveredRecord {
601                        offset: r.offset,
602                        heap_no: r.heap_no,
603                        delete_mark: r.delete_mark,
604                        data_hex: r.data_hex.clone(),
605                    })
606                    .collect(),
607            })
608            .collect()
609    } else {
610        Vec::new()
611    };
612
613    let force_recs = if stats.corrupt_records > 0 && !opts.force {
614        Some(stats.corrupt_records)
615    } else {
616        None
617    };
618
619    let report = RecoverReport {
620        file: opts.file.clone(),
621        file_size: stats.file_size,
622        page_size: stats.page_size,
623        page_size_source: stats.page_size_source.clone(),
624        total_pages: stats.scan_count,
625        summary: RecoverSummary {
626            intact: stats.intact,
627            corrupt: stats.corrupt,
628            empty: stats.empty,
629            unreadable: stats.unreadable,
630        },
631        recoverable_records: all_records,
632        force_recoverable_records: force_recs,
633        pages,
634    };
635
636    let json = serde_json::to_string_pretty(&report)
637        .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
638    wprintln!(writer, "{}", json)?;
639
640    Ok(())
641}
642
#[cfg(test)]
mod tests {
    use super::*;
    use byteorder::{BigEndian, ByteOrder};

    /// Page size used by every synthetic page in these tests.
    const TEST_PAGE_SIZE: u32 = 16384;

    /// Zeroed page with a minimal FIL header: page 1, space 1, LSN 5000, type INDEX.
    fn index_page_skeleton() -> Vec<u8> {
        let mut page = vec![0u8; TEST_PAGE_SIZE as usize];
        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], 1);
        BigEndian::write_u64(&mut page[FIL_PAGE_LSN..], 5000);
        BigEndian::write_u16(&mut page[FIL_PAGE_TYPE..], 17855); // INDEX
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_ID..], 1);
        page
    }

    /// INDEX page whose stored checksum is a bogus value that does not match its contents.
    fn corrupt_index_page() -> Vec<u8> {
        let mut page = index_page_skeleton();
        BigEndian::write_u32(&mut page[FIL_PAGE_SPACE_OR_CHKSUM..], 0xDEAD);
        page
    }

    #[test]
    fn test_page_status_label() {
        let expected = [
            (PageStatus::Intact, "intact"),
            (PageStatus::Corrupt, "CORRUPT"),
            (PageStatus::Empty, "empty"),
            (PageStatus::Unreadable, "UNREADABLE"),
        ];
        for (status, label) in expected {
            assert_eq!(status.label(), label);
        }
    }

    #[test]
    fn test_analyze_empty_page() {
        let page = vec![0u8; TEST_PAGE_SIZE as usize];
        let result = analyze_page(&page, 0, TEST_PAGE_SIZE, false, false);
        assert_eq!(result.status, PageStatus::Empty);
        assert_eq!(result.page_type, PageType::Allocated);
    }

    #[test]
    fn test_analyze_short_page_is_unreadable() {
        // Too short to hold a FIL header, and not all zeros.
        let page = vec![0xFF; 10];
        let result = analyze_page(&page, 0, TEST_PAGE_SIZE, false, false);
        assert_eq!(result.status, PageStatus::Unreadable);
    }

    #[test]
    fn test_analyze_valid_index_page() {
        let mut page = index_page_skeleton();
        BigEndian::write_u32(&mut page[FIL_PAGE_PREV..], FIL_NULL);
        BigEndian::write_u32(&mut page[FIL_PAGE_NEXT..], FIL_NULL);

        // Trailer: low 32 bits of the header LSN
        let data_end = TEST_PAGE_SIZE as usize - SIZE_FIL_TRAILER;
        BigEndian::write_u32(&mut page[data_end + 4..], (5000u64 & 0xFFFFFFFF) as u32);

        // CRC-32C checksum over the header and body ranges, XOR-combined
        let header_crc = crc32c::crc32c(&page[FIL_PAGE_OFFSET..FIL_PAGE_FILE_FLUSH_LSN]);
        let body_crc = crc32c::crc32c(&page[FIL_PAGE_DATA..data_end]);
        BigEndian::write_u32(
            &mut page[FIL_PAGE_SPACE_OR_CHKSUM..],
            header_crc ^ body_crc,
        );

        let result = analyze_page(&page, 1, TEST_PAGE_SIZE, false, false);
        assert_eq!(result.status, PageStatus::Intact);
        assert_eq!(result.page_type, PageType::Index);
        assert!(result.record_count.is_some());
    }

    #[test]
    fn test_analyze_corrupt_page() {
        let page = corrupt_index_page();

        let result = analyze_page(&page, 1, TEST_PAGE_SIZE, false, false);
        assert_eq!(result.status, PageStatus::Corrupt);
        // Without --force, no record count on corrupt pages
        assert!(result.record_count.is_none());
    }

    #[test]
    fn test_analyze_corrupt_page_with_force() {
        let page = corrupt_index_page();

        let result = analyze_page(&page, 1, TEST_PAGE_SIZE, true, false);
        assert_eq!(result.status, PageStatus::Corrupt);
        // With --force, records are counted even on corrupt pages
        assert!(result.record_count.is_some());
    }
}