Skip to main content

idb/cli/
parse.rs

1use std::collections::HashMap;
2use std::io::Write;
3
4use byteorder::{BigEndian, ByteOrder};
5use colored::Colorize;
6
7use crate::cli::{wprintln, wprint, create_progress_bar};
8use crate::innodb::checksum;
9use crate::innodb::page::{FilHeader, FspHeader};
10use crate::innodb::page_types::PageType;
11use crate::innodb::tablespace::Tablespace;
12use crate::util::hex::format_offset;
13use crate::IdbError;
14
/// Options for the parse subcommand.
///
/// Consumed by [`execute`], which selects text or JSON output and
/// single-page or full-file mode from these fields.
#[derive(Debug, Clone)]
pub struct ParseOptions {
    /// Path of the tablespace file to open.
    pub file: String,
    /// When `Some`, only this page is processed; when `None`, every page is.
    pub page: Option<u64>,
    /// Adds `PAGE_N_HEAP`, checksum status and LSN consistency to the text
    /// output, and lists zero-checksum pages that are hidden by default.
    pub verbose: bool,
    /// Skips pages whose checksum is zero and whose type is `Allocated`.
    pub no_empty: bool,
    /// Explicit page size to open the tablespace with; `None` leaves the
    /// choice to the tablespace reader.
    pub page_size: Option<u32>,
    /// Emits a JSON array instead of human-readable text.
    pub json: bool,
}
24
25/// JSON-serializable page info.
26#[derive(serde::Serialize)]
27struct PageJson {
28    page_number: u64,
29    header: FilHeader,
30    page_type_name: String,
31    page_type_description: String,
32    byte_start: u64,
33    byte_end: u64,
34    #[serde(skip_serializing_if = "Option::is_none")]
35    fsp_header: Option<crate::innodb::page::FspHeader>,
36}
37
38/// Parse an InnoDB tablespace file and display page headers with a type summary.
39///
40/// Opens the tablespace, auto-detects (or uses the overridden) page size, then
41/// iterates over every page reading the 38-byte FIL header to extract the
42/// checksum, page number, prev/next page pointers, LSN, page type, and space ID.
43/// Page 0 additionally displays the FSP header (space ID, tablespace size,
44/// free-page limit, and flags).
45///
46/// In **single-page mode** (`-p N`), only the specified page is printed with
47/// its full FIL header and trailer. In **full-file mode** (the default), all
48/// pages are listed and a frequency summary table is appended showing how many
49/// pages of each type exist. Pages with zero checksum and type `Allocated` are
50/// skipped by default unless `--verbose` is set; `--no-empty` additionally
51/// filters these from `--json` output.
52///
53/// With `--verbose`, each page also shows checksum validation status (algorithm,
54/// stored vs. calculated values) and LSN consistency between header and trailer.
55pub fn execute(opts: &ParseOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
56    let mut ts = match opts.page_size {
57        Some(ps) => Tablespace::open_with_page_size(&opts.file, ps)?,
58        None => Tablespace::open(&opts.file)?,
59    };
60
61    let page_size = ts.page_size();
62
63    if opts.json {
64        return execute_json(opts, &mut ts, page_size, writer);
65    }
66
67    if let Some(page_num) = opts.page {
68        // Single page mode
69        let page_data = ts.read_page(page_num)?;
70        print_page_info(writer, &page_data, page_num, page_size, opts.verbose)?;
71    } else {
72        // All pages mode
73        // Print FSP header first
74        let page0 = ts.read_page(0)?;
75        if let Some(fsp) = FspHeader::parse(&page0) {
76            print_fsp_header(writer, &fsp)?;
77            wprintln!(writer)?;
78        }
79
80        wprintln!(
81            writer,
82            "Pages in {} ({} pages, page size {}):",
83            opts.file,
84            ts.page_count(),
85            page_size
86        )?;
87        wprintln!(writer, "{}", "-".repeat(50))?;
88
89        let mut type_counts: HashMap<PageType, u64> = HashMap::new();
90
91        let pb = create_progress_bar(ts.page_count(), "pages");
92
93        for page_num in 0..ts.page_count() {
94            pb.inc(1);
95            let page_data = ts.read_page(page_num)?;
96            let header = match FilHeader::parse(&page_data) {
97                Some(h) => h,
98                None => continue,
99            };
100
101            *type_counts.entry(header.page_type).or_insert(0) += 1;
102
103            // Skip empty pages if --no-empty
104            if opts.no_empty && header.checksum == 0 && header.page_type == PageType::Allocated {
105                continue;
106            }
107
108            // Skip pages with zero checksum unless they are page 0
109            if header.checksum == 0 && page_num != 0 && !opts.verbose {
110                continue;
111            }
112
113            print_page_info(writer, &page_data, page_num, page_size, opts.verbose)?;
114        }
115
116        pb.finish_and_clear();
117
118        // Print page type summary
119        wprintln!(writer)?;
120        wprintln!(writer, "{}", "Page Type Summary".bold())?;
121        let mut sorted_types: Vec<_> = type_counts.iter().collect();
122        sorted_types.sort_by(|a, b| b.1.cmp(a.1));
123        for (pt, count) in sorted_types {
124            let label = if *count == 1 { "page" } else { "pages" };
125            wprintln!(writer, "  {:20} {:>6} {}", pt.name(), count, label)?;
126        }
127    }
128
129    Ok(())
130}
131
132/// Execute parse in JSON output mode.
133fn execute_json(
134    opts: &ParseOptions,
135    ts: &mut Tablespace,
136    page_size: u32,
137    writer: &mut dyn Write,
138) -> Result<(), IdbError> {
139    let mut pages = Vec::new();
140
141    let range: Box<dyn Iterator<Item = u64>> = if let Some(p) = opts.page {
142        Box::new(std::iter::once(p))
143    } else {
144        Box::new(0..ts.page_count())
145    };
146
147    for page_num in range {
148        let page_data = ts.read_page(page_num)?;
149        let header = match FilHeader::parse(&page_data) {
150            Some(h) => h,
151            None => continue,
152        };
153
154        if opts.no_empty && header.checksum == 0 && header.page_type == PageType::Allocated {
155            continue;
156        }
157
158        let pt = header.page_type;
159        let byte_start = page_num * page_size as u64;
160
161        let fsp_header = if page_num == 0 {
162            FspHeader::parse(&page_data)
163        } else {
164            None
165        };
166
167        pages.push(PageJson {
168            page_number: page_num,
169            page_type_name: pt.name().to_string(),
170            page_type_description: pt.description().to_string(),
171            byte_start,
172            byte_end: byte_start + page_size as u64,
173            header,
174            fsp_header,
175        });
176    }
177
178    let json = serde_json::to_string_pretty(&pages)
179        .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
180    wprintln!(writer, "{}", json)?;
181    Ok(())
182}
183
184/// Print detailed information about a single page.
185fn print_page_info(writer: &mut dyn Write, page_data: &[u8], page_num: u64, page_size: u32, verbose: bool) -> Result<(), IdbError> {
186    let header = match FilHeader::parse(page_data) {
187        Some(h) => h,
188        None => {
189            eprintln!("Could not parse FIL header for page {}", page_num);
190            return Ok(());
191        }
192    };
193
194    let byte_start = page_num * page_size as u64;
195    let byte_end = byte_start + page_size as u64;
196
197    let pt = header.page_type;
198
199    wprintln!(writer, "Page: {}", header.page_number)?;
200    wprintln!(writer, "{}", "-".repeat(20))?;
201    wprintln!(writer, "{}", "HEADER".bold())?;
202    wprintln!(writer, "Byte Start: {}", format_offset(byte_start))?;
203    wprintln!(
204        writer,
205        "Page Type: {}\n-- {}: {} - {}",
206        pt.as_u16(),
207        pt.name(),
208        pt.description(),
209        pt.usage()
210    )?;
211
212    if verbose {
213        wprintln!(writer, "PAGE_N_HEAP (Amount of records in page): {}", read_page_n_heap(page_data))?;
214    }
215
216    wprint!(writer, "Prev Page: ")?;
217    if !header.has_prev() {
218        wprintln!(writer, "Not used.")?;
219    } else {
220        wprintln!(writer, "{}", header.prev_page)?;
221    }
222
223    wprint!(writer, "Next Page: ")?;
224    if !header.has_next() {
225        wprintln!(writer, "Not used.")?;
226    } else {
227        wprintln!(writer, "{}", header.next_page)?;
228    }
229
230    wprintln!(writer, "LSN: {}", header.lsn)?;
231    wprintln!(writer, "Space ID: {}", header.space_id)?;
232    wprintln!(writer, "Checksum: {}", header.checksum)?;
233
234    // Checksum validation
235    let csum_result = checksum::validate_checksum(page_data, page_size);
236    if verbose {
237        let status = if csum_result.valid {
238            "OK".green().to_string()
239        } else {
240            "MISMATCH".red().to_string()
241        };
242        wprintln!(
243            writer,
244            "Checksum Status: {} ({:?}, stored={}, calculated={})",
245            status, csum_result.algorithm, csum_result.stored_checksum, csum_result.calculated_checksum
246        )?;
247    }
248
249    wprintln!(writer)?;
250
251    // Trailer
252    let ps = page_size as usize;
253    if page_data.len() >= ps {
254        let trailer_offset = ps - 8;
255        if let Some(trailer) = crate::innodb::page::FilTrailer::parse(&page_data[trailer_offset..]) {
256            wprintln!(writer, "{}", "TRAILER".bold())?;
257            wprintln!(writer, "Old-style Checksum: {}", trailer.checksum)?;
258            wprintln!(writer, "Low 32 bits of LSN: {}", trailer.lsn_low32)?;
259            wprintln!(writer, "Byte End: {}", format_offset(byte_end))?;
260
261            // LSN validation
262            if verbose {
263                let lsn_valid = checksum::validate_lsn(page_data, page_size);
264                let lsn_status = if lsn_valid {
265                    "OK".green().to_string()
266                } else {
267                    "MISMATCH".red().to_string()
268                };
269                wprintln!(writer, "LSN Consistency: {}", lsn_status)?;
270            }
271        }
272    }
273    wprintln!(writer, "{}", "-".repeat(20))?;
274    Ok(())
275}
276
277/// Print FSP header information.
278fn print_fsp_header(writer: &mut dyn Write, fsp: &FspHeader) -> Result<(), IdbError> {
279    wprintln!(writer, "{}", "-".repeat(20))?;
280    wprintln!(writer, "{}", "FSP_HDR - Filespace Header".bold())?;
281    wprintln!(writer, "{}", "-".repeat(20))?;
282    wprintln!(writer, "Space ID: {}", fsp.space_id)?;
283    wprintln!(writer, "Size (pages): {}", fsp.size)?;
284    wprintln!(writer, "Page Free Limit: {}", fsp.free_limit)?;
285    wprintln!(writer, "Flags: {}", fsp.flags)?;
286    Ok(())
287}
288
289/// Read PAGE_N_HEAP from the page header (INDEX page specific).
290fn read_page_n_heap(page_data: &[u8]) -> u16 {
291    let offset = crate::innodb::constants::FIL_PAGE_DATA + 4; // PAGE_N_HEAP is at FIL_PAGE_DATA + 4
292    if page_data.len() < offset + 2 {
293        return 0;
294    }
295    BigEndian::read_u16(&page_data[offset..])
296}