Skip to main content

idb/cli/
parse.rs

1use std::collections::HashMap;
2use std::io::Write;
3
4use byteorder::{BigEndian, ByteOrder};
5use colored::Colorize;
6
7use crate::cli::{create_progress_bar, wprint, wprintln};
8use crate::innodb::checksum;
9use crate::innodb::page::{FilHeader, FspHeader};
10use crate::innodb::page_types::PageType;
11use crate::innodb::tablespace::Tablespace;
12use crate::util::hex::format_offset;
13use crate::IdbError;
14
/// Options for the parse subcommand.
///
/// Derives `Debug` and `Clone` so callers can log the options or keep a copy
/// of them.
#[derive(Debug, Clone)]
pub struct ParseOptions {
    /// Path of the tablespace file to open.
    pub file: String,
    /// When `Some(n)`, only page `n` is processed; when `None`, every page is.
    pub page: Option<u64>,
    /// Enables the extra per-page output (PAGE_N_HEAP, checksum status, LSN
    /// consistency) and lists zero-checksum pages in the full-file text listing.
    pub verbose: bool,
    /// Skips pages that have a zero checksum and page type `Allocated`.
    pub no_empty: bool,
    /// Explicit page size to open the tablespace with; `None` leaves the
    /// choice to the tablespace opener.
    pub page_size: Option<u32>,
    /// Emits a JSON array of page records instead of the text report.
    pub json: bool,
}
24
25/// JSON-serializable page info.
26#[derive(serde::Serialize)]
27struct PageJson {
28    page_number: u64,
29    header: FilHeader,
30    page_type_name: String,
31    page_type_description: String,
32    byte_start: u64,
33    byte_end: u64,
34    #[serde(skip_serializing_if = "Option::is_none")]
35    fsp_header: Option<crate::innodb::page::FspHeader>,
36}
37
38/// Parse an InnoDB tablespace file and display page headers with a type summary.
39///
40/// Opens the tablespace, auto-detects (or uses the overridden) page size, then
41/// iterates over every page reading the 38-byte FIL header to extract the
42/// checksum, page number, prev/next page pointers, LSN, page type, and space ID.
43/// Page 0 additionally displays the FSP header (space ID, tablespace size,
44/// free-page limit, and flags).
45///
46/// In **single-page mode** (`-p N`), only the specified page is printed with
47/// its full FIL header and trailer. In **full-file mode** (the default), all
48/// pages are listed and a frequency summary table is appended showing how many
49/// pages of each type exist. Pages with zero checksum and type `Allocated` are
50/// skipped by default unless `--verbose` is set; `--no-empty` additionally
51/// filters these from `--json` output.
52///
53/// With `--verbose`, each page also shows checksum validation status (algorithm,
54/// stored vs. calculated values) and LSN consistency between header and trailer.
55pub fn execute(opts: &ParseOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
56    let mut ts = match opts.page_size {
57        Some(ps) => Tablespace::open_with_page_size(&opts.file, ps)?,
58        None => Tablespace::open(&opts.file)?,
59    };
60
61    let page_size = ts.page_size();
62
63    if opts.json {
64        return execute_json(opts, &mut ts, page_size, writer);
65    }
66
67    if let Some(page_num) = opts.page {
68        // Single page mode
69        let page_data = ts.read_page(page_num)?;
70        print_page_info(writer, &page_data, page_num, page_size, opts.verbose)?;
71    } else {
72        // All pages mode
73        // Print FSP header first
74        let page0 = ts.read_page(0)?;
75        if let Some(fsp) = FspHeader::parse(&page0) {
76            print_fsp_header(writer, &fsp)?;
77            wprintln!(writer)?;
78        }
79
80        wprintln!(
81            writer,
82            "Pages in {} ({} pages, page size {}):",
83            opts.file,
84            ts.page_count(),
85            page_size
86        )?;
87        wprintln!(writer, "{}", "-".repeat(50))?;
88
89        let mut type_counts: HashMap<PageType, u64> = HashMap::new();
90
91        let pb = create_progress_bar(ts.page_count(), "pages");
92
93        for page_num in 0..ts.page_count() {
94            pb.inc(1);
95            let page_data = ts.read_page(page_num)?;
96            let header = match FilHeader::parse(&page_data) {
97                Some(h) => h,
98                None => continue,
99            };
100
101            *type_counts.entry(header.page_type).or_insert(0) += 1;
102
103            // Skip empty pages if --no-empty
104            if opts.no_empty && header.checksum == 0 && header.page_type == PageType::Allocated {
105                continue;
106            }
107
108            // Skip pages with zero checksum unless they are page 0
109            if header.checksum == 0 && page_num != 0 && !opts.verbose {
110                continue;
111            }
112
113            print_page_info(writer, &page_data, page_num, page_size, opts.verbose)?;
114        }
115
116        pb.finish_and_clear();
117
118        // Print page type summary
119        wprintln!(writer)?;
120        wprintln!(writer, "{}", "Page Type Summary".bold())?;
121        let mut sorted_types: Vec<_> = type_counts.iter().collect();
122        sorted_types.sort_by(|a, b| b.1.cmp(a.1));
123        for (pt, count) in sorted_types {
124            let label = if *count == 1 { "page" } else { "pages" };
125            wprintln!(writer, "  {:20} {:>6} {}", pt.name(), count, label)?;
126        }
127    }
128
129    Ok(())
130}
131
132/// Execute parse in JSON output mode.
133fn execute_json(
134    opts: &ParseOptions,
135    ts: &mut Tablespace,
136    page_size: u32,
137    writer: &mut dyn Write,
138) -> Result<(), IdbError> {
139    let mut pages = Vec::new();
140
141    let range: Box<dyn Iterator<Item = u64>> = if let Some(p) = opts.page {
142        Box::new(std::iter::once(p))
143    } else {
144        Box::new(0..ts.page_count())
145    };
146
147    for page_num in range {
148        let page_data = ts.read_page(page_num)?;
149        let header = match FilHeader::parse(&page_data) {
150            Some(h) => h,
151            None => continue,
152        };
153
154        if opts.no_empty && header.checksum == 0 && header.page_type == PageType::Allocated {
155            continue;
156        }
157
158        let pt = header.page_type;
159        let byte_start = page_num * page_size as u64;
160
161        let fsp_header = if page_num == 0 {
162            FspHeader::parse(&page_data)
163        } else {
164            None
165        };
166
167        pages.push(PageJson {
168            page_number: page_num,
169            page_type_name: pt.name().to_string(),
170            page_type_description: pt.description().to_string(),
171            byte_start,
172            byte_end: byte_start + page_size as u64,
173            header,
174            fsp_header,
175        });
176    }
177
178    let json = serde_json::to_string_pretty(&pages)
179        .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
180    wprintln!(writer, "{}", json)?;
181    Ok(())
182}
183
184/// Print detailed information about a single page.
185fn print_page_info(
186    writer: &mut dyn Write,
187    page_data: &[u8],
188    page_num: u64,
189    page_size: u32,
190    verbose: bool,
191) -> Result<(), IdbError> {
192    let header = match FilHeader::parse(page_data) {
193        Some(h) => h,
194        None => {
195            eprintln!("Could not parse FIL header for page {}", page_num);
196            return Ok(());
197        }
198    };
199
200    let byte_start = page_num * page_size as u64;
201    let byte_end = byte_start + page_size as u64;
202
203    let pt = header.page_type;
204
205    wprintln!(writer, "Page: {}", header.page_number)?;
206    wprintln!(writer, "{}", "-".repeat(20))?;
207    wprintln!(writer, "{}", "HEADER".bold())?;
208    wprintln!(writer, "Byte Start: {}", format_offset(byte_start))?;
209    wprintln!(
210        writer,
211        "Page Type: {}\n-- {}: {} - {}",
212        pt.as_u16(),
213        pt.name(),
214        pt.description(),
215        pt.usage()
216    )?;
217
218    if verbose {
219        wprintln!(
220            writer,
221            "PAGE_N_HEAP (Amount of records in page): {}",
222            read_page_n_heap(page_data)
223        )?;
224    }
225
226    wprint!(writer, "Prev Page: ")?;
227    if !header.has_prev() {
228        wprintln!(writer, "Not used.")?;
229    } else {
230        wprintln!(writer, "{}", header.prev_page)?;
231    }
232
233    wprint!(writer, "Next Page: ")?;
234    if !header.has_next() {
235        wprintln!(writer, "Not used.")?;
236    } else {
237        wprintln!(writer, "{}", header.next_page)?;
238    }
239
240    wprintln!(writer, "LSN: {}", header.lsn)?;
241    wprintln!(writer, "Space ID: {}", header.space_id)?;
242    wprintln!(writer, "Checksum: {}", header.checksum)?;
243
244    // Checksum validation
245    let csum_result = checksum::validate_checksum(page_data, page_size);
246    if verbose {
247        let status = if csum_result.valid {
248            "OK".green().to_string()
249        } else {
250            "MISMATCH".red().to_string()
251        };
252        wprintln!(
253            writer,
254            "Checksum Status: {} ({:?}, stored={}, calculated={})",
255            status,
256            csum_result.algorithm,
257            csum_result.stored_checksum,
258            csum_result.calculated_checksum
259        )?;
260    }
261
262    wprintln!(writer)?;
263
264    // Trailer
265    let ps = page_size as usize;
266    if page_data.len() >= ps {
267        let trailer_offset = ps - 8;
268        if let Some(trailer) = crate::innodb::page::FilTrailer::parse(&page_data[trailer_offset..])
269        {
270            wprintln!(writer, "{}", "TRAILER".bold())?;
271            wprintln!(writer, "Old-style Checksum: {}", trailer.checksum)?;
272            wprintln!(writer, "Low 32 bits of LSN: {}", trailer.lsn_low32)?;
273            wprintln!(writer, "Byte End: {}", format_offset(byte_end))?;
274
275            // LSN validation
276            if verbose {
277                let lsn_valid = checksum::validate_lsn(page_data, page_size);
278                let lsn_status = if lsn_valid {
279                    "OK".green().to_string()
280                } else {
281                    "MISMATCH".red().to_string()
282                };
283                wprintln!(writer, "LSN Consistency: {}", lsn_status)?;
284            }
285        }
286    }
287    wprintln!(writer, "{}", "-".repeat(20))?;
288    Ok(())
289}
290
291/// Print FSP header information.
292fn print_fsp_header(writer: &mut dyn Write, fsp: &FspHeader) -> Result<(), IdbError> {
293    wprintln!(writer, "{}", "-".repeat(20))?;
294    wprintln!(writer, "{}", "FSP_HDR - Filespace Header".bold())?;
295    wprintln!(writer, "{}", "-".repeat(20))?;
296    wprintln!(writer, "Space ID: {}", fsp.space_id)?;
297    wprintln!(writer, "Size (pages): {}", fsp.size)?;
298    wprintln!(writer, "Page Free Limit: {}", fsp.free_limit)?;
299    wprintln!(writer, "Flags: {}", fsp.flags)?;
300    Ok(())
301}
302
303/// Read PAGE_N_HEAP from the page header (INDEX page specific).
304fn read_page_n_heap(page_data: &[u8]) -> u16 {
305    let offset = crate::innodb::constants::FIL_PAGE_DATA + 4; // PAGE_N_HEAP is at FIL_PAGE_DATA + 4
306    if page_data.len() < offset + 2 {
307        return 0;
308    }
309    BigEndian::read_u16(&page_data[offset..])
310}