Skip to main content

idb/cli/
parse.rs

1use std::collections::HashMap;
2use std::io::Write;
3
4use byteorder::{BigEndian, ByteOrder};
5use colored::Colorize;
6
7use crate::cli::{create_progress_bar, wprint, wprintln};
8use crate::innodb::checksum;
9use crate::innodb::page::{FilHeader, FspHeader};
10use crate::innodb::page_types::PageType;
11use crate::innodb::tablespace::Tablespace;
12use crate::util::hex::format_offset;
13use crate::IdbError;
14
/// Options for the parse subcommand.
///
/// Derives `Debug` and `Clone` so callers can log the effective options and
/// hand copies around without rebuilding them field by field.
#[derive(Debug, Clone)]
pub struct ParseOptions {
    /// Path of the tablespace file to open.
    pub file: String,
    /// When set, only this page is read and reported; otherwise every page
    /// in the file is walked.
    pub page: Option<u64>,
    /// Adds `PAGE_N_HEAP`, checksum status and LSN consistency to the text
    /// output, and disables the default skipping of zero-checksum pages.
    pub verbose: bool,
    /// Omit pages whose stored checksum is zero and whose type is `Allocated`.
    pub no_empty: bool,
    /// Page size override; `None` leaves the choice to `Tablespace::open`.
    pub page_size: Option<u32>,
    /// Emit a JSON array of pages instead of the text report.
    pub json: bool,
    /// Keyring path passed to `crate::cli::setup_decryption` when present.
    pub keyring: Option<String>,
}
25
26/// JSON-serializable page info.
27#[derive(serde::Serialize)]
28struct PageJson {
29    page_number: u64,
30    header: FilHeader,
31    page_type_name: String,
32    page_type_description: String,
33    byte_start: u64,
34    byte_end: u64,
35    #[serde(skip_serializing_if = "Option::is_none")]
36    fsp_header: Option<crate::innodb::page::FspHeader>,
37}
38
39/// Parse an InnoDB tablespace file and display page headers with a type summary.
40///
41/// Opens the tablespace, auto-detects (or uses the overridden) page size, then
42/// iterates over every page reading the 38-byte FIL header to extract the
43/// checksum, page number, prev/next page pointers, LSN, page type, and space ID.
44/// Page 0 additionally displays the FSP header (space ID, tablespace size,
45/// free-page limit, and flags).
46///
47/// In **single-page mode** (`-p N`), only the specified page is printed with
48/// its full FIL header and trailer. In **full-file mode** (the default), all
49/// pages are listed and a frequency summary table is appended showing how many
50/// pages of each type exist. Pages with zero checksum and type `Allocated` are
51/// skipped by default unless `--verbose` is set; `--no-empty` additionally
52/// filters these from `--json` output.
53///
54/// With `--verbose`, each page also shows checksum validation status (algorithm,
55/// stored vs. calculated values) and LSN consistency between header and trailer.
56pub fn execute(opts: &ParseOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
57    let mut ts = match opts.page_size {
58        Some(ps) => Tablespace::open_with_page_size(&opts.file, ps)?,
59        None => Tablespace::open(&opts.file)?,
60    };
61
62    if let Some(ref keyring_path) = opts.keyring {
63        crate::cli::setup_decryption(&mut ts, keyring_path)?;
64    }
65
66    let page_size = ts.page_size();
67
68    if opts.json {
69        return execute_json(opts, &mut ts, page_size, writer);
70    }
71
72    if let Some(page_num) = opts.page {
73        // Single page mode
74        let page_data = ts.read_page(page_num)?;
75        print_page_info(writer, &page_data, page_num, page_size, opts.verbose)?;
76    } else {
77        // All pages mode
78        // Print FSP header first
79        let page0 = ts.read_page(0)?;
80        if let Some(fsp) = FspHeader::parse(&page0) {
81            print_fsp_header(writer, &fsp)?;
82            wprintln!(writer)?;
83        }
84
85        wprintln!(
86            writer,
87            "Pages in {} ({} pages, page size {}):",
88            opts.file,
89            ts.page_count(),
90            page_size
91        )?;
92        wprintln!(writer, "{}", "-".repeat(50))?;
93
94        let mut type_counts: HashMap<PageType, u64> = HashMap::new();
95
96        let pb = create_progress_bar(ts.page_count(), "pages");
97
98        for page_num in 0..ts.page_count() {
99            pb.inc(1);
100            let page_data = ts.read_page(page_num)?;
101            let header = match FilHeader::parse(&page_data) {
102                Some(h) => h,
103                None => continue,
104            };
105
106            *type_counts.entry(header.page_type).or_insert(0) += 1;
107
108            // Skip empty pages if --no-empty
109            if opts.no_empty && header.checksum == 0 && header.page_type == PageType::Allocated {
110                continue;
111            }
112
113            // Skip pages with zero checksum unless they are page 0
114            if header.checksum == 0 && page_num != 0 && !opts.verbose {
115                continue;
116            }
117
118            print_page_info(writer, &page_data, page_num, page_size, opts.verbose)?;
119        }
120
121        pb.finish_and_clear();
122
123        // Print page type summary
124        wprintln!(writer)?;
125        wprintln!(writer, "{}", "Page Type Summary".bold())?;
126        let mut sorted_types: Vec<_> = type_counts.iter().collect();
127        sorted_types.sort_by(|a, b| b.1.cmp(a.1));
128        for (pt, count) in sorted_types {
129            let label = if *count == 1 { "page" } else { "pages" };
130            wprintln!(writer, "  {:20} {:>6} {}", pt.name(), count, label)?;
131        }
132    }
133
134    Ok(())
135}
136
137/// Execute parse in JSON output mode.
138fn execute_json(
139    opts: &ParseOptions,
140    ts: &mut Tablespace,
141    page_size: u32,
142    writer: &mut dyn Write,
143) -> Result<(), IdbError> {
144    let mut pages = Vec::new();
145
146    let range: Box<dyn Iterator<Item = u64>> = if let Some(p) = opts.page {
147        Box::new(std::iter::once(p))
148    } else {
149        Box::new(0..ts.page_count())
150    };
151
152    for page_num in range {
153        let page_data = ts.read_page(page_num)?;
154        let header = match FilHeader::parse(&page_data) {
155            Some(h) => h,
156            None => continue,
157        };
158
159        if opts.no_empty && header.checksum == 0 && header.page_type == PageType::Allocated {
160            continue;
161        }
162
163        let pt = header.page_type;
164        let byte_start = page_num * page_size as u64;
165
166        let fsp_header = if page_num == 0 {
167            FspHeader::parse(&page_data)
168        } else {
169            None
170        };
171
172        pages.push(PageJson {
173            page_number: page_num,
174            page_type_name: pt.name().to_string(),
175            page_type_description: pt.description().to_string(),
176            byte_start,
177            byte_end: byte_start + page_size as u64,
178            header,
179            fsp_header,
180        });
181    }
182
183    let json = serde_json::to_string_pretty(&pages)
184        .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
185    wprintln!(writer, "{}", json)?;
186    Ok(())
187}
188
189/// Print detailed information about a single page.
190fn print_page_info(
191    writer: &mut dyn Write,
192    page_data: &[u8],
193    page_num: u64,
194    page_size: u32,
195    verbose: bool,
196) -> Result<(), IdbError> {
197    let header = match FilHeader::parse(page_data) {
198        Some(h) => h,
199        None => {
200            eprintln!("Could not parse FIL header for page {}", page_num);
201            return Ok(());
202        }
203    };
204
205    let byte_start = page_num * page_size as u64;
206    let byte_end = byte_start + page_size as u64;
207
208    let pt = header.page_type;
209
210    wprintln!(writer, "Page: {}", header.page_number)?;
211    wprintln!(writer, "{}", "-".repeat(20))?;
212    wprintln!(writer, "{}", "HEADER".bold())?;
213    wprintln!(writer, "Byte Start: {}", format_offset(byte_start))?;
214    wprintln!(
215        writer,
216        "Page Type: {}\n-- {}: {} - {}",
217        pt.as_u16(),
218        pt.name(),
219        pt.description(),
220        pt.usage()
221    )?;
222
223    if verbose {
224        wprintln!(
225            writer,
226            "PAGE_N_HEAP (Amount of records in page): {}",
227            read_page_n_heap(page_data)
228        )?;
229    }
230
231    wprint!(writer, "Prev Page: ")?;
232    if !header.has_prev() {
233        wprintln!(writer, "Not used.")?;
234    } else {
235        wprintln!(writer, "{}", header.prev_page)?;
236    }
237
238    wprint!(writer, "Next Page: ")?;
239    if !header.has_next() {
240        wprintln!(writer, "Not used.")?;
241    } else {
242        wprintln!(writer, "{}", header.next_page)?;
243    }
244
245    wprintln!(writer, "LSN: {}", header.lsn)?;
246    wprintln!(writer, "Space ID: {}", header.space_id)?;
247    wprintln!(writer, "Checksum: {}", header.checksum)?;
248
249    // Checksum validation
250    let csum_result = checksum::validate_checksum(page_data, page_size, None);
251    if verbose {
252        let status = if csum_result.valid {
253            "OK".green().to_string()
254        } else {
255            "MISMATCH".red().to_string()
256        };
257        wprintln!(
258            writer,
259            "Checksum Status: {} ({:?}, stored={}, calculated={})",
260            status,
261            csum_result.algorithm,
262            csum_result.stored_checksum,
263            csum_result.calculated_checksum
264        )?;
265    }
266
267    wprintln!(writer)?;
268
269    // Trailer
270    let ps = page_size as usize;
271    if page_data.len() >= ps {
272        let trailer_offset = ps - 8;
273        if let Some(trailer) = crate::innodb::page::FilTrailer::parse(&page_data[trailer_offset..])
274        {
275            wprintln!(writer, "{}", "TRAILER".bold())?;
276            wprintln!(writer, "Old-style Checksum: {}", trailer.checksum)?;
277            wprintln!(writer, "Low 32 bits of LSN: {}", trailer.lsn_low32)?;
278            wprintln!(writer, "Byte End: {}", format_offset(byte_end))?;
279
280            // LSN validation
281            if verbose {
282                let lsn_valid = checksum::validate_lsn(page_data, page_size);
283                let lsn_status = if lsn_valid {
284                    "OK".green().to_string()
285                } else {
286                    "MISMATCH".red().to_string()
287                };
288                wprintln!(writer, "LSN Consistency: {}", lsn_status)?;
289            }
290        }
291    }
292    wprintln!(writer, "{}", "-".repeat(20))?;
293    Ok(())
294}
295
296/// Print FSP header information.
297fn print_fsp_header(writer: &mut dyn Write, fsp: &FspHeader) -> Result<(), IdbError> {
298    wprintln!(writer, "{}", "-".repeat(20))?;
299    wprintln!(writer, "{}", "FSP_HDR - Filespace Header".bold())?;
300    wprintln!(writer, "{}", "-".repeat(20))?;
301    wprintln!(writer, "Space ID: {}", fsp.space_id)?;
302    wprintln!(writer, "Size (pages): {}", fsp.size)?;
303    wprintln!(writer, "Page Free Limit: {}", fsp.free_limit)?;
304    wprintln!(writer, "Flags: {}", fsp.flags)?;
305    Ok(())
306}
307
308/// Read PAGE_N_HEAP from the page header (INDEX page specific).
309fn read_page_n_heap(page_data: &[u8]) -> u16 {
310    let offset = crate::innodb::constants::FIL_PAGE_DATA + 4; // PAGE_N_HEAP is at FIL_PAGE_DATA + 4
311    if page_data.len() < offset + 2 {
312        return 0;
313    }
314    BigEndian::read_u16(&page_data[offset..])
315}