//! `parse` subcommand: inspect InnoDB tablespace pages (idb/cli/parse.rs).
1use std::collections::HashMap;
2use std::io::Write;
3
4use byteorder::{BigEndian, ByteOrder};
5use colored::Colorize;
6use rayon::prelude::*;
7
8use crate::cli::{create_progress_bar, wprint, wprintln};
9use crate::innodb::checksum;
10use crate::innodb::page::{FilHeader, FspHeader};
11use crate::innodb::page_types::PageType;
12use crate::innodb::tablespace::Tablespace;
13use crate::util::hex::format_offset;
14use crate::IdbError;
15
/// Options for the parse subcommand.
pub struct ParseOptions {
    /// Path to the InnoDB tablespace file to parse.
    pub file: String,
    /// Parse only this page number (single-page mode) instead of the whole file.
    pub page: Option<u64>,
    /// Show extra detail (checksum validation, LSN consistency, PAGE_N_HEAP)
    /// and include zero-checksum pages that are skipped by default.
    pub verbose: bool,
    /// Skip empty pages (zero checksum, type `Allocated`) in the output.
    pub no_empty: bool,
    /// Override the auto-detected page size, in bytes.
    pub page_size: Option<u32>,
    /// Output as JSON (takes precedence over `csv` when both are set).
    pub json: bool,
    /// Output as CSV.
    pub csv: bool,
    /// Path to a keyring file used to set up tablespace decryption.
    pub keyring: Option<String>,
    /// Number of threads for parallel processing (0 = auto-detect).
    pub threads: usize,
    /// Use memory-mapped I/O for file access.
    pub mmap: bool,
    /// Stream results incrementally for lower memory usage.
    pub streaming: bool,
}
34
/// JSON-serializable page info.
#[derive(serde::Serialize)]
struct PageJson {
    /// Zero-based page number within the tablespace.
    page_number: u64,
    /// Parsed FIL header of the page.
    header: FilHeader,
    /// Short page type name (from `PageType::name()`).
    page_type_name: String,
    /// Human-readable page type description.
    page_type_description: String,
    /// Byte offset of the page's first byte within the file.
    byte_start: u64,
    /// Byte offset one past the page's last byte (`byte_start + page_size`).
    byte_end: u64,
    /// FSP header; populated only for page 0, omitted from JSON otherwise.
    #[serde(skip_serializing_if = "Option::is_none")]
    fsp_header: Option<crate::innodb::page::FspHeader>,
}
47
/// Pre-parsed page data for parallel processing.
struct ParsedPage {
    /// Zero-based page number within the tablespace.
    page_num: u64,
    /// Parsed FIL header, or `None` when the page is truncated or unparsable.
    header: Option<FilHeader>,
    /// Page type from the header; `PageType::Unknown(0)` when `header` is `None`.
    page_type: PageType,
}
54
55/// Parse an InnoDB tablespace file and display page headers with a type summary.
56///
57/// Opens the tablespace, auto-detects (or uses the overridden) page size, then
58/// iterates over every page reading the 38-byte FIL header to extract the
59/// checksum, page number, prev/next page pointers, LSN, page type, and space ID.
60/// Page 0 additionally displays the FSP header (space ID, tablespace size,
61/// free-page limit, and flags).
62///
63/// When the tablespace has more than one page, all page data is read into memory
64/// and headers are parsed in parallel using rayon. Results are collected in
65/// page order for deterministic output.
66///
67/// In **single-page mode** (`-p N`), only the specified page is printed with
68/// its full FIL header and trailer. In **full-file mode** (the default), all
69/// pages are listed and a frequency summary table is appended showing how many
70/// pages of each type exist. Pages with zero checksum and type `Allocated` are
71/// skipped by default unless `--verbose` is set; `--no-empty` additionally
72/// filters these from `--json` output.
73///
74/// With `--verbose`, each page also shows checksum validation status (algorithm,
75/// stored vs. calculated values) and LSN consistency between header and trailer.
76pub fn execute(opts: &ParseOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
77    let mut ts = crate::cli::open_tablespace(&opts.file, opts.page_size, opts.mmap)?;
78
79    if let Some(ref keyring_path) = opts.keyring {
80        crate::cli::setup_decryption(&mut ts, keyring_path)?;
81    }
82
83    let page_size = ts.page_size();
84
85    // Streaming mode: process one page at a time, output immediately
86    if opts.streaming && opts.page.is_none() {
87        if opts.json {
88            return execute_streaming_json(opts, &mut ts, page_size, writer);
89        }
90        return execute_streaming_text(opts, &mut ts, page_size, writer);
91    }
92
93    if opts.json {
94        return execute_json(opts, &mut ts, page_size, writer);
95    }
96
97    if opts.csv {
98        return execute_csv(opts, &mut ts, page_size, writer);
99    }
100
101    if let Some(page_num) = opts.page {
102        // Single page mode — no parallelism needed
103        let page_data = ts.read_page(page_num)?;
104        print_page_info(writer, &page_data, page_num, page_size, opts.verbose)?;
105    } else {
106        // All pages mode — use parallel processing
107        let page_count = ts.page_count();
108        let ps = page_size as usize;
109
110        // Read all pages into memory
111        let all_data = ts.read_all_pages()?;
112
113        // Print FSP header first
114        let page0_data = &all_data[0..ps];
115        if let Some(fsp) = FspHeader::parse(page0_data) {
116            print_fsp_header(writer, &fsp)?;
117            wprintln!(writer)?;
118        }
119
120        wprintln!(
121            writer,
122            "Pages in {} ({} pages, page size {}):",
123            opts.file,
124            page_count,
125            page_size
126        )?;
127        wprintln!(writer, "{}", "-".repeat(50))?;
128
129        // Create progress bar before parallel work so it tracks real progress
130        let pb = create_progress_bar(page_count, "pages");
131
132        // Parse headers in parallel to build type counts
133        let parsed_pages: Vec<ParsedPage> = (0..page_count)
134            .into_par_iter()
135            .map(|page_num| {
136                let offset = page_num as usize * ps;
137                if offset + ps > all_data.len() {
138                    pb.inc(1);
139                    return ParsedPage {
140                        page_num,
141                        header: None,
142                        page_type: PageType::Unknown(0),
143                    };
144                }
145                let page_data = &all_data[offset..offset + ps];
146                let header = FilHeader::parse(page_data);
147                let page_type = header
148                    .as_ref()
149                    .map(|h| h.page_type)
150                    .unwrap_or(PageType::Unknown(0));
151                pb.inc(1);
152                ParsedPage {
153                    page_num,
154                    header,
155                    page_type,
156                }
157            })
158            .collect();
159
160        pb.finish_and_clear();
161
162        let mut type_counts: HashMap<PageType, u64> = HashMap::new();
163
164        for pp in &parsed_pages {
165            let header = match &pp.header {
166                Some(h) => h,
167                None => continue,
168            };
169
170            *type_counts.entry(pp.page_type).or_insert(0) += 1;
171
172            // Skip empty pages if --no-empty
173            if opts.no_empty && header.checksum == 0 && header.page_type == PageType::Allocated {
174                continue;
175            }
176
177            // Skip pages with zero checksum unless they are page 0
178            if header.checksum == 0 && pp.page_num != 0 && !opts.verbose {
179                continue;
180            }
181
182            let offset = pp.page_num as usize * ps;
183            let page_data = &all_data[offset..offset + ps];
184            print_page_info(writer, page_data, pp.page_num, page_size, opts.verbose)?;
185        }
186
187        // Print page type summary
188        wprintln!(writer)?;
189        wprintln!(writer, "{}", "Page Type Summary".bold())?;
190        let mut sorted_types: Vec<_> = type_counts.iter().collect();
191        sorted_types.sort_by(|a, b| b.1.cmp(a.1));
192        for (pt, count) in sorted_types {
193            let label = if *count == 1 { "page" } else { "pages" };
194            wprintln!(writer, "  {:20} {:>6} {}", pt.name(), count, label)?;
195        }
196    }
197
198    Ok(())
199}
200
201/// Streaming text mode: process pages one at a time via `for_each_page()`,
202/// writing each result immediately. No progress bar, no bulk memory allocation.
203fn execute_streaming_text(
204    opts: &ParseOptions,
205    ts: &mut Tablespace,
206    page_size: u32,
207    writer: &mut dyn Write,
208) -> Result<(), IdbError> {
209    let page_count = ts.page_count();
210
211    // Print FSP header from page 0 first
212    let page0_data = ts.read_page(0)?;
213    if let Some(fsp) = FspHeader::parse(&page0_data) {
214        print_fsp_header(writer, &fsp)?;
215        wprintln!(writer)?;
216    }
217
218    wprintln!(
219        writer,
220        "Pages in {} ({} pages, page size {}):",
221        opts.file,
222        page_count,
223        page_size
224    )?;
225    wprintln!(writer, "{}", "-".repeat(50))?;
226
227    let mut type_counts: HashMap<PageType, u64> = HashMap::new();
228
229    ts.for_each_page(|page_num, page_data| {
230        let header = match FilHeader::parse(page_data) {
231            Some(h) => h,
232            None => return Ok(()),
233        };
234
235        let page_type = header.page_type;
236        *type_counts.entry(page_type).or_insert(0) += 1;
237
238        // Skip empty pages if --no-empty
239        if opts.no_empty && header.checksum == 0 && header.page_type == PageType::Allocated {
240            return Ok(());
241        }
242
243        // Skip pages with zero checksum unless they are page 0
244        if header.checksum == 0 && page_num != 0 && !opts.verbose {
245            return Ok(());
246        }
247
248        print_page_info(writer, page_data, page_num, page_size, opts.verbose)?;
249        Ok(())
250    })?;
251
252    // Print page type summary
253    wprintln!(writer)?;
254    wprintln!(writer, "{}", "Page Type Summary".bold())?;
255    let mut sorted_types: Vec<_> = type_counts.iter().collect();
256    sorted_types.sort_by(|a, b| b.1.cmp(a.1));
257    for (pt, count) in sorted_types {
258        let label = if *count == 1 { "page" } else { "pages" };
259        wprintln!(writer, "  {:20} {:>6} {}", pt.name(), count, label)?;
260    }
261
262    Ok(())
263}
264
265/// Streaming JSON mode: output NDJSON (one JSON object per line per page).
266fn execute_streaming_json(
267    opts: &ParseOptions,
268    ts: &mut Tablespace,
269    page_size: u32,
270    writer: &mut dyn Write,
271) -> Result<(), IdbError> {
272    ts.for_each_page(|page_num, page_data| {
273        let header = match FilHeader::parse(page_data) {
274            Some(h) => h,
275            None => return Ok(()),
276        };
277
278        if opts.no_empty && header.checksum == 0 && header.page_type == PageType::Allocated {
279            return Ok(());
280        }
281
282        let pt = header.page_type;
283        let byte_start = page_num * page_size as u64;
284        let fsp_header = if page_num == 0 {
285            FspHeader::parse(page_data)
286        } else {
287            None
288        };
289
290        let page_json = PageJson {
291            page_number: page_num,
292            page_type_name: pt.name().to_string(),
293            page_type_description: pt.description().to_string(),
294            byte_start,
295            byte_end: byte_start + page_size as u64,
296            header,
297            fsp_header,
298        };
299
300        let line = serde_json::to_string(&page_json)
301            .map_err(|e| IdbError::Parse(format!("JSON error: {}", e)))?;
302        wprintln!(writer, "{}", line)?;
303        Ok(())
304    })?;
305
306    Ok(())
307}
308
309/// Execute parse in CSV output mode.
310fn execute_csv(
311    opts: &ParseOptions,
312    ts: &mut Tablespace,
313    _page_size: u32,
314    writer: &mut dyn Write,
315) -> Result<(), IdbError> {
316    wprintln!(
317        writer,
318        "page_number,checksum,page_type,lsn,space_id,prev_page,next_page"
319    )?;
320
321    let range: Box<dyn Iterator<Item = u64>> = if let Some(p) = opts.page {
322        Box::new(std::iter::once(p))
323    } else {
324        Box::new(0..ts.page_count())
325    };
326
327    for page_num in range {
328        let page_data = ts.read_page(page_num)?;
329        let header = match FilHeader::parse(&page_data) {
330            Some(h) => h,
331            None => continue,
332        };
333
334        if opts.no_empty && header.checksum == 0 && header.page_type == PageType::Allocated {
335            continue;
336        }
337
338        let prev = if header.has_prev() {
339            header.prev_page.to_string()
340        } else {
341            String::new()
342        };
343        let next = if header.has_next() {
344            header.next_page.to_string()
345        } else {
346            String::new()
347        };
348
349        wprintln!(
350            writer,
351            "{},{},{},{},{},{},{}",
352            page_num,
353            header.checksum,
354            crate::cli::csv_escape(header.page_type.name()),
355            header.lsn,
356            header.space_id,
357            prev,
358            next
359        )?;
360    }
361    Ok(())
362}
363
364/// Execute parse in JSON output mode.
365fn execute_json(
366    opts: &ParseOptions,
367    ts: &mut Tablespace,
368    page_size: u32,
369    writer: &mut dyn Write,
370) -> Result<(), IdbError> {
371    if let Some(p) = opts.page {
372        // Single page — no parallelism
373        let page_data = ts.read_page(p)?;
374        let header = match FilHeader::parse(&page_data) {
375            Some(h) => h,
376            None => {
377                wprintln!(writer, "[]")?;
378                return Ok(());
379            }
380        };
381
382        let pt = header.page_type;
383        let byte_start = p * page_size as u64;
384        let fsp_header = if p == 0 {
385            FspHeader::parse(&page_data)
386        } else {
387            None
388        };
389
390        let pages = vec![PageJson {
391            page_number: p,
392            page_type_name: pt.name().to_string(),
393            page_type_description: pt.description().to_string(),
394            byte_start,
395            byte_end: byte_start + page_size as u64,
396            header,
397            fsp_header,
398        }];
399
400        let json = serde_json::to_string_pretty(&pages)
401            .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
402        wprintln!(writer, "{}", json)?;
403        return Ok(());
404    }
405
406    // Full tablespace — read all then process in parallel
407    let page_count = ts.page_count();
408    let ps = page_size as usize;
409    let all_data = ts.read_all_pages()?;
410
411    let pages: Vec<Option<PageJson>> = (0..page_count)
412        .into_par_iter()
413        .map(|page_num| {
414            let offset = page_num as usize * ps;
415            if offset + ps > all_data.len() {
416                return None;
417            }
418            let page_data = &all_data[offset..offset + ps];
419            let header = match FilHeader::parse(page_data) {
420                Some(h) => h,
421                None => return None,
422            };
423
424            if opts.no_empty && header.checksum == 0 && header.page_type == PageType::Allocated {
425                return None;
426            }
427
428            let pt = header.page_type;
429            let byte_start = page_num * page_size as u64;
430            let fsp_header = if page_num == 0 {
431                FspHeader::parse(page_data)
432            } else {
433                None
434            };
435
436            Some(PageJson {
437                page_number: page_num,
438                page_type_name: pt.name().to_string(),
439                page_type_description: pt.description().to_string(),
440                byte_start,
441                byte_end: byte_start + page_size as u64,
442                header,
443                fsp_header,
444            })
445        })
446        .collect();
447
448    let pages: Vec<PageJson> = pages.into_iter().flatten().collect();
449
450    let json = serde_json::to_string_pretty(&pages)
451        .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
452    wprintln!(writer, "{}", json)?;
453    Ok(())
454}
455
456/// Print detailed information about a single page.
457fn print_page_info(
458    writer: &mut dyn Write,
459    page_data: &[u8],
460    page_num: u64,
461    page_size: u32,
462    verbose: bool,
463) -> Result<(), IdbError> {
464    let header = match FilHeader::parse(page_data) {
465        Some(h) => h,
466        None => {
467            eprintln!("Could not parse FIL header for page {}", page_num);
468            return Ok(());
469        }
470    };
471
472    let byte_start = page_num * page_size as u64;
473    let byte_end = byte_start + page_size as u64;
474
475    let pt = header.page_type;
476
477    wprintln!(writer, "Page: {}", header.page_number)?;
478    wprintln!(writer, "{}", "-".repeat(20))?;
479    wprintln!(writer, "{}", "HEADER".bold())?;
480    wprintln!(writer, "Byte Start: {}", format_offset(byte_start))?;
481    wprintln!(
482        writer,
483        "Page Type: {}\n-- {}: {} - {}",
484        pt.as_u16(),
485        pt.name(),
486        pt.description(),
487        pt.usage()
488    )?;
489
490    if verbose {
491        wprintln!(
492            writer,
493            "PAGE_N_HEAP (Amount of records in page): {}",
494            read_page_n_heap(page_data)
495        )?;
496    }
497
498    wprint!(writer, "Prev Page: ")?;
499    if !header.has_prev() {
500        wprintln!(writer, "Not used.")?;
501    } else {
502        wprintln!(writer, "{}", header.prev_page)?;
503    }
504
505    wprint!(writer, "Next Page: ")?;
506    if !header.has_next() {
507        wprintln!(writer, "Not used.")?;
508    } else {
509        wprintln!(writer, "{}", header.next_page)?;
510    }
511
512    wprintln!(writer, "LSN: {}", header.lsn)?;
513    wprintln!(writer, "Space ID: {}", header.space_id)?;
514    wprintln!(writer, "Checksum: {}", header.checksum)?;
515
516    // Checksum validation
517    let csum_result = checksum::validate_checksum(page_data, page_size, None);
518    if verbose {
519        let status = if csum_result.valid {
520            "OK".green().to_string()
521        } else {
522            "MISMATCH".red().to_string()
523        };
524        wprintln!(
525            writer,
526            "Checksum Status: {} ({:?}, stored={}, calculated={})",
527            status,
528            csum_result.algorithm,
529            csum_result.stored_checksum,
530            csum_result.calculated_checksum
531        )?;
532    }
533
534    wprintln!(writer)?;
535
536    // Trailer
537    let ps = page_size as usize;
538    if page_data.len() >= ps {
539        let trailer_offset = ps - 8;
540        if let Some(trailer) = crate::innodb::page::FilTrailer::parse(&page_data[trailer_offset..])
541        {
542            wprintln!(writer, "{}", "TRAILER".bold())?;
543            wprintln!(writer, "Old-style Checksum: {}", trailer.checksum)?;
544            wprintln!(writer, "Low 32 bits of LSN: {}", trailer.lsn_low32)?;
545            wprintln!(writer, "Byte End: {}", format_offset(byte_end))?;
546
547            // LSN validation
548            if verbose {
549                let lsn_valid = checksum::validate_lsn(page_data, page_size);
550                let lsn_status = if lsn_valid {
551                    "OK".green().to_string()
552                } else {
553                    "MISMATCH".red().to_string()
554                };
555                wprintln!(writer, "LSN Consistency: {}", lsn_status)?;
556            }
557        }
558    }
559    wprintln!(writer, "{}", "-".repeat(20))?;
560    Ok(())
561}
562
563/// Print FSP header information.
564fn print_fsp_header(writer: &mut dyn Write, fsp: &FspHeader) -> Result<(), IdbError> {
565    wprintln!(writer, "{}", "-".repeat(20))?;
566    wprintln!(writer, "{}", "FSP_HDR - Filespace Header".bold())?;
567    wprintln!(writer, "{}", "-".repeat(20))?;
568    wprintln!(writer, "Space ID: {}", fsp.space_id)?;
569    wprintln!(writer, "Size (pages): {}", fsp.size)?;
570    wprintln!(writer, "Page Free Limit: {}", fsp.free_limit)?;
571    wprintln!(writer, "Flags: {}", fsp.flags)?;
572    Ok(())
573}
574
575/// Read PAGE_N_HEAP from the page header (INDEX page specific).
576fn read_page_n_heap(page_data: &[u8]) -> u16 {
577    let offset = crate::innodb::constants::FIL_PAGE_DATA + 4; // PAGE_N_HEAP is at FIL_PAGE_DATA + 4
578    if page_data.len() < offset + 2 {
579        return 0;
580    }
581    BigEndian::read_u16(&page_data[offset..])
582}