// idb/cli/diff.rs

1use std::io::Write;
2
3use colored::Colorize;
4use serde::Serialize;
5
6use crate::cli::{create_progress_bar, wprintln};
7use crate::innodb::constants::SIZE_FIL_HEAD;
8use crate::innodb::page::FilHeader;
9use crate::innodb::sdi;
10use crate::innodb::tablespace::Tablespace;
11use crate::IdbError;
12
/// Options for the `inno diff` subcommand.
///
/// Derives `Debug` and `Clone` so callers can log the parsed options or reuse
/// them with small variations (for example, re-running with `json` toggled).
#[derive(Debug, Clone)]
pub struct DiffOptions {
    /// Path to the first InnoDB tablespace file.
    pub file1: String,
    /// Path to the second InnoDB tablespace file.
    pub file2: String,
    /// Show per-page header field diffs in text output.
    ///
    /// JSON output always carries header details for modified pages, so this
    /// flag has no effect when `json` is set.
    pub verbose: bool,
    /// Show byte-range diffs.
    ///
    /// Text output only prints them when `verbose` is also set; JSON output
    /// honours this flag on its own. In both modes byte ranges are skipped
    /// when the two files have different page sizes.
    pub byte_ranges: bool,
    /// Compare a single page only.
    ///
    /// The page is rejected as an argument error only when it lies beyond the
    /// end of *both* files; a page present in just one file is counted as
    /// "only in" that file.
    pub page: Option<u64>,
    /// Annotate diff with MySQL version information from SDI metadata.
    pub version_aware: bool,
    /// Emit output as JSON.
    pub json: bool,
    /// Override the auto-detected page size.
    pub page_size: Option<u32>,
    /// Path to MySQL keyring file for decrypting encrypted tablespaces.
    pub keyring: Option<String>,
    /// Use memory-mapped I/O for file access.
    pub mmap: bool,
}
36
37// ── JSON output structs ─────────────────────────────────────────────
38
/// Top-level JSON document written by `execute_json`.
///
/// Fields are serialized in declaration order; optional/empty members are
/// omitted from the output entirely rather than emitted as `null`/`[]`.
#[derive(Serialize)]
struct DiffReport {
    /// Path, page count and page size of the first file.
    file1: FileInfo,
    /// Path, page count and page size of the second file.
    file2: FileInfo,
    /// `true` when the two files report different page sizes; in that case
    /// pages are compared on their FIL header bytes only.
    page_size_mismatch: bool,
    /// MySQL version extracted from the first file's SDI metadata, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    file1_mysql_version: Option<String>,
    /// MySQL version extracted from the second file's SDI metadata, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    file2_mysql_version: Option<String>,
    /// Aggregate page counts for the comparison.
    summary: DiffSummary,
    /// One entry per page whose compared bytes differ.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    modified_pages: Vec<PageDiff>,
}
52
/// Basic facts about one of the compared tablespace files.
#[derive(Serialize)]
struct FileInfo {
    /// File path exactly as supplied in the diff options.
    path: String,
    /// Number of pages reported by the opened tablespace.
    page_count: u64,
    /// Page size reported by the opened tablespace.
    page_size: u32,
}
59
/// Aggregate page counts produced by a comparison run.
#[derive(Serialize)]
struct DiffSummary {
    /// Pages present in both files whose compared bytes are equal.
    identical: u64,
    /// Pages present in both files whose compared bytes differ.
    modified: u64,
    /// Pages that exist in the first file but lie past the end of the second.
    only_in_file1: u64,
    /// Pages that exist in the second file but lie past the end of the first.
    only_in_file2: u64,
}
67
/// JSON description of a single page that differs between the two files.
#[derive(Serialize)]
struct PageDiff {
    /// Page number within the tablespace.
    page_number: u64,
    /// Parsed FIL header of the page in the first file; omitted when the
    /// header could not be parsed.
    #[serde(skip_serializing_if = "Option::is_none")]
    file1_header: Option<HeaderFields>,
    /// Parsed FIL header of the page in the second file; omitted when the
    /// header could not be parsed.
    #[serde(skip_serializing_if = "Option::is_none")]
    file2_header: Option<HeaderFields>,
    /// Header fields whose values differ. Empty (and omitted) when either
    /// header failed to parse or when only non-header bytes differ.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    changed_fields: Vec<FieldChange>,
    /// Differing byte runs; only populated when byte ranges were requested
    /// and the page sizes match.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    byte_ranges: Vec<ByteRange>,
    /// Sum of all `byte_ranges` lengths; `None` when byte ranges were not
    /// computed.
    #[serde(skip_serializing_if = "Option::is_none")]
    total_bytes_changed: Option<usize>,
}
82
/// JSON-friendly rendering of a parsed FIL header (built by
/// `header_to_fields`).
#[derive(Serialize)]
struct HeaderFields {
    /// Checksum formatted as `0x` followed by 8 uppercase hex digits.
    checksum: String,
    /// Page number stored in the header.
    page_number: u32,
    /// Previous-page pointer formatted as `0x` + 8 uppercase hex digits.
    prev_page: String,
    /// Next-page pointer formatted as `0x` + 8 uppercase hex digits.
    next_page: String,
    /// LSN stored in the header.
    lsn: u64,
    /// Page type rendered by its name.
    page_type: String,
    /// Flush LSN stored in the header.
    flush_lsn: u64,
    /// Space ID stored in the header.
    space_id: u32,
}
94
/// A single FIL header field whose value differs between the two files.
#[derive(Serialize)]
struct FieldChange {
    /// Human-readable field label, e.g. `"Checksum"` or `"Page Type"`.
    field: String,
    /// Value from the first header, already formatted for display.
    old_value: String,
    /// Value from the second header, already formatted for display.
    new_value: String,
}
101
/// A contiguous run of differing bytes within a page, as a half-open range.
#[derive(Serialize)]
struct ByteRange {
    /// Offset of the first differing byte (inclusive).
    start: usize,
    /// Offset just past the last differing byte (exclusive).
    end: usize,
    /// Number of bytes in the run, i.e. `end - start`.
    length: usize,
}
108
109// ── Helpers ─────────────────────────────────────────────────────────
110
/// Render a numeric MySQL version ID as a dotted version string.
///
/// The ID is split as `major * 10000 + minor * 100 + patch`, so `80032`
/// becomes `"8.0.32"`.
fn format_version_id(id: u64) -> String {
    let patch = id % 100;
    let minor = (id / 100) % 100;
    let major = id / 10000;
    format!("{}.{}.{}", major, minor, patch)
}
115
116/// Extract the MySQL version string from a tablespace's SDI metadata.
117///
118/// Returns `None` if the tablespace has no SDI pages or the version cannot
119/// be determined (e.g. pre-8.0 files).
120fn extract_mysql_version(ts: &mut Tablespace) -> Option<String> {
121    let sdi_pages = sdi::find_sdi_pages(ts).ok()?;
122    if sdi_pages.is_empty() {
123        return None;
124    }
125    let records = sdi::extract_sdi_from_pages(ts, &sdi_pages).ok()?;
126    for rec in &records {
127        if rec.sdi_type != 1 {
128            continue;
129        }
130        // Parse the JSON to extract mysqld_version_id or dd_object.mysql_version_id
131        let v: serde_json::Value = match serde_json::from_str(&rec.data) {
132            Ok(v) => v,
133            Err(_) => continue,
134        };
135        // Prefer the envelope-level mysqld_version_id
136        if let Some(id) = v.get("mysqld_version_id").and_then(|v| v.as_u64()) {
137            if id > 0 {
138                return Some(format_version_id(id));
139            }
140        }
141        // Fall back to dd_object.mysql_version_id
142        if let Some(id) = v
143            .get("dd_object")
144            .and_then(|dd| dd.get("mysql_version_id"))
145            .and_then(|v| v.as_u64())
146        {
147            if id > 0 {
148                return Some(format_version_id(id));
149            }
150        }
151    }
152    None
153}
154
155fn header_to_fields(h: &FilHeader) -> HeaderFields {
156    HeaderFields {
157        checksum: format!("0x{:08X}", h.checksum),
158        page_number: h.page_number,
159        prev_page: format!("0x{:08X}", h.prev_page),
160        next_page: format!("0x{:08X}", h.next_page),
161        lsn: h.lsn,
162        page_type: h.page_type.name().to_string(),
163        flush_lsn: h.flush_lsn,
164        space_id: h.space_id,
165    }
166}
167
168fn compare_headers(h1: &FilHeader, h2: &FilHeader) -> Vec<FieldChange> {
169    let mut changes = Vec::new();
170
171    if h1.checksum != h2.checksum {
172        changes.push(FieldChange {
173            field: "Checksum".to_string(),
174            old_value: format!("0x{:08X}", h1.checksum),
175            new_value: format!("0x{:08X}", h2.checksum),
176        });
177    }
178    if h1.page_number != h2.page_number {
179        changes.push(FieldChange {
180            field: "Page Number".to_string(),
181            old_value: h1.page_number.to_string(),
182            new_value: h2.page_number.to_string(),
183        });
184    }
185    if h1.prev_page != h2.prev_page {
186        changes.push(FieldChange {
187            field: "Prev Page".to_string(),
188            old_value: format!("0x{:08X}", h1.prev_page),
189            new_value: format!("0x{:08X}", h2.prev_page),
190        });
191    }
192    if h1.next_page != h2.next_page {
193        changes.push(FieldChange {
194            field: "Next Page".to_string(),
195            old_value: format!("0x{:08X}", h1.next_page),
196            new_value: format!("0x{:08X}", h2.next_page),
197        });
198    }
199    if h1.lsn != h2.lsn {
200        changes.push(FieldChange {
201            field: "LSN".to_string(),
202            old_value: h1.lsn.to_string(),
203            new_value: h2.lsn.to_string(),
204        });
205    }
206    if h1.page_type != h2.page_type {
207        changes.push(FieldChange {
208            field: "Page Type".to_string(),
209            old_value: h1.page_type.name().to_string(),
210            new_value: h2.page_type.name().to_string(),
211        });
212    }
213    if h1.flush_lsn != h2.flush_lsn {
214        changes.push(FieldChange {
215            field: "Flush LSN".to_string(),
216            old_value: h1.flush_lsn.to_string(),
217            new_value: h2.flush_lsn.to_string(),
218        });
219    }
220    if h1.space_id != h2.space_id {
221        changes.push(FieldChange {
222            field: "Space ID".to_string(),
223            old_value: h1.space_id.to_string(),
224            new_value: h2.space_id.to_string(),
225        });
226    }
227
228    changes
229}
230
231fn find_diff_ranges(data1: &[u8], data2: &[u8]) -> Vec<ByteRange> {
232    let len = data1.len().min(data2.len());
233    let mut ranges = Vec::new();
234    let mut in_diff = false;
235    let mut start = 0;
236
237    for i in 0..len {
238        if data1[i] != data2[i] {
239            if !in_diff {
240                in_diff = true;
241                start = i;
242            }
243        } else if in_diff {
244            in_diff = false;
245            ranges.push(ByteRange {
246                start,
247                end: i,
248                length: i - start,
249            });
250        }
251    }
252    if in_diff {
253        ranges.push(ByteRange {
254            start,
255            end: len,
256            length: len - start,
257        });
258    }
259
260    ranges
261}
262
263/// Compare two InnoDB tablespace files page-by-page.
264pub fn execute(opts: &DiffOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
265    let mut ts1 = crate::cli::open_tablespace(&opts.file1, opts.page_size, opts.mmap)?;
266    let mut ts2 = crate::cli::open_tablespace(&opts.file2, opts.page_size, opts.mmap)?;
267
268    if let Some(ref keyring_path) = opts.keyring {
269        crate::cli::setup_decryption(&mut ts1, keyring_path)?;
270        crate::cli::setup_decryption(&mut ts2, keyring_path)?;
271    }
272
273    let ps1 = ts1.page_size();
274    let ps2 = ts2.page_size();
275    let pc1 = ts1.page_count();
276    let pc2 = ts2.page_count();
277
278    let page_size_mismatch = ps1 != ps2;
279
280    // Extract MySQL version info when --version-aware is set
281    let version1 = if opts.version_aware {
282        extract_mysql_version(&mut ts1)
283    } else {
284        None
285    };
286    let version2 = if opts.version_aware {
287        extract_mysql_version(&mut ts2)
288    } else {
289        None
290    };
291
292    if opts.json {
293        return execute_json(
294            opts,
295            &mut ts1,
296            &mut ts2,
297            page_size_mismatch,
298            version1,
299            version2,
300            writer,
301        );
302    }
303
304    // Text output
305    wprintln!(writer, "Comparing:")?;
306    wprintln!(
307        writer,
308        "  File 1: {} ({} pages, {} bytes/page)",
309        opts.file1,
310        pc1,
311        ps1
312    )?;
313    wprintln!(
314        writer,
315        "  File 2: {} ({} pages, {} bytes/page)",
316        opts.file2,
317        pc2,
318        ps2
319    )?;
320
321    if opts.version_aware {
322        if let Some(ref v) = version1 {
323            wprintln!(writer, "  File 1 MySQL version: {}", v)?;
324        }
325        if let Some(ref v) = version2 {
326            wprintln!(writer, "  File 2 MySQL version: {}", v)?;
327        }
328    }
329
330    wprintln!(writer)?;
331
332    if page_size_mismatch {
333        wprintln!(
334            writer,
335            "{}",
336            format!(
337                "WARNING: Page size mismatch ({} vs {}). Comparing FIL headers only.",
338                ps1, ps2
339            )
340            .yellow()
341        )?;
342        wprintln!(writer)?;
343    }
344
345    // Determine comparison range
346    let (start_page, end_page) = match opts.page {
347        Some(p) => {
348            if p >= pc1 && p >= pc2 {
349                return Err(IdbError::Argument(format!(
350                    "Page {} out of range (file1 has {} pages, file2 has {} pages)",
351                    p, pc1, pc2
352                )));
353            }
354            (p, p + 1)
355        }
356        None => (0, pc1.max(pc2)),
357    };
358
359    let common_pages = pc1.min(pc2);
360    let mut identical = 0u64;
361    let mut modified = 0u64;
362    let mut only_in_file1 = 0u64;
363    let mut only_in_file2 = 0u64;
364    let mut modified_page_nums: Vec<u64> = Vec::new();
365
366    let total = end_page - start_page;
367    let pb = create_progress_bar(total, "pages");
368
369    for page_num in start_page..end_page {
370        pb.inc(1);
371
372        // Pages only in one file
373        if page_num >= pc1 {
374            only_in_file2 += 1;
375            continue;
376        }
377        if page_num >= pc2 {
378            only_in_file1 += 1;
379            continue;
380        }
381
382        let data1 = ts1.read_page(page_num)?;
383        let data2 = ts2.read_page(page_num)?;
384
385        if page_size_mismatch {
386            // Compare only FIL headers (first 38 bytes)
387            let cmp_len = SIZE_FIL_HEAD.min(data1.len()).min(data2.len());
388            if data1[..cmp_len] == data2[..cmp_len] {
389                identical += 1;
390            } else {
391                modified += 1;
392                modified_page_nums.push(page_num);
393
394                if opts.verbose {
395                    print_page_diff(writer, page_num, &data1, &data2, opts.byte_ranges, true)?;
396                }
397            }
398        } else {
399            // Full page comparison
400            if data1 == data2 {
401                identical += 1;
402            } else {
403                modified += 1;
404                modified_page_nums.push(page_num);
405
406                if opts.verbose {
407                    print_page_diff(writer, page_num, &data1, &data2, opts.byte_ranges, false)?;
408                }
409            }
410        }
411    }
412
413    pb.finish_and_clear();
414
415    // Count pages beyond common range for non-single-page mode
416    if opts.page.is_none() {
417        if pc1 > common_pages {
418            only_in_file1 = pc1 - common_pages;
419        }
420        if pc2 > common_pages {
421            only_in_file2 = pc2 - common_pages;
422        }
423    }
424
425    // Print summary
426    wprintln!(writer, "Summary:")?;
427    wprintln!(writer, "  Identical pages:  {}", identical)?;
428    if modified > 0 {
429        wprintln!(
430            writer,
431            "  Modified pages:   {}",
432            format!("{}", modified).red()
433        )?;
434    } else {
435        wprintln!(writer, "  Modified pages:   {}", modified)?;
436    }
437    wprintln!(writer, "  Only in file 1:   {}", only_in_file1)?;
438    wprintln!(writer, "  Only in file 2:   {}", only_in_file2)?;
439
440    if !modified_page_nums.is_empty() {
441        wprintln!(writer)?;
442        let nums: Vec<String> = modified_page_nums.iter().map(|n| n.to_string()).collect();
443        wprintln!(writer, "Modified pages: {}", nums.join(", "))?;
444    }
445
446    Ok(())
447}
448
449fn print_page_diff(
450    writer: &mut dyn Write,
451    page_num: u64,
452    data1: &[u8],
453    data2: &[u8],
454    show_byte_ranges: bool,
455    header_only: bool,
456) -> Result<(), IdbError> {
457    wprintln!(writer, "Page {}: {}", page_num, "MODIFIED".red())?;
458
459    let h1 = FilHeader::parse(data1);
460    let h2 = FilHeader::parse(data2);
461
462    match (h1, h2) {
463        (Some(h1), Some(h2)) => {
464            let changes = compare_headers(&h1, &h2);
465            if changes.is_empty() {
466                wprintln!(writer, "  FIL header: identical (data content differs)")?;
467            } else {
468                for c in &changes {
469                    wprintln!(writer, "  {}: {} -> {}", c.field, c.old_value, c.new_value)?;
470                }
471            }
472
473            // Report unchanged page type for context
474            if h1.page_type == h2.page_type && !changes.iter().any(|c| c.field == "Page Type") {
475                wprintln!(writer, "  Page Type: {} (unchanged)", h1.page_type.name())?;
476            }
477        }
478        _ => {
479            wprintln!(writer, "  (could not parse one or both FIL headers)")?;
480        }
481    }
482
483    if show_byte_ranges && !header_only {
484        let ranges = find_diff_ranges(data1, data2);
485        if !ranges.is_empty() {
486            wprintln!(writer, "  Byte diff ranges:")?;
487            for r in &ranges {
488                wprintln!(writer, "    {}-{} ({} bytes)", r.start, r.end, r.length)?;
489            }
490            let total_changed: usize = ranges.iter().map(|r| r.length).sum();
491            let page_size = data1.len();
492            let pct = (total_changed as f64 / page_size as f64) * 100.0;
493            wprintln!(
494                writer,
495                "  Total: {} bytes changed ({:.1}% of page)",
496                total_changed,
497                pct
498            )?;
499        }
500    }
501
502    wprintln!(writer)?;
503    Ok(())
504}
505
506fn execute_json(
507    opts: &DiffOptions,
508    ts1: &mut Tablespace,
509    ts2: &mut Tablespace,
510    page_size_mismatch: bool,
511    version1: Option<String>,
512    version2: Option<String>,
513    writer: &mut dyn Write,
514) -> Result<(), IdbError> {
515    let ps1 = ts1.page_size();
516    let ps2 = ts2.page_size();
517    let pc1 = ts1.page_count();
518    let pc2 = ts2.page_count();
519
520    let (start_page, end_page) = match opts.page {
521        Some(p) => {
522            if p >= pc1 && p >= pc2 {
523                return Err(IdbError::Argument(format!(
524                    "Page {} out of range (file1 has {} pages, file2 has {} pages)",
525                    p, pc1, pc2
526                )));
527            }
528            (p, p + 1)
529        }
530        None => (0, pc1.max(pc2)),
531    };
532
533    let mut identical = 0u64;
534    let mut modified = 0u64;
535    let mut only_in_file1 = 0u64;
536    let mut only_in_file2 = 0u64;
537    let mut modified_pages: Vec<PageDiff> = Vec::new();
538
539    for page_num in start_page..end_page {
540        if page_num >= pc1 {
541            only_in_file2 += 1;
542            continue;
543        }
544        if page_num >= pc2 {
545            only_in_file1 += 1;
546            continue;
547        }
548
549        let data1 = ts1.read_page(page_num)?;
550        let data2 = ts2.read_page(page_num)?;
551
552        let is_equal = if page_size_mismatch {
553            let cmp_len = SIZE_FIL_HEAD.min(data1.len()).min(data2.len());
554            data1[..cmp_len] == data2[..cmp_len]
555        } else {
556            data1 == data2
557        };
558
559        if is_equal {
560            identical += 1;
561        } else {
562            modified += 1;
563
564            let h1 = FilHeader::parse(&data1);
565            let h2 = FilHeader::parse(&data2);
566
567            let (file1_header, file2_header, changed_fields) = match (&h1, &h2) {
568                (Some(h1), Some(h2)) => {
569                    let changes = compare_headers(h1, h2);
570                    (
571                        Some(header_to_fields(h1)),
572                        Some(header_to_fields(h2)),
573                        changes,
574                    )
575                }
576                _ => (
577                    h1.as_ref().map(header_to_fields),
578                    h2.as_ref().map(header_to_fields),
579                    Vec::new(),
580                ),
581            };
582
583            let (byte_ranges, total_bytes_changed) = if opts.byte_ranges && !page_size_mismatch {
584                let ranges = find_diff_ranges(&data1, &data2);
585                let total: usize = ranges.iter().map(|r| r.length).sum();
586                (ranges, Some(total))
587            } else {
588                (Vec::new(), None)
589            };
590
591            modified_pages.push(PageDiff {
592                page_number: page_num,
593                file1_header,
594                file2_header,
595                changed_fields,
596                byte_ranges,
597                total_bytes_changed,
598            });
599        }
600    }
601
602    // For non-single-page mode, count pages beyond common range
603    if opts.page.is_none() {
604        let common = pc1.min(pc2);
605        if pc1 > common {
606            only_in_file1 = pc1 - common;
607        }
608        if pc2 > common {
609            only_in_file2 = pc2 - common;
610        }
611    }
612
613    let report = DiffReport {
614        file1: FileInfo {
615            path: opts.file1.clone(),
616            page_count: pc1,
617            page_size: ps1,
618        },
619        file2: FileInfo {
620            path: opts.file2.clone(),
621            page_count: pc2,
622            page_size: ps2,
623        },
624        page_size_mismatch,
625        file1_mysql_version: version1,
626        file2_mysql_version: version2,
627        summary: DiffSummary {
628            identical,
629            modified,
630            only_in_file1,
631            only_in_file2,
632        },
633        modified_pages,
634    };
635
636    let json = serde_json::to_string_pretty(&report)
637        .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
638    wprintln!(writer, "{}", json)?;
639
640    Ok(())
641}