//! `inno audit` CLI subcommand (idb/cli/audit.rs): directory-wide tablespace
//! integrity, health, and checksum-mismatch auditing.
1use std::io::Write;
2use std::path::Path;
3use std::time::Instant;
4
5use colored::Colorize;
6use rayon::prelude::*;
7use serde::Serialize;
8
9use crate::cli::{create_progress_bar, csv_escape, wprintln};
10use crate::innodb::checksum::{validate_checksum, validate_lsn};
11use crate::innodb::health;
12use crate::util::fs::find_tablespace_files;
13use crate::util::prometheus as prom;
14use crate::IdbError;
15
/// Options for the `inno audit` subcommand.
///
/// `health` and `checksum_mismatch` are mutually exclusive, and `prometheus`
/// cannot be combined with `json`/`csv`; both rules are enforced in `execute`.
/// When neither mode flag is set, the default integrity mode runs.
pub struct AuditOptions {
    /// MySQL data directory path to scan.
    pub datadir: String,
    /// Show per-tablespace health metrics instead of integrity validation.
    pub health: bool,
    /// List only pages with checksum mismatches.
    pub checksum_mismatch: bool,
    /// Show additional details.
    pub verbose: bool,
    /// Emit output as JSON.
    pub json: bool,
    /// Output as CSV.
    pub csv: bool,
    /// Output in Prometheus exposition format.
    pub prometheus: bool,
    /// Override the auto-detected page size.
    pub page_size: Option<u32>,
    /// Path to MySQL keyring file for decrypting encrypted tablespaces.
    pub keyring: Option<String>,
    /// Use memory-mapped I/O for file access.
    pub mmap: bool,
    /// Show tables with fill factor below this threshold (0-100).
    pub min_fill_factor: Option<f64>,
    /// Show tables with fragmentation above this threshold (0-100).
    pub max_fragmentation: Option<f64>,
    /// Maximum directory recursion depth (None = default 2, Some(0) = unlimited).
    pub depth: Option<u32>,
}
45
46// ---------------------------------------------------------------------------
47// JSON output structs — default integrity mode
48// ---------------------------------------------------------------------------
49
/// Top-level JSON report for the default integrity mode.
#[derive(Serialize)]
struct AuditReport {
    /// Data directory that was scanned.
    datadir: String,
    /// Per-file integrity results (sorted by file path in `execute_integrity`).
    files: Vec<FileIntegrityResult>,
    /// Aggregated totals across all files.
    summary: AuditSummary,
}
56
/// Integrity result for a single tablespace file.
#[derive(Serialize, Clone)]
struct FileIntegrityResult {
    /// Path relative to the datadir (full path if not a prefix).
    file: String,
    /// "PASS", "FAIL" (checksum failures), or "error" (could not open/read).
    status: String,
    /// Page size in bytes (0 when the file could not be opened).
    page_size: u32,
    /// Total page count reported by the tablespace.
    total_pages: u64,
    /// Pages consisting entirely of zero bytes (skipped by validation).
    empty_pages: u64,
    /// Non-empty pages whose checksum validated.
    valid_pages: u64,
    /// Non-empty pages whose checksum failed validation.
    invalid_pages: u64,
    /// Checksum-valid, non-empty pages that failed LSN validation.
    lsn_mismatches: u64,
    /// Open/read error message; present only when status is "error".
    #[serde(skip_serializing_if = "Option::is_none")]
    error: Option<String>,
    /// Page numbers that failed checksum validation; omitted when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    corrupt_pages: Vec<u64>,
}
72
/// Directory-wide totals for the default integrity mode.
#[derive(Serialize)]
struct AuditSummary {
    /// Number of .ibd files scanned.
    total_files: usize,
    /// Files with status "PASS".
    files_passed: usize,
    /// Files with status "FAIL".
    files_failed: usize,
    /// Files that could not be opened/read.
    files_error: usize,
    /// Sum of `total_pages` over all files.
    total_pages: u64,
    /// Sum of `invalid_pages` over all files.
    corrupt_pages: u64,
    /// valid / (valid + invalid) * 100, rounded to two decimals.
    integrity_pct: f64,
}
83
84// ---------------------------------------------------------------------------
85// JSON output structs — health mode
86// ---------------------------------------------------------------------------
87
/// Health metrics for a single tablespace file.
///
/// Ratios are stored in the 0.0-1.0 range and scaled to percentages only at
/// output time. All metrics are zero when `error` is set.
#[derive(Serialize, Clone)]
struct FileHealthResult {
    /// Path relative to the datadir (full path if not a prefix).
    file: String,
    /// Average index-page fill factor (0.0-1.0).
    avg_fill_factor: f64,
    /// Average index-page fragmentation (0.0-1.0).
    avg_fragmentation: f64,
    /// Average index-page garbage ratio (0.0-1.0).
    avg_garbage_ratio: f64,
    /// Number of distinct indexes observed.
    index_count: u64,
    /// Number of index pages observed.
    total_index_pages: u64,
    /// Open/scan error message, when the file could not be analyzed.
    #[serde(skip_serializing_if = "Option::is_none")]
    error: Option<String>,
}
99
/// Top-level JSON report for health mode.
#[derive(Serialize)]
struct HealthAuditReport {
    /// Data directory that was scanned.
    datadir: String,
    /// Per-file health results (threshold-filtered, worst fragmentation first).
    tablespaces: Vec<FileHealthResult>,
    /// Directory-wide averages computed over ALL files, before filtering.
    summary: DirectoryHealthSummary,
}
106
/// Directory-wide health averages (unweighted mean over error-free files).
#[derive(Serialize)]
struct DirectoryHealthSummary {
    /// Number of .ibd files scanned (including errored ones).
    total_files: usize,
    /// Sum of index pages over all files.
    total_index_pages: u64,
    /// Mean fill factor (0.0-1.0), rounded to two decimals.
    avg_fill_factor: f64,
    /// Mean fragmentation (0.0-1.0), rounded to two decimals.
    avg_fragmentation: f64,
    /// Mean garbage ratio (0.0-1.0), rounded to two decimals.
    avg_garbage_ratio: f64,
}
115
116// ---------------------------------------------------------------------------
117// JSON output structs — mismatch mode
118// ---------------------------------------------------------------------------
119
/// One page whose stored checksum does not match the calculated value.
#[derive(Serialize, Clone)]
struct MismatchEntry {
    /// Path relative to the datadir (full path if not a prefix).
    file: String,
    /// Zero-based page number within the file.
    page_number: u64,
    /// Checksum read from the page header.
    stored_checksum: u32,
    /// Checksum recomputed from the page contents.
    calculated_checksum: u32,
    /// Algorithm label from `algorithm_name` (e.g. "crc32c", "innodb").
    algorithm: String,
}
128
/// Top-level JSON report for checksum-mismatch mode.
#[derive(Serialize)]
struct MismatchReport {
    /// Data directory that was scanned.
    datadir: String,
    /// All mismatching pages, sorted by (file, page_number).
    mismatches: Vec<MismatchEntry>,
    /// Number of .ibd files scanned (including unreadable ones).
    total_files_scanned: usize,
    /// Sum of page counts over files that reported one.
    total_pages_scanned: u64,
}
136
137// ---------------------------------------------------------------------------
138// Per-file worker functions
139// ---------------------------------------------------------------------------
140
141/// Audit a single file for integrity. Returns `None` if the file cannot be opened.
142fn audit_file(
143    path: &Path,
144    datadir: &Path,
145    page_size_override: Option<u32>,
146    keyring: &Option<String>,
147    use_mmap: bool,
148) -> FileIntegrityResult {
149    let display = path.strip_prefix(datadir).unwrap_or(path);
150    let display_str = display.display().to_string();
151    let path_str = path.to_string_lossy();
152
153    let mut ts = match crate::cli::open_tablespace(&path_str, page_size_override, use_mmap) {
154        Ok(t) => t,
155        Err(e) => {
156            return FileIntegrityResult {
157                file: display_str,
158                status: "error".to_string(),
159                page_size: 0,
160                total_pages: 0,
161                empty_pages: 0,
162                valid_pages: 0,
163                invalid_pages: 0,
164                lsn_mismatches: 0,
165                error: Some(e.to_string()),
166                corrupt_pages: Vec::new(),
167            };
168        }
169    };
170
171    if let Some(ref kp) = keyring {
172        if crate::cli::setup_decryption(&mut ts, kp).is_err() {
173            // Not encrypted or bad keyring — continue without decryption
174        }
175    }
176
177    let page_size = ts.page_size();
178    let page_count = ts.page_count();
179    let vendor_info = ts.vendor_info().clone();
180
181    // Read all pages for parallel validation
182    let all_data = match ts.read_all_pages() {
183        Ok(d) => d,
184        Err(e) => {
185            return FileIntegrityResult {
186                file: display_str,
187                status: "error".to_string(),
188                page_size,
189                total_pages: page_count,
190                empty_pages: 0,
191                valid_pages: 0,
192                invalid_pages: 0,
193                lsn_mismatches: 0,
194                error: Some(e.to_string()),
195                corrupt_pages: Vec::new(),
196            };
197        }
198    };
199
200    let ps = page_size as usize;
201
202    // Validate pages in parallel
203    let results: Vec<(u64, bool, bool, bool, Option<u64>)> = (0..page_count)
204        .into_par_iter()
205        .map(|page_num| {
206            let offset = page_num as usize * ps;
207            if offset + ps > all_data.len() {
208                return (page_num, false, false, false, None);
209            }
210            let page_data = &all_data[offset..offset + ps];
211
212            // Check for empty page
213            if page_data.iter().all(|&b| b == 0) {
214                return (page_num, true, true, true, None); // empty=true
215            }
216
217            let csum = validate_checksum(page_data, page_size, Some(&vendor_info));
218            let lsn_ok = validate_lsn(page_data, page_size);
219
220            let corrupt_page = if !csum.valid { Some(page_num) } else { None };
221            // (page_num, is_valid_or_empty, is_empty, lsn_ok, corrupt_page_num)
222            (page_num, csum.valid, false, lsn_ok, corrupt_page)
223        })
224        .collect();
225
226    let mut valid = 0u64;
227    let mut invalid = 0u64;
228    let mut empty = 0u64;
229    let mut lsn_mismatches = 0u64;
230    let mut corrupt_pages = Vec::new();
231
232    for &(_, is_valid, is_empty, lsn_ok, ref corrupt) in &results {
233        if is_empty {
234            empty += 1;
235        } else if is_valid {
236            valid += 1;
237        } else {
238            invalid += 1;
239        }
240        if !lsn_ok && !is_empty && is_valid {
241            lsn_mismatches += 1;
242        }
243        if let Some(pn) = corrupt {
244            corrupt_pages.push(*pn);
245        }
246    }
247
248    let status = if invalid > 0 { "FAIL" } else { "PASS" };
249
250    FileIntegrityResult {
251        file: display_str,
252        status: status.to_string(),
253        page_size,
254        total_pages: page_count,
255        empty_pages: empty,
256        valid_pages: valid,
257        invalid_pages: invalid,
258        lsn_mismatches,
259        error: None,
260        corrupt_pages,
261    }
262}
263
264/// Audit a single file for health metrics.
265fn audit_file_health(
266    path: &Path,
267    datadir: &Path,
268    page_size_override: Option<u32>,
269    keyring: &Option<String>,
270    use_mmap: bool,
271) -> FileHealthResult {
272    let display = path.strip_prefix(datadir).unwrap_or(path);
273    let display_str = display.display().to_string();
274    let path_str = path.to_string_lossy();
275
276    let mut ts = match crate::cli::open_tablespace(&path_str, page_size_override, use_mmap) {
277        Ok(t) => t,
278        Err(e) => {
279            return FileHealthResult {
280                file: display_str,
281                avg_fill_factor: 0.0,
282                avg_fragmentation: 0.0,
283                avg_garbage_ratio: 0.0,
284                index_count: 0,
285                total_index_pages: 0,
286                error: Some(e.to_string()),
287            };
288        }
289    };
290
291    if let Some(ref kp) = keyring {
292        let _ = crate::cli::setup_decryption(&mut ts, kp);
293    }
294
295    let page_size = ts.page_size();
296    let total_pages = ts.page_count();
297
298    let mut snapshots = Vec::new();
299    let mut empty_pages = 0u64;
300
301    let scan_result = ts.for_each_page(|page_num, data| {
302        if data.iter().all(|&b| b == 0) {
303            empty_pages += 1;
304        } else if let Some(snap) = health::extract_index_page_snapshot(data, page_num) {
305            snapshots.push(snap);
306        }
307        Ok(())
308    });
309
310    if let Err(e) = scan_result {
311        return FileHealthResult {
312            file: display_str,
313            avg_fill_factor: 0.0,
314            avg_fragmentation: 0.0,
315            avg_garbage_ratio: 0.0,
316            index_count: 0,
317            total_index_pages: 0,
318            error: Some(e.to_string()),
319        };
320    }
321
322    let report = health::analyze_health(snapshots, page_size, total_pages, empty_pages, &path_str);
323
324    FileHealthResult {
325        file: display_str,
326        avg_fill_factor: report.summary.avg_fill_factor,
327        avg_fragmentation: report.summary.avg_fragmentation,
328        avg_garbage_ratio: report.summary.avg_garbage_ratio,
329        index_count: report.summary.index_count,
330        total_index_pages: report.summary.index_pages,
331        error: None,
332    }
333}
334
335/// Scan a single file for checksum mismatches only.
336fn audit_file_mismatches(
337    path: &Path,
338    datadir: &Path,
339    page_size_override: Option<u32>,
340    keyring: &Option<String>,
341    use_mmap: bool,
342) -> (Vec<MismatchEntry>, u64) {
343    let display = path.strip_prefix(datadir).unwrap_or(path);
344    let display_str = display.display().to_string();
345    let path_str = path.to_string_lossy();
346
347    let mut ts = match crate::cli::open_tablespace(&path_str, page_size_override, use_mmap) {
348        Ok(t) => t,
349        Err(_) => return (Vec::new(), 0),
350    };
351
352    if let Some(ref kp) = keyring {
353        let _ = crate::cli::setup_decryption(&mut ts, kp);
354    }
355
356    let page_size = ts.page_size();
357    let page_count = ts.page_count();
358    let vendor_info = ts.vendor_info().clone();
359
360    let all_data = match ts.read_all_pages() {
361        Ok(d) => d,
362        Err(_) => return (Vec::new(), page_count),
363    };
364
365    let ps = page_size as usize;
366
367    let mismatches: Vec<MismatchEntry> = (0..page_count)
368        .into_par_iter()
369        .filter_map(|page_num| {
370            let offset = page_num as usize * ps;
371            if offset + ps > all_data.len() {
372                return None;
373            }
374            let page_data = &all_data[offset..offset + ps];
375
376            if page_data.iter().all(|&b| b == 0) {
377                return None;
378            }
379
380            let csum = validate_checksum(page_data, page_size, Some(&vendor_info));
381            if csum.valid {
382                return None;
383            }
384
385            Some(MismatchEntry {
386                file: display_str.clone(),
387                page_number: page_num,
388                stored_checksum: csum.stored_checksum,
389                calculated_checksum: csum.calculated_checksum,
390                algorithm: algorithm_name(csum.algorithm).to_string(),
391            })
392        })
393        .collect();
394
395    (mismatches, page_count)
396}
397
398fn algorithm_name(algo: crate::innodb::checksum::ChecksumAlgorithm) -> &'static str {
399    match algo {
400        crate::innodb::checksum::ChecksumAlgorithm::Crc32c => "crc32c",
401        crate::innodb::checksum::ChecksumAlgorithm::InnoDB => "innodb",
402        crate::innodb::checksum::ChecksumAlgorithm::MariaDbFullCrc32 => "mariadb_full_crc32",
403        crate::innodb::checksum::ChecksumAlgorithm::None => "none",
404    }
405}
406
407// ---------------------------------------------------------------------------
408// Entry point
409// ---------------------------------------------------------------------------
410
/// Audit a MySQL data directory for integrity, health metrics, or corrupt pages.
///
/// Validates flag combinations, verifies the datadir exists, discovers .ibd
/// files, then dispatches to the selected mode. The empty-directory case is
/// handled here so each mode can assume at least one file, while still
/// emitting a syntactically valid (empty) report per output format.
pub fn execute(opts: &AuditOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
    // Prometheus is its own exposition format; mixing it with JSON/CSV is ambiguous.
    if opts.prometheus && (opts.json || opts.csv) {
        return Err(IdbError::Argument(
            "--prometheus cannot be combined with JSON or CSV output".to_string(),
        ));
    }

    // Validate mutually exclusive flags
    if opts.health && opts.checksum_mismatch {
        return Err(IdbError::Argument(
            "--health and --checksum-mismatch are mutually exclusive".to_string(),
        ));
    }

    let datadir = Path::new(&opts.datadir);
    if !datadir.is_dir() {
        return Err(IdbError::Argument(format!(
            "Data directory does not exist: {}",
            opts.datadir
        )));
    }

    let ibd_files = find_tablespace_files(datadir, &["ibd"], opts.depth)?;

    if ibd_files.is_empty() {
        if opts.prometheus {
            // Empty Prometheus output — valid exposition format (no metrics emitted)
            return Ok(());
        } else if opts.json {
            // JSON consumers still get the full report shape, just empty.
            if opts.health {
                let report = HealthAuditReport {
                    datadir: opts.datadir.clone(),
                    tablespaces: Vec::new(),
                    summary: DirectoryHealthSummary {
                        total_files: 0,
                        total_index_pages: 0,
                        avg_fill_factor: 0.0,
                        avg_fragmentation: 0.0,
                        avg_garbage_ratio: 0.0,
                    },
                };
                let json = serde_json::to_string_pretty(&report)
                    .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
                wprintln!(writer, "{}", json)?;
            } else if opts.checksum_mismatch {
                let report = MismatchReport {
                    datadir: opts.datadir.clone(),
                    mismatches: Vec::new(),
                    total_files_scanned: 0,
                    total_pages_scanned: 0,
                };
                let json = serde_json::to_string_pretty(&report)
                    .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
                wprintln!(writer, "{}", json)?;
            } else {
                // Vacuously 100% integrity: nothing was checked, nothing failed.
                let report = AuditReport {
                    datadir: opts.datadir.clone(),
                    files: Vec::new(),
                    summary: AuditSummary {
                        total_files: 0,
                        files_passed: 0,
                        files_failed: 0,
                        files_error: 0,
                        total_pages: 0,
                        corrupt_pages: 0,
                        integrity_pct: 100.0,
                    },
                };
                let json = serde_json::to_string_pretty(&report)
                    .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
                wprintln!(writer, "{}", json)?;
            }
        } else {
            wprintln!(writer, "No .ibd files found in {}", opts.datadir)?;
        }
        return Ok(());
    }

    // Dispatch to the selected mode; integrity is the default.
    if opts.health {
        execute_health(opts, &ibd_files, datadir, writer)
    } else if opts.checksum_mismatch {
        execute_mismatch(opts, &ibd_files, datadir, writer)
    } else {
        execute_integrity(opts, &ibd_files, datadir, writer)
    }
}
498
499// ---------------------------------------------------------------------------
500// Default integrity mode (#83)
501// ---------------------------------------------------------------------------
502
/// Run the default integrity mode: checksum/LSN-validate every file in
/// parallel and emit per-file plus summary results in the selected format.
///
/// Returns `Err` when any corrupt pages were found (after output is written)
/// so the CLI exits non-zero on corruption.
fn execute_integrity(
    opts: &AuditOptions,
    ibd_files: &[std::path::PathBuf],
    datadir: &Path,
    writer: &mut dyn Write,
) -> Result<(), IdbError> {
    let start = Instant::now();

    // Progress bar only for human-readable output.
    let pb = if !opts.json && !opts.csv && !opts.prometheus {
        Some(create_progress_bar(ibd_files.len() as u64, "files"))
    } else {
        None
    };

    let page_size = opts.page_size;
    let keyring = opts.keyring.clone();
    let use_mmap = opts.mmap;

    // Audit files in parallel; each worker bumps the shared progress bar.
    let mut results: Vec<FileIntegrityResult> = ibd_files
        .par_iter()
        .map(|path| {
            let r = audit_file(path, datadir, page_size, &keyring, use_mmap);
            if let Some(ref pb) = pb {
                pb.inc(1);
            }
            r
        })
        .collect();

    if let Some(ref pb) = pb {
        pb.finish_and_clear();
    }

    // Sort by file path for deterministic output
    results.sort_by(|a, b| a.file.cmp(&b.file));

    // Compute summary
    let total_files = results.len();
    let files_passed = results.iter().filter(|r| r.status == "PASS").count();
    let files_failed = results.iter().filter(|r| r.status == "FAIL").count();
    let files_error = results.iter().filter(|r| r.status == "error").count();
    let total_pages: u64 = results.iter().map(|r| r.total_pages).sum();
    let corrupt_pages: u64 = results.iter().map(|r| r.invalid_pages).sum();
    let valid_pages: u64 = results.iter().map(|r| r.valid_pages).sum();
    // Integrity percentage ignores empty pages: only checked pages count.
    let checked_pages = valid_pages + corrupt_pages;
    let integrity_pct = if checked_pages > 0 {
        (valid_pages as f64 / checked_pages as f64) * 100.0
    } else {
        100.0
    };
    // Round to two decimals for stable display/serialization.
    let integrity_pct = (integrity_pct * 100.0).round() / 100.0;

    if opts.prometheus {
        let duration_secs = start.elapsed().as_secs_f64();
        print_prometheus_integrity(
            writer,
            &IntegrityPrometheusParams {
                datadir: &opts.datadir,
                results: &results,
                total_pages,
                corrupt_pages,
                integrity_pct,
                duration_secs,
            },
        )?;

        // Metrics are emitted first; corruption still fails the command.
        if corrupt_pages > 0 {
            return Err(IdbError::Parse(format!(
                "{} corrupt pages found across {} files",
                corrupt_pages, files_failed
            )));
        }
        return Ok(());
    }

    if opts.json {
        let report = AuditReport {
            datadir: opts.datadir.clone(),
            files: results,
            summary: AuditSummary {
                total_files,
                files_passed,
                files_failed,
                files_error,
                total_pages,
                corrupt_pages,
                integrity_pct,
            },
        };
        let json = serde_json::to_string_pretty(&report)
            .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
        wprintln!(writer, "{}", json)?;
    } else if opts.csv {
        wprintln!(
            writer,
            "file,status,total_pages,empty_pages,valid_pages,invalid_pages,lsn_mismatches"
        )?;
        for r in &results {
            wprintln!(
                writer,
                "{},{},{},{},{},{},{}",
                csv_escape(&r.file),
                r.status,
                r.total_pages,
                r.empty_pages,
                r.valid_pages,
                r.invalid_pages,
                r.lsn_mismatches
            )?;
        }
    } else {
        // Human-readable table: one line per file, then a summary footer.
        wprintln!(
            writer,
            "Auditing {} ({} files)...\n",
            opts.datadir,
            total_files
        )?;

        for r in &results {
            let status_colored = match r.status.as_str() {
                "PASS" => "PASS".green().to_string(),
                "FAIL" => "FAIL".red().to_string(),
                _ => "ERROR".yellow().to_string(),
            };

            if r.status == "error" {
                wprintln!(
                    writer,
                    "  {:<40} {}   {}",
                    r.file,
                    status_colored,
                    r.error.as_deref().unwrap_or("unknown error")
                )?;
            } else if r.invalid_pages > 0 {
                wprintln!(
                    writer,
                    "  {:<40} {}   {} pages, {} corrupt",
                    r.file,
                    status_colored,
                    r.total_pages,
                    r.invalid_pages
                )?;
            } else {
                wprintln!(
                    writer,
                    "  {:<40} {}   {} pages",
                    r.file,
                    status_colored,
                    r.total_pages
                )?;
            }

            // Individual corrupt page numbers only with --verbose.
            if opts.verbose && !r.corrupt_pages.is_empty() {
                wprintln!(writer, "    Corrupt pages: {:?}", r.corrupt_pages)?;
            }
        }

        wprintln!(writer)?;
        wprintln!(writer, "Summary:")?;
        wprintln!(
            writer,
            "  Files: {} ({} passed, {} failed{})",
            total_files,
            files_passed,
            files_failed,
            if files_error > 0 {
                format!(", {} error", files_error)
            } else {
                String::new()
            }
        )?;
        wprintln!(
            writer,
            "  Pages: {} total, {} corrupt",
            total_pages,
            corrupt_pages
        )?;
        wprintln!(writer, "  Integrity: {:.2}%", integrity_pct)?;
    }

    // Non-zero exit on corruption, after the full report has been written.
    if corrupt_pages > 0 {
        return Err(IdbError::Parse(format!(
            "{} corrupt pages found across {} files",
            corrupt_pages, files_failed
        )));
    }

    Ok(())
}
692
693// ---------------------------------------------------------------------------
694// Health mode (#84)
695// ---------------------------------------------------------------------------
696
/// Run health mode: compute per-file fill/fragmentation/garbage metrics in
/// parallel, then a directory-wide summary, with optional threshold filtering.
///
/// Note the ordering: the summary and Prometheus output are computed from ALL
/// results; `--min-fill-factor` / `--max-fragmentation` filtering only trims
/// the per-file listing afterwards.
fn execute_health(
    opts: &AuditOptions,
    ibd_files: &[std::path::PathBuf],
    datadir: &Path,
    writer: &mut dyn Write,
) -> Result<(), IdbError> {
    let start = Instant::now();

    // Progress bar only for human-readable output.
    let pb = if !opts.json && !opts.csv && !opts.prometheus {
        Some(create_progress_bar(ibd_files.len() as u64, "files"))
    } else {
        None
    };

    let page_size = opts.page_size;
    let keyring = opts.keyring.clone();
    let use_mmap = opts.mmap;

    // Analyze files in parallel; each worker bumps the shared progress bar.
    let mut results: Vec<FileHealthResult> = ibd_files
        .par_iter()
        .map(|path| {
            let r = audit_file_health(path, datadir, page_size, &keyring, use_mmap);
            if let Some(ref pb) = pb {
                pb.inc(1);
            }
            r
        })
        .collect();

    if let Some(ref pb) = pb {
        pb.finish_and_clear();
    }

    // Compute directory-wide summary from ALL results (before filtering)
    let total_files = results.len();
    let total_index_pages: u64 = results.iter().map(|r| r.total_index_pages).sum();

    // Averages only consider files that were analyzed successfully.
    let valid_results: Vec<&FileHealthResult> =
        results.iter().filter(|r| r.error.is_none()).collect();
    let n = valid_results.len() as f64;

    let avg_fill = if n > 0.0 {
        valid_results.iter().map(|r| r.avg_fill_factor).sum::<f64>() / n
    } else {
        0.0
    };
    let avg_frag = if n > 0.0 {
        valid_results
            .iter()
            .map(|r| r.avg_fragmentation)
            .sum::<f64>()
            / n
    } else {
        0.0
    };
    let avg_garbage = if n > 0.0 {
        valid_results
            .iter()
            .map(|r| r.avg_garbage_ratio)
            .sum::<f64>()
            / n
    } else {
        0.0
    };

    // Prometheus output uses unfiltered results to avoid stale markers
    if opts.prometheus {
        let duration_secs = start.elapsed().as_secs_f64();
        print_prometheus_health(
            writer,
            &HealthPrometheusParams {
                datadir: &opts.datadir,
                results: &results,
                total_files,
                total_index_pages,
                avg_fill,
                avg_frag,
                avg_garbage,
                duration_secs,
            },
        )?;
        return Ok(());
    }

    // Filter by thresholds (values are 0-100 from CLI, compare as 0.0-1.0)
    // Errored files are always kept so failures stay visible.
    if let Some(min_ff) = opts.min_fill_factor {
        let threshold = min_ff / 100.0;
        results.retain(|r| r.error.is_some() || r.avg_fill_factor < threshold);
    }
    if let Some(max_frag) = opts.max_fragmentation {
        let threshold = max_frag / 100.0;
        results.retain(|r| r.error.is_some() || r.avg_fragmentation > threshold);
    }

    // Sort worst-first by fragmentation (descending)
    results.sort_by(|a, b| {
        b.avg_fragmentation
            .partial_cmp(&a.avg_fragmentation)
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    if opts.json {
        let report = HealthAuditReport {
            datadir: opts.datadir.clone(),
            tablespaces: results,
            summary: DirectoryHealthSummary {
                total_files,
                total_index_pages,
                avg_fill_factor: round2(avg_fill),
                avg_fragmentation: round2(avg_frag),
                avg_garbage_ratio: round2(avg_garbage),
            },
        };
        let json = serde_json::to_string_pretty(&report)
            .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
        wprintln!(writer, "{}", json)?;
    } else if opts.csv {
        wprintln!(
            writer,
            "file,avg_fill_factor,avg_fragmentation,avg_garbage_ratio,index_count,total_index_pages"
        )?;
        // CSV rows carry percentages; errored files have no metrics to emit.
        for r in &results {
            if r.error.is_some() {
                continue;
            }
            wprintln!(
                writer,
                "{},{:.1},{:.1},{:.1},{},{}",
                csv_escape(&r.file),
                r.avg_fill_factor * 100.0,
                r.avg_fragmentation * 100.0,
                r.avg_garbage_ratio * 100.0,
                r.index_count,
                r.total_index_pages
            )?;
        }
    } else {
        // Human-readable table with a header row and a summary footer.
        wprintln!(writer, "Directory Health: {}\n", opts.datadir)?;
        wprintln!(
            writer,
            "  {:<40} {:>6} {:>6} {:>6} {:>8} {:>6}",
            "File",
            "Fill%",
            "Frag%",
            "Garb%",
            "Indexes",
            "Pages"
        )?;

        for r in &results {
            if let Some(ref err) = r.error {
                wprintln!(
                    writer,
                    "  {:<40} {}",
                    r.file,
                    format!("ERROR: {}", err).yellow()
                )?;
            } else {
                wprintln!(
                    writer,
                    "  {:<40} {:>5.1}  {:>5.1}  {:>5.1}  {:>7}  {:>5}",
                    r.file,
                    r.avg_fill_factor * 100.0,
                    r.avg_fragmentation * 100.0,
                    r.avg_garbage_ratio * 100.0,
                    r.index_count,
                    r.total_index_pages
                )?;
            }
        }

        wprintln!(writer)?;
        wprintln!(
            writer,
            "Summary: {} files, avg fill {:.1}%, avg frag {:.1}%, avg garbage {:.1}%",
            total_files,
            avg_fill * 100.0,
            avg_frag * 100.0,
            avg_garbage * 100.0
        )?;
    }

    Ok(())
}
881
882// ---------------------------------------------------------------------------
883// Mismatch mode (#85)
884// ---------------------------------------------------------------------------
885
/// Run mismatch mode: list every page whose checksum fails validation.
///
/// Returns `Err` when any mismatches were found (after output is written)
/// so the CLI exits non-zero.
///
/// NOTE(review): `opts.prometheus` is silently ignored in this mode — the
/// progress-bar guard below only checks json/csv. Confirm whether
/// `--checksum-mismatch --prometheus` should be rejected in `execute`.
fn execute_mismatch(
    opts: &AuditOptions,
    ibd_files: &[std::path::PathBuf],
    datadir: &Path,
    writer: &mut dyn Write,
) -> Result<(), IdbError> {
    // Progress bar only for human-readable output.
    let pb = if !opts.json && !opts.csv {
        Some(create_progress_bar(ibd_files.len() as u64, "files"))
    } else {
        None
    };

    let page_size = opts.page_size;
    let keyring = opts.keyring.clone();
    let use_mmap = opts.mmap;

    // Scan files in parallel; each worker bumps the shared progress bar.
    let all_results: Vec<(Vec<MismatchEntry>, u64)> = ibd_files
        .par_iter()
        .map(|path| {
            let r = audit_file_mismatches(path, datadir, page_size, &keyring, use_mmap);
            if let Some(ref pb) = pb {
                pb.inc(1);
            }
            r
        })
        .collect();

    if let Some(ref pb) = pb {
        pb.finish_and_clear();
    }

    let total_files_scanned = ibd_files.len();
    let total_pages_scanned: u64 = all_results.iter().map(|(_, count)| count).sum();

    // Flatten per-file entries and sort for deterministic output.
    let mut mismatches: Vec<MismatchEntry> = all_results
        .into_iter()
        .flat_map(|(entries, _)| entries)
        .collect();
    mismatches.sort_by(|a, b| (&a.file, a.page_number).cmp(&(&b.file, b.page_number)));

    if opts.json {
        let report = MismatchReport {
            datadir: opts.datadir.clone(),
            mismatches: mismatches.clone(),
            total_files_scanned,
            total_pages_scanned,
        };
        let json = serde_json::to_string_pretty(&report)
            .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
        wprintln!(writer, "{}", json)?;
    } else if opts.csv {
        wprintln!(
            writer,
            "file,page_number,stored_checksum,calculated_checksum,algorithm"
        )?;
        for m in &mismatches {
            wprintln!(
                writer,
                "{},{},{},{},{}",
                csv_escape(&m.file),
                m.page_number,
                m.stored_checksum,
                m.calculated_checksum,
                m.algorithm
            )?;
        }
    } else if mismatches.is_empty() {
        wprintln!(
            writer,
            "No checksum mismatches found ({} files, {} pages scanned).",
            total_files_scanned,
            total_pages_scanned
        )?;
    } else {
        // Human-readable table; checksums shown as zero-padded hex.
        wprintln!(
            writer,
            "{:<40} {:>6} {:>12} {:>12} {:>12}",
            "FILE",
            "PAGE",
            "STORED",
            "CALCULATED",
            "ALGORITHM"
        )?;
        for m in &mismatches {
            wprintln!(
                writer,
                "{:<40} {:>6} {:>12} {:>12} {:>12}",
                m.file,
                m.page_number,
                format!("0x{:08X}", m.stored_checksum),
                format!("0x{:08X}", m.calculated_checksum),
                m.algorithm
            )?;
        }
    }

    // Non-zero exit when mismatches exist, after the report has been written.
    if !mismatches.is_empty() {
        return Err(IdbError::Parse(format!(
            "{} checksum mismatches found",
            mismatches.len()
        )));
    }

    Ok(())
}
991
/// Round a value to two decimal places (half-away-from-zero, per `f64::round`).
fn round2(v: f64) -> f64 {
    let scaled = (v * 100.0).round();
    scaled / 100.0
}
995
996// ---------------------------------------------------------------------------
997// Prometheus exposition format output
998// ---------------------------------------------------------------------------
999
/// Inputs for `print_prometheus_integrity`, bundled to keep the function
/// signature small.
struct IntegrityPrometheusParams<'a> {
    /// Data directory path, emitted as the `datadir` label on every metric.
    datadir: &'a str,
    /// Per-file integrity results; entries with status `"error"` are
    /// skipped when emitting per-file metrics.
    results: &'a [FileIntegrityResult],
    /// Directory-wide total pages scanned (`innodb_audit_pages`).
    total_pages: u64,
    /// Directory-wide corrupt page count (`innodb_audit_corrupt_pages`).
    corrupt_pages: u64,
    /// Directory-wide integrity percentage (`innodb_audit_integrity_pct`).
    integrity_pct: f64,
    /// Scan duration in seconds (`innodb_scan_duration_seconds`).
    duration_secs: f64,
}
1008
1009/// Print integrity audit results in Prometheus exposition format.
1010fn print_prometheus_integrity(
1011    writer: &mut dyn Write,
1012    params: &IntegrityPrometheusParams<'_>,
1013) -> Result<(), IdbError> {
1014    let datadir = params.datadir;
1015    let results = params.results;
1016    let total_pages = params.total_pages;
1017    let corrupt_pages = params.corrupt_pages;
1018    let integrity_pct = params.integrity_pct;
1019    let duration_secs = params.duration_secs;
1020    // innodb_pages per file
1021    wprintln!(
1022        writer,
1023        "{}",
1024        prom::help_line("innodb_pages", "Total pages in tablespace")
1025    )?;
1026    wprintln!(writer, "{}", prom::type_line("innodb_pages", "gauge"))?;
1027    for r in results {
1028        if r.status == "error" {
1029            continue;
1030        }
1031        wprintln!(
1032            writer,
1033            "{}",
1034            prom::format_gauge_int(
1035                "innodb_pages",
1036                &[("datadir", datadir), ("file", &r.file)],
1037                r.total_pages
1038            )
1039        )?;
1040    }
1041
1042    // innodb_corrupt_pages per file
1043    wprintln!(
1044        writer,
1045        "{}",
1046        prom::help_line(
1047            "innodb_corrupt_pages",
1048            "Number of corrupt pages in tablespace"
1049        )
1050    )?;
1051    wprintln!(
1052        writer,
1053        "{}",
1054        prom::type_line("innodb_corrupt_pages", "gauge")
1055    )?;
1056    for r in results {
1057        if r.status == "error" {
1058            continue;
1059        }
1060        wprintln!(
1061            writer,
1062            "{}",
1063            prom::format_gauge_int(
1064                "innodb_corrupt_pages",
1065                &[("datadir", datadir), ("file", &r.file)],
1066                r.invalid_pages
1067            )
1068        )?;
1069    }
1070
1071    // innodb_empty_pages per file
1072    wprintln!(
1073        writer,
1074        "{}",
1075        prom::help_line("innodb_empty_pages", "Number of empty pages in tablespace")
1076    )?;
1077    wprintln!(writer, "{}", prom::type_line("innodb_empty_pages", "gauge"))?;
1078    for r in results {
1079        if r.status == "error" {
1080            continue;
1081        }
1082        wprintln!(
1083            writer,
1084            "{}",
1085            prom::format_gauge_int(
1086                "innodb_empty_pages",
1087                &[("datadir", datadir), ("file", &r.file)],
1088                r.empty_pages
1089            )
1090        )?;
1091    }
1092
1093    // innodb_audit_integrity_pct — directory-wide
1094    wprintln!(
1095        writer,
1096        "{}",
1097        prom::help_line(
1098            "innodb_audit_integrity_pct",
1099            "Directory-wide integrity percentage"
1100        )
1101    )?;
1102    wprintln!(
1103        writer,
1104        "{}",
1105        prom::type_line("innodb_audit_integrity_pct", "gauge")
1106    )?;
1107    wprintln!(
1108        writer,
1109        "{}",
1110        prom::format_gauge(
1111            "innodb_audit_integrity_pct",
1112            &[("datadir", datadir)],
1113            integrity_pct
1114        )
1115    )?;
1116
1117    // innodb_audit_pages — directory-wide
1118    wprintln!(
1119        writer,
1120        "{}",
1121        prom::help_line(
1122            "innodb_audit_pages",
1123            "Total pages scanned across data directory"
1124        )
1125    )?;
1126    wprintln!(writer, "{}", prom::type_line("innodb_audit_pages", "gauge"))?;
1127    wprintln!(
1128        writer,
1129        "{}",
1130        prom::format_gauge_int("innodb_audit_pages", &[("datadir", datadir)], total_pages)
1131    )?;
1132
1133    // innodb_audit_corrupt_pages — directory-wide
1134    wprintln!(
1135        writer,
1136        "{}",
1137        prom::help_line(
1138            "innodb_audit_corrupt_pages",
1139            "Total corrupt pages across data directory"
1140        )
1141    )?;
1142    wprintln!(
1143        writer,
1144        "{}",
1145        prom::type_line("innodb_audit_corrupt_pages", "gauge")
1146    )?;
1147    wprintln!(
1148        writer,
1149        "{}",
1150        prom::format_gauge_int(
1151            "innodb_audit_corrupt_pages",
1152            &[("datadir", datadir)],
1153            corrupt_pages
1154        )
1155    )?;
1156
1157    // innodb_scan_duration_seconds — directory-wide
1158    wprintln!(
1159        writer,
1160        "{}",
1161        prom::help_line(
1162            "innodb_scan_duration_seconds",
1163            "Time spent scanning the data directory"
1164        )
1165    )?;
1166    wprintln!(
1167        writer,
1168        "{}",
1169        prom::type_line("innodb_scan_duration_seconds", "gauge")
1170    )?;
1171    wprintln!(
1172        writer,
1173        "{}",
1174        prom::format_gauge(
1175            "innodb_scan_duration_seconds",
1176            &[("datadir", datadir)],
1177            duration_secs
1178        )
1179    )?;
1180
1181    Ok(())
1182}
1183
/// Inputs for `print_prometheus_health`, bundled to keep the function
/// signature small.
struct HealthPrometheusParams<'a> {
    /// Data directory path, emitted as the `datadir` label on every metric.
    datadir: &'a str,
    /// Per-file health results; entries with `error` set are skipped when
    /// emitting per-file metrics.
    results: &'a [FileHealthResult],
    /// Number of tablespace files scanned (`innodb_audit_files`).
    total_files: usize,
    /// Directory-wide INDEX page count (`innodb_audit_index_pages`).
    total_index_pages: u64,
    /// Directory-wide average fill factor (`innodb_audit_avg_fill_factor`).
    avg_fill: f64,
    /// Directory-wide average fragmentation (`innodb_audit_avg_fragmentation`).
    avg_frag: f64,
    /// Directory-wide average garbage ratio (`innodb_audit_avg_garbage_ratio`).
    avg_garbage: f64,
    /// Scan duration in seconds (`innodb_scan_duration_seconds`).
    duration_secs: f64,
}
1194
1195/// Print health audit results in Prometheus exposition format.
1196fn print_prometheus_health(
1197    writer: &mut dyn Write,
1198    params: &HealthPrometheusParams<'_>,
1199) -> Result<(), IdbError> {
1200    let datadir = params.datadir;
1201    let results = params.results;
1202    let total_files = params.total_files;
1203    let total_index_pages = params.total_index_pages;
1204    let avg_fill = params.avg_fill;
1205    let avg_frag = params.avg_frag;
1206    let avg_garbage = params.avg_garbage;
1207    let duration_secs = params.duration_secs;
1208    // Per-file fill factor
1209    wprintln!(
1210        writer,
1211        "{}",
1212        prom::help_line(
1213            "innodb_fill_factor",
1214            "Average B+Tree fill factor for tablespace"
1215        )
1216    )?;
1217    wprintln!(writer, "{}", prom::type_line("innodb_fill_factor", "gauge"))?;
1218    for r in results {
1219        if r.error.is_some() {
1220            continue;
1221        }
1222        wprintln!(
1223            writer,
1224            "{}",
1225            prom::format_gauge(
1226                "innodb_fill_factor",
1227                &[("datadir", datadir), ("file", &r.file)],
1228                r.avg_fill_factor
1229            )
1230        )?;
1231    }
1232
1233    // Per-file fragmentation
1234    wprintln!(
1235        writer,
1236        "{}",
1237        prom::help_line(
1238            "innodb_fragmentation_ratio",
1239            "Average fragmentation ratio for tablespace"
1240        )
1241    )?;
1242    wprintln!(
1243        writer,
1244        "{}",
1245        prom::type_line("innodb_fragmentation_ratio", "gauge")
1246    )?;
1247    for r in results {
1248        if r.error.is_some() {
1249            continue;
1250        }
1251        wprintln!(
1252            writer,
1253            "{}",
1254            prom::format_gauge(
1255                "innodb_fragmentation_ratio",
1256                &[("datadir", datadir), ("file", &r.file)],
1257                r.avg_fragmentation
1258            )
1259        )?;
1260    }
1261
1262    // Per-file garbage ratio
1263    wprintln!(
1264        writer,
1265        "{}",
1266        prom::help_line(
1267            "innodb_garbage_ratio",
1268            "Average garbage ratio for tablespace"
1269        )
1270    )?;
1271    wprintln!(
1272        writer,
1273        "{}",
1274        prom::type_line("innodb_garbage_ratio", "gauge")
1275    )?;
1276    for r in results {
1277        if r.error.is_some() {
1278            continue;
1279        }
1280        wprintln!(
1281            writer,
1282            "{}",
1283            prom::format_gauge(
1284                "innodb_garbage_ratio",
1285                &[("datadir", datadir), ("file", &r.file)],
1286                r.avg_garbage_ratio
1287            )
1288        )?;
1289    }
1290
1291    // Per-file index page count
1292    wprintln!(
1293        writer,
1294        "{}",
1295        prom::help_line("innodb_index_pages", "Total INDEX pages in tablespace")
1296    )?;
1297    wprintln!(writer, "{}", prom::type_line("innodb_index_pages", "gauge"))?;
1298    for r in results {
1299        if r.error.is_some() {
1300            continue;
1301        }
1302        wprintln!(
1303            writer,
1304            "{}",
1305            prom::format_gauge_int(
1306                "innodb_index_pages",
1307                &[("datadir", datadir), ("file", &r.file)],
1308                r.total_index_pages
1309            )
1310        )?;
1311    }
1312
1313    // Directory-wide summary metrics
1314    wprintln!(
1315        writer,
1316        "{}",
1317        prom::help_line("innodb_audit_files", "Total tablespace files scanned")
1318    )?;
1319    wprintln!(writer, "{}", prom::type_line("innodb_audit_files", "gauge"))?;
1320    wprintln!(
1321        writer,
1322        "{}",
1323        prom::format_gauge_int(
1324            "innodb_audit_files",
1325            &[("datadir", datadir)],
1326            total_files as u64
1327        )
1328    )?;
1329
1330    wprintln!(
1331        writer,
1332        "{}",
1333        prom::help_line(
1334            "innodb_audit_index_pages",
1335            "Total INDEX pages across data directory"
1336        )
1337    )?;
1338    wprintln!(
1339        writer,
1340        "{}",
1341        prom::type_line("innodb_audit_index_pages", "gauge")
1342    )?;
1343    wprintln!(
1344        writer,
1345        "{}",
1346        prom::format_gauge_int(
1347            "innodb_audit_index_pages",
1348            &[("datadir", datadir)],
1349            total_index_pages
1350        )
1351    )?;
1352
1353    wprintln!(
1354        writer,
1355        "{}",
1356        prom::help_line(
1357            "innodb_audit_avg_fill_factor",
1358            "Directory-wide average fill factor"
1359        )
1360    )?;
1361    wprintln!(
1362        writer,
1363        "{}",
1364        prom::type_line("innodb_audit_avg_fill_factor", "gauge")
1365    )?;
1366    wprintln!(
1367        writer,
1368        "{}",
1369        prom::format_gauge(
1370            "innodb_audit_avg_fill_factor",
1371            &[("datadir", datadir)],
1372            avg_fill
1373        )
1374    )?;
1375
1376    wprintln!(
1377        writer,
1378        "{}",
1379        prom::help_line(
1380            "innodb_audit_avg_fragmentation",
1381            "Directory-wide average fragmentation"
1382        )
1383    )?;
1384    wprintln!(
1385        writer,
1386        "{}",
1387        prom::type_line("innodb_audit_avg_fragmentation", "gauge")
1388    )?;
1389    wprintln!(
1390        writer,
1391        "{}",
1392        prom::format_gauge(
1393            "innodb_audit_avg_fragmentation",
1394            &[("datadir", datadir)],
1395            avg_frag
1396        )
1397    )?;
1398
1399    wprintln!(
1400        writer,
1401        "{}",
1402        prom::help_line(
1403            "innodb_audit_avg_garbage_ratio",
1404            "Directory-wide average garbage ratio"
1405        )
1406    )?;
1407    wprintln!(
1408        writer,
1409        "{}",
1410        prom::type_line("innodb_audit_avg_garbage_ratio", "gauge")
1411    )?;
1412    wprintln!(
1413        writer,
1414        "{}",
1415        prom::format_gauge(
1416            "innodb_audit_avg_garbage_ratio",
1417            &[("datadir", datadir)],
1418            avg_garbage
1419        )
1420    )?;
1421
1422    // innodb_scan_duration_seconds — directory-wide
1423    wprintln!(
1424        writer,
1425        "{}",
1426        prom::help_line(
1427            "innodb_scan_duration_seconds",
1428            "Time spent scanning the data directory"
1429        )
1430    )?;
1431    wprintln!(
1432        writer,
1433        "{}",
1434        prom::type_line("innodb_scan_duration_seconds", "gauge")
1435    )?;
1436    wprintln!(
1437        writer,
1438        "{}",
1439        prom::format_gauge(
1440            "innodb_scan_duration_seconds",
1441            &[("datadir", datadir)],
1442            duration_secs
1443        )
1444    )?;
1445
1446    Ok(())
1447}