1use std::io::Write;
2use std::path::Path;
3use std::time::Instant;
4
5use colored::Colorize;
6use rayon::prelude::*;
7use serde::Serialize;
8
9use crate::cli::{create_progress_bar, csv_escape, wprintln};
10use crate::innodb::checksum::{validate_checksum, validate_lsn};
11use crate::innodb::health;
12use crate::util::fs::find_tablespace_files;
13use crate::util::prometheus as prom;
14use crate::IdbError;
15
/// Options for the `audit` command (directory-wide tablespace scan).
pub struct AuditOptions {
    // Path to the data directory to scan for `.ibd` files.
    pub datadir: String,
    // Run the B+Tree health audit instead of the integrity audit.
    pub health: bool,
    // Report individual checksum mismatches instead of the integrity audit.
    pub checksum_mismatch: bool,
    // In text mode, also list corrupt page numbers per file.
    pub verbose: bool,
    // Emit a JSON report.
    pub json: bool,
    // Emit CSV rows.
    pub csv: bool,
    // Emit Prometheus exposition-format metrics (exclusive with json/csv).
    pub prometheus: bool,
    // Override the detected page size, if set.
    pub page_size: Option<u32>,
    // Optional keyring path used to set up tablespace decryption.
    pub keyring: Option<String>,
    // Open tablespaces via memory mapping.
    pub mmap: bool,
    // Health mode: keep only files with avg fill factor below this percent.
    pub min_fill_factor: Option<f64>,
    // Health mode: keep only files with avg fragmentation above this percent.
    pub max_fragmentation: Option<f64>,
    // Health mode: compute per-index bloat grades.
    pub bloat: bool,
    // Health mode: keep only files at or above this bloat grade (A..F);
    // implies bloat analysis.
    pub max_bloat_grade: Option<String>,
    // Maximum directory depth for file discovery.
    pub depth: Option<u32>,
}
49
/// Top-level JSON report for the integrity audit.
#[derive(Serialize)]
struct AuditReport {
    datadir: String,
    files: Vec<FileIntegrityResult>,
    summary: AuditSummary,
}
60
/// Per-file result of the page-integrity audit.
#[derive(Serialize, Clone)]
struct FileIntegrityResult {
    // Path relative to the datadir (falls back to the absolute path).
    file: String,
    // "PASS", "FAIL" (any invalid page), or "error" (open/read failure).
    status: String,
    page_size: u32,
    total_pages: u64,
    // All-zero (unused) pages.
    empty_pages: u64,
    valid_pages: u64,
    invalid_pages: u64,
    // Pages whose checksum passed but whose LSN validation failed.
    lsn_mismatches: u64,
    #[serde(skip_serializing_if = "Option::is_none")]
    error: Option<String>,
    // Page numbers that failed checksum validation.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    corrupt_pages: Vec<u64>,
}
76
/// Directory-wide rollup of the integrity audit.
#[derive(Serialize)]
struct AuditSummary {
    total_files: usize,
    files_passed: usize,
    files_failed: usize,
    files_error: usize,
    total_pages: u64,
    corrupt_pages: u64,
    // Percent of checked (non-empty) pages with valid checksums, rounded
    // to two decimals; 100.0 when no pages were checked.
    integrity_pct: f64,
}
87
/// Per-file result of the B+Tree health audit.
/// Ratios are fractions in [0, 1]; output code multiplies by 100 for display.
#[derive(Serialize, Clone)]
struct FileHealthResult {
    // Path relative to the datadir (falls back to the absolute path).
    file: String,
    avg_fill_factor: f64,
    avg_fragmentation: f64,
    avg_garbage_ratio: f64,
    index_count: u64,
    total_index_pages: u64,
    // Worst (highest-scoring) bloat grade across indexes, when bloat
    // analysis ran.
    #[serde(skip_serializing_if = "Option::is_none")]
    worst_bloat_grade: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    worst_bloat_score: Option<f64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    error: Option<String>,
}
107
/// Top-level JSON report for the health audit.
#[derive(Serialize)]
struct HealthAuditReport {
    datadir: String,
    tablespaces: Vec<FileHealthResult>,
    summary: DirectoryHealthSummary,
}
114
/// Directory-wide rollup of the health audit.
/// Averages are unweighted means over files that audited without error.
#[derive(Serialize)]
struct DirectoryHealthSummary {
    total_files: usize,
    total_index_pages: u64,
    avg_fill_factor: f64,
    avg_fragmentation: f64,
    avg_garbage_ratio: f64,
    #[serde(skip_serializing_if = "Option::is_none")]
    worst_bloat_grade: Option<String>,
}
125
/// One checksum mismatch: a non-empty page whose stored checksum did not
/// match the recalculated value.
#[derive(Serialize, Clone)]
struct MismatchEntry {
    file: String,
    page_number: u64,
    stored_checksum: u32,
    calculated_checksum: u32,
    // Lowercase algorithm name, see `algorithm_name`.
    algorithm: String,
}
138
/// Top-level JSON report for the checksum-mismatch audit.
#[derive(Serialize)]
struct MismatchReport {
    datadir: String,
    mismatches: Vec<MismatchEntry>,
    total_files_scanned: usize,
    total_pages_scanned: u64,
}
146
147fn audit_file(
153 path: &Path,
154 datadir: &Path,
155 page_size_override: Option<u32>,
156 keyring: &Option<String>,
157 use_mmap: bool,
158) -> FileIntegrityResult {
159 let display = path.strip_prefix(datadir).unwrap_or(path);
160 let display_str = display.display().to_string();
161 let path_str = path.to_string_lossy();
162
163 let mut ts = match crate::cli::open_tablespace(&path_str, page_size_override, use_mmap) {
164 Ok(t) => t,
165 Err(e) => {
166 return FileIntegrityResult {
167 file: display_str,
168 status: "error".to_string(),
169 page_size: 0,
170 total_pages: 0,
171 empty_pages: 0,
172 valid_pages: 0,
173 invalid_pages: 0,
174 lsn_mismatches: 0,
175 error: Some(e.to_string()),
176 corrupt_pages: Vec::new(),
177 };
178 }
179 };
180
181 if let Some(ref kp) = keyring {
182 if crate::cli::setup_decryption(&mut ts, kp).is_err() {
183 }
185 }
186
187 let page_size = ts.page_size();
188 let page_count = ts.page_count();
189 let vendor_info = ts.vendor_info().clone();
190
191 let all_data = match ts.read_all_pages() {
193 Ok(d) => d,
194 Err(e) => {
195 return FileIntegrityResult {
196 file: display_str,
197 status: "error".to_string(),
198 page_size,
199 total_pages: page_count,
200 empty_pages: 0,
201 valid_pages: 0,
202 invalid_pages: 0,
203 lsn_mismatches: 0,
204 error: Some(e.to_string()),
205 corrupt_pages: Vec::new(),
206 };
207 }
208 };
209
210 let ps = page_size as usize;
211
212 let results: Vec<(u64, bool, bool, bool, Option<u64>)> = (0..page_count)
214 .into_par_iter()
215 .map(|page_num| {
216 let offset = page_num as usize * ps;
217 if offset + ps > all_data.len() {
218 return (page_num, false, false, false, None);
219 }
220 let page_data = &all_data[offset..offset + ps];
221
222 if page_data.iter().all(|&b| b == 0) {
224 return (page_num, true, true, true, None); }
226
227 let csum = validate_checksum(page_data, page_size, Some(&vendor_info));
228 let lsn_ok = validate_lsn(page_data, page_size);
229
230 let corrupt_page = if !csum.valid { Some(page_num) } else { None };
231 (page_num, csum.valid, false, lsn_ok, corrupt_page)
233 })
234 .collect();
235
236 let mut valid = 0u64;
237 let mut invalid = 0u64;
238 let mut empty = 0u64;
239 let mut lsn_mismatches = 0u64;
240 let mut corrupt_pages = Vec::new();
241
242 for &(_, is_valid, is_empty, lsn_ok, ref corrupt) in &results {
243 if is_empty {
244 empty += 1;
245 } else if is_valid {
246 valid += 1;
247 } else {
248 invalid += 1;
249 }
250 if !lsn_ok && !is_empty && is_valid {
251 lsn_mismatches += 1;
252 }
253 if let Some(pn) = corrupt {
254 corrupt_pages.push(*pn);
255 }
256 }
257
258 let status = if invalid > 0 { "FAIL" } else { "PASS" };
259
260 FileIntegrityResult {
261 file: display_str,
262 status: status.to_string(),
263 page_size,
264 total_pages: page_count,
265 empty_pages: empty,
266 valid_pages: valid,
267 invalid_pages: invalid,
268 lsn_mismatches,
269 error: None,
270 corrupt_pages,
271 }
272}
273
/// Audits a single `.ibd` file for B+Tree health metrics (fill factor,
/// fragmentation, garbage ratio) and, when `bloat` is set, grades
/// per-index bloat using the ratio of delete-marked records on leaf pages.
/// Open/scan failures are reported via the `error` field with zeroed stats.
fn audit_file_health(
    path: &Path,
    datadir: &Path,
    page_size_override: Option<u32>,
    keyring: &Option<String>,
    use_mmap: bool,
    bloat: bool,
) -> FileHealthResult {
    // Report paths relative to the datadir when possible.
    let display = path.strip_prefix(datadir).unwrap_or(path);
    let display_str = display.display().to_string();
    let path_str = path.to_string_lossy();

    let mut ts = match crate::cli::open_tablespace(&path_str, page_size_override, use_mmap) {
        Ok(t) => t,
        Err(e) => {
            return FileHealthResult {
                file: display_str,
                avg_fill_factor: 0.0,
                avg_fragmentation: 0.0,
                avg_garbage_ratio: 0.0,
                index_count: 0,
                total_index_pages: 0,
                worst_bloat_grade: None,
                worst_bloat_score: None,
                error: Some(e.to_string()),
            };
        }
    };

    // Best-effort decryption; failures are ignored.
    if let Some(ref kp) = keyring {
        let _ = crate::cli::setup_decryption(&mut ts, kp);
    }

    let page_size = ts.page_size();
    let total_pages = ts.page_count();

    let mut snapshots = Vec::new();
    let mut empty_pages = 0u64;
    // index_id -> (delete-marked records, total records), leaf pages only.
    let mut delete_counts: std::collections::HashMap<u64, (u64, u64)> =
        std::collections::HashMap::new();

    let scan_result = ts.for_each_page(|page_num, data| {
        if data.iter().all(|&b| b == 0) {
            empty_pages += 1;
        } else if let Some(snap) = health::extract_index_page_snapshot(data, page_num) {
            // Only leaf pages (level 0) are walked for record-level counts.
            if snap.level == 0 && bloat {
                let recs = crate::innodb::record::walk_compact_records(data);
                let total = recs.len() as u64;
                let deleted = recs.iter().filter(|r| r.header.delete_mark()).count() as u64;
                let entry = delete_counts.entry(snap.index_id).or_insert((0, 0));
                entry.0 += deleted;
                entry.1 += total;
            }
            snapshots.push(snap);
        }
        Ok(())
    });

    if let Err(e) = scan_result {
        return FileHealthResult {
            file: display_str,
            avg_fill_factor: 0.0,
            avg_fragmentation: 0.0,
            avg_garbage_ratio: 0.0,
            index_count: 0,
            total_index_pages: 0,
            worst_bloat_grade: None,
            worst_bloat_score: None,
            error: Some(e.to_string()),
        };
    }

    let mut report =
        health::analyze_health(snapshots, page_size, total_pages, empty_pages, &path_str);

    // Grade each index's bloat and keep the worst (highest) score.
    let (worst_grade, worst_score) = if bloat {
        let mut worst_g: Option<String> = None;
        let mut worst_s: f64 = 0.0;
        for idx in &mut report.indexes {
            let (deleted, total) = delete_counts.get(&idx.index_id).copied().unwrap_or((0, 0));
            let delete_mark_ratio = if total > 0 {
                deleted as f64 / total as f64
            } else {
                0.0
            };
            let bloat_score = health::score_bloat(idx, delete_mark_ratio);
            if bloat_score.score > worst_s {
                worst_s = bloat_score.score;
                worst_g = Some(format!("{}", bloat_score.grade));
            }
            idx.bloat = Some(bloat_score);
        }
        let has_score = worst_g.is_some();
        (worst_g, if has_score { Some(worst_s) } else { None })
    } else {
        (None, None)
    };

    FileHealthResult {
        file: display_str,
        avg_fill_factor: report.summary.avg_fill_factor,
        avg_fragmentation: report.summary.avg_fragmentation,
        avg_garbage_ratio: report.summary.avg_garbage_ratio,
        index_count: report.summary.index_count,
        total_index_pages: report.summary.index_pages,
        worst_bloat_grade: worst_grade,
        worst_bloat_score: worst_score,
        error: None,
    }
}
388
/// Scans a single `.ibd` file and collects an entry for every non-empty
/// page whose checksum fails validation. Returns the mismatches plus the
/// number of pages scanned. Open/read failures are silently reported as
/// no mismatches (this mode has no per-file error channel).
fn audit_file_mismatches(
    path: &Path,
    datadir: &Path,
    page_size_override: Option<u32>,
    keyring: &Option<String>,
    use_mmap: bool,
) -> (Vec<MismatchEntry>, u64) {
    // Report paths relative to the datadir when possible.
    let display = path.strip_prefix(datadir).unwrap_or(path);
    let display_str = display.display().to_string();
    let path_str = path.to_string_lossy();

    let mut ts = match crate::cli::open_tablespace(&path_str, page_size_override, use_mmap) {
        Ok(t) => t,
        Err(_) => return (Vec::new(), 0),
    };

    // Best-effort decryption; failures are ignored.
    if let Some(ref kp) = keyring {
        let _ = crate::cli::setup_decryption(&mut ts, kp);
    }

    let page_size = ts.page_size();
    let page_count = ts.page_count();
    let vendor_info = ts.vendor_info().clone();

    let all_data = match ts.read_all_pages() {
        Ok(d) => d,
        Err(_) => return (Vec::new(), page_count),
    };

    let ps = page_size as usize;

    // Validate pages in parallel; keep only checksum failures.
    let mismatches: Vec<MismatchEntry> = (0..page_count)
        .into_par_iter()
        .filter_map(|page_num| {
            let offset = page_num as usize * ps;
            if offset + ps > all_data.len() {
                return None;
            }
            let page_data = &all_data[offset..offset + ps];

            // All-zero pages are unused; skip them.
            if page_data.iter().all(|&b| b == 0) {
                return None;
            }

            let csum = validate_checksum(page_data, page_size, Some(&vendor_info));
            if csum.valid {
                return None;
            }

            Some(MismatchEntry {
                file: display_str.clone(),
                page_number: page_num,
                stored_checksum: csum.stored_checksum,
                calculated_checksum: csum.calculated_checksum,
                algorithm: algorithm_name(csum.algorithm).to_string(),
            })
        })
        .collect();

    (mismatches, page_count)
}
451
452fn algorithm_name(algo: crate::innodb::checksum::ChecksumAlgorithm) -> &'static str {
453 match algo {
454 crate::innodb::checksum::ChecksumAlgorithm::Crc32c => "crc32c",
455 crate::innodb::checksum::ChecksumAlgorithm::InnoDB => "innodb",
456 crate::innodb::checksum::ChecksumAlgorithm::MariaDbFullCrc32 => "mariadb_full_crc32",
457 crate::innodb::checksum::ChecksumAlgorithm::None => "none",
458 }
459}
460
/// Entry point for the `audit` command: validates option combinations,
/// discovers `.ibd` files under the datadir, and dispatches to the
/// health, checksum-mismatch, or default integrity sub-audit.
///
/// Returns `IdbError::Argument` on invalid options or a missing datadir;
/// sub-audits may return `IdbError::Parse` to signal corruption found.
pub fn execute(opts: &AuditOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
    // Prometheus exposition output cannot be mixed with JSON/CSV.
    if opts.prometheus && (opts.json || opts.csv) {
        return Err(IdbError::Argument(
            "--prometheus cannot be combined with JSON or CSV output".to_string(),
        ));
    }

    if opts.health && opts.checksum_mismatch {
        return Err(IdbError::Argument(
            "--health and --checksum-mismatch are mutually exclusive".to_string(),
        ));
    }

    let datadir = Path::new(&opts.datadir);
    if !datadir.is_dir() {
        return Err(IdbError::Argument(format!(
            "Data directory does not exist: {}",
            opts.datadir
        )));
    }

    let ibd_files = find_tablespace_files(datadir, &["ibd"], opts.depth)?;

    // With no files, still emit a well-formed (empty) report per output
    // mode, then succeed.
    if ibd_files.is_empty() {
        if opts.prometheus {
            return Ok(());
        } else if opts.json {
            if opts.health {
                let report = HealthAuditReport {
                    datadir: opts.datadir.clone(),
                    tablespaces: Vec::new(),
                    summary: DirectoryHealthSummary {
                        total_files: 0,
                        total_index_pages: 0,
                        avg_fill_factor: 0.0,
                        avg_fragmentation: 0.0,
                        avg_garbage_ratio: 0.0,
                        worst_bloat_grade: None,
                    },
                };
                let json = serde_json::to_string_pretty(&report)
                    .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
                wprintln!(writer, "{}", json)?;
            } else if opts.checksum_mismatch {
                let report = MismatchReport {
                    datadir: opts.datadir.clone(),
                    mismatches: Vec::new(),
                    total_files_scanned: 0,
                    total_pages_scanned: 0,
                };
                let json = serde_json::to_string_pretty(&report)
                    .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
                wprintln!(writer, "{}", json)?;
            } else {
                let report = AuditReport {
                    datadir: opts.datadir.clone(),
                    files: Vec::new(),
                    summary: AuditSummary {
                        total_files: 0,
                        files_passed: 0,
                        files_failed: 0,
                        files_error: 0,
                        total_pages: 0,
                        corrupt_pages: 0,
                        integrity_pct: 100.0,
                    },
                };
                let json = serde_json::to_string_pretty(&report)
                    .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
                wprintln!(writer, "{}", json)?;
            }
        } else {
            wprintln!(writer, "No .ibd files found in {}", opts.datadir)?;
        }
        return Ok(());
    }

    if opts.health {
        execute_health(opts, &ibd_files, datadir, writer)
    } else if opts.checksum_mismatch {
        execute_mismatch(opts, &ibd_files, datadir, writer)
    } else {
        execute_integrity(opts, &ibd_files, datadir, writer)
    }
}
553
/// Runs the integrity audit over all discovered `.ibd` files and prints
/// results in text, JSON, CSV, or Prometheus format.
///
/// Returns `Err(IdbError::Parse)` when any corrupt pages were found so the
/// caller exits non-zero; `Ok(())` otherwise.
fn execute_integrity(
    opts: &AuditOptions,
    ibd_files: &[std::path::PathBuf],
    datadir: &Path,
    writer: &mut dyn Write,
) -> Result<(), IdbError> {
    let start = Instant::now();

    // Progress bar only in the human-readable output mode.
    let pb = if !opts.json && !opts.csv && !opts.prometheus {
        Some(create_progress_bar(ibd_files.len() as u64, "files"))
    } else {
        None
    };

    let page_size = opts.page_size;
    let keyring = opts.keyring.clone();
    let use_mmap = opts.mmap;

    // Audit files in parallel; each file also validates pages in parallel.
    let mut results: Vec<FileIntegrityResult> = ibd_files
        .par_iter()
        .map(|path| {
            let r = audit_file(path, datadir, page_size, &keyring, use_mmap);
            if let Some(ref pb) = pb {
                pb.inc(1);
            }
            r
        })
        .collect();

    if let Some(ref pb) = pb {
        pb.finish_and_clear();
    }

    // Deterministic output order regardless of parallel completion order.
    results.sort_by(|a, b| a.file.cmp(&b.file));

    let total_files = results.len();
    let files_passed = results.iter().filter(|r| r.status == "PASS").count();
    let files_failed = results.iter().filter(|r| r.status == "FAIL").count();
    let files_error = results.iter().filter(|r| r.status == "error").count();
    let total_pages: u64 = results.iter().map(|r| r.total_pages).sum();
    let corrupt_pages: u64 = results.iter().map(|r| r.invalid_pages).sum();
    let valid_pages: u64 = results.iter().map(|r| r.valid_pages).sum();
    // Integrity % is over checked (non-empty) pages; 100% when none checked.
    let checked_pages = valid_pages + corrupt_pages;
    let integrity_pct = if checked_pages > 0 {
        (valid_pages as f64 / checked_pages as f64) * 100.0
    } else {
        100.0
    };
    let integrity_pct = (integrity_pct * 100.0).round() / 100.0;

    if opts.prometheus {
        let duration_secs = start.elapsed().as_secs_f64();
        print_prometheus_integrity(
            writer,
            &IntegrityPrometheusParams {
                datadir: &opts.datadir,
                results: &results,
                total_pages,
                corrupt_pages,
                integrity_pct,
                duration_secs,
            },
        )?;

        if corrupt_pages > 0 {
            return Err(IdbError::Parse(format!(
                "{} corrupt pages found across {} files",
                corrupt_pages, files_failed
            )));
        }
        return Ok(());
    }

    if opts.json {
        let report = AuditReport {
            datadir: opts.datadir.clone(),
            files: results,
            summary: AuditSummary {
                total_files,
                files_passed,
                files_failed,
                files_error,
                total_pages,
                corrupt_pages,
                integrity_pct,
            },
        };
        let json = serde_json::to_string_pretty(&report)
            .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
        wprintln!(writer, "{}", json)?;
    } else if opts.csv {
        wprintln!(
            writer,
            "file,status,total_pages,empty_pages,valid_pages,invalid_pages,lsn_mismatches"
        )?;
        for r in &results {
            wprintln!(
                writer,
                "{},{},{},{},{},{},{}",
                csv_escape(&r.file),
                r.status,
                r.total_pages,
                r.empty_pages,
                r.valid_pages,
                r.invalid_pages,
                r.lsn_mismatches
            )?;
        }
    } else {
        // Human-readable output: per-file line plus a summary block.
        wprintln!(
            writer,
            "Auditing {} ({} files)...\n",
            opts.datadir,
            total_files
        )?;

        for r in &results {
            let status_colored = match r.status.as_str() {
                "PASS" => "PASS".green().to_string(),
                "FAIL" => "FAIL".red().to_string(),
                _ => "ERROR".yellow().to_string(),
            };

            if r.status == "error" {
                wprintln!(
                    writer,
                    "  {:<40} {}  {}",
                    r.file,
                    status_colored,
                    r.error.as_deref().unwrap_or("unknown error")
                )?;
            } else if r.invalid_pages > 0 {
                wprintln!(
                    writer,
                    "  {:<40} {}  {} pages, {} corrupt",
                    r.file,
                    status_colored,
                    r.total_pages,
                    r.invalid_pages
                )?;
            } else {
                wprintln!(
                    writer,
                    "  {:<40} {}  {} pages",
                    r.file,
                    status_colored,
                    r.total_pages
                )?;
            }

            if opts.verbose && !r.corrupt_pages.is_empty() {
                wprintln!(writer, "      Corrupt pages: {:?}", r.corrupt_pages)?;
            }
        }

        wprintln!(writer)?;
        wprintln!(writer, "Summary:")?;
        wprintln!(
            writer,
            "  Files:     {} ({} passed, {} failed{})",
            total_files,
            files_passed,
            files_failed,
            if files_error > 0 {
                format!(", {} error", files_error)
            } else {
                String::new()
            }
        )?;
        wprintln!(
            writer,
            "  Pages:     {} total, {} corrupt",
            total_pages,
            corrupt_pages
        )?;
        wprintln!(writer, "  Integrity: {:.2}%", integrity_pct)?;
    }

    // Corruption found: propagate as an error for a non-zero exit code.
    if corrupt_pages > 0 {
        return Err(IdbError::Parse(format!(
            "{} corrupt pages found across {} files",
            corrupt_pages, files_failed
        )));
    }

    Ok(())
}
747
/// Runs the health audit over all discovered `.ibd` files: computes
/// per-file fill factor / fragmentation / garbage metrics (and optional
/// bloat grades), applies the threshold filters, and prints results in
/// text, JSON, CSV, or Prometheus format. Always returns `Ok` unless an
/// invalid `--max-bloat-grade` is supplied or output fails.
fn execute_health(
    opts: &AuditOptions,
    ibd_files: &[std::path::PathBuf],
    datadir: &Path,
    writer: &mut dyn Write,
) -> Result<(), IdbError> {
    let start = Instant::now();

    // Progress bar only in the human-readable output mode.
    let pb = if !opts.json && !opts.csv && !opts.prometheus {
        Some(create_progress_bar(ibd_files.len() as u64, "files"))
    } else {
        None
    };

    let page_size = opts.page_size;
    let keyring = opts.keyring.clone();
    let use_mmap = opts.mmap;

    // A grade filter implies bloat analysis even without --bloat.
    let do_bloat = opts.bloat || opts.max_bloat_grade.is_some();

    let mut results: Vec<FileHealthResult> = ibd_files
        .par_iter()
        .map(|path| {
            let r = audit_file_health(path, datadir, page_size, &keyring, use_mmap, do_bloat);
            if let Some(ref pb) = pb {
                pb.inc(1);
            }
            r
        })
        .collect();

    if let Some(ref pb) = pb {
        pb.finish_and_clear();
    }

    // Summary stats are computed over ALL results, before any filtering.
    let total_files = results.len();
    let total_index_pages: u64 = results.iter().map(|r| r.total_index_pages).sum();

    // Averages exclude files that failed to audit.
    let valid_results: Vec<&FileHealthResult> =
        results.iter().filter(|r| r.error.is_none()).collect();
    let n = valid_results.len() as f64;

    let avg_fill = if n > 0.0 {
        valid_results.iter().map(|r| r.avg_fill_factor).sum::<f64>() / n
    } else {
        0.0
    };
    let avg_frag = if n > 0.0 {
        valid_results
            .iter()
            .map(|r| r.avg_fragmentation)
            .sum::<f64>()
            / n
    } else {
        0.0
    };
    let avg_garbage = if n > 0.0 {
        valid_results
            .iter()
            .map(|r| r.avg_garbage_ratio)
            .sum::<f64>()
            / n
    } else {
        0.0
    };

    // Prometheus mode emits unfiltered metrics and returns early.
    if opts.prometheus {
        let duration_secs = start.elapsed().as_secs_f64();
        print_prometheus_health(
            writer,
            &HealthPrometheusParams {
                datadir: &opts.datadir,
                results: &results,
                total_files,
                total_index_pages,
                avg_fill,
                avg_frag,
                avg_garbage,
                duration_secs,
            },
        )?;
        return Ok(());
    }

    // Threshold filters keep errored files visible; thresholds are given
    // as percentages but stored metrics are fractions.
    if let Some(min_ff) = opts.min_fill_factor {
        let threshold = min_ff / 100.0;
        results.retain(|r| r.error.is_some() || r.avg_fill_factor < threshold);
    }
    if let Some(max_frag) = opts.max_fragmentation {
        let threshold = max_frag / 100.0;
        results.retain(|r| r.error.is_some() || r.avg_fragmentation > threshold);
    }
    // Directory-wide worst grade (highest ordinal) across remaining files.
    let dir_worst_bloat = if do_bloat {
        results
            .iter()
            .filter_map(|r| r.worst_bloat_grade.as_ref())
            .max_by_key(|g| bloat_grade_ord(g).unwrap_or(0))
            .cloned()
    } else {
        None
    };

    if let Some(ref grade_str) = opts.max_bloat_grade {
        let threshold = bloat_grade_ord(grade_str).ok_or_else(|| {
            crate::IdbError::Argument(format!(
                "Invalid bloat grade '{}': must be one of A, B, C, D, F",
                grade_str
            ))
        })?;
        results.retain(|r| {
            r.error.is_some()
                || r.worst_bloat_grade
                    .as_ref()
                    .and_then(|g| bloat_grade_ord(g))
                    .map(|ord| ord >= threshold)
                    .unwrap_or(false)
        });
    }

    // Most fragmented files first.
    results.sort_by(|a, b| {
        b.avg_fragmentation
            .partial_cmp(&a.avg_fragmentation)
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    if opts.json {
        let report = HealthAuditReport {
            datadir: opts.datadir.clone(),
            tablespaces: results,
            summary: DirectoryHealthSummary {
                total_files,
                total_index_pages,
                avg_fill_factor: round2(avg_fill),
                avg_fragmentation: round2(avg_frag),
                avg_garbage_ratio: round2(avg_garbage),
                worst_bloat_grade: dir_worst_bloat,
            },
        };
        let json = serde_json::to_string_pretty(&report)
            .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
        wprintln!(writer, "{}", json)?;
    } else if opts.csv {
        if do_bloat {
            wprintln!(
                writer,
                "file,avg_fill_factor,avg_fragmentation,avg_garbage_ratio,index_count,total_index_pages,worst_bloat_grade,worst_bloat_score"
            )?;
        } else {
            wprintln!(
                writer,
                "file,avg_fill_factor,avg_fragmentation,avg_garbage_ratio,index_count,total_index_pages"
            )?;
        }
        for r in &results {
            if r.error.is_some() {
                continue;
            }
            if do_bloat {
                wprintln!(
                    writer,
                    "{},{:.1},{:.1},{:.1},{},{},{},{}",
                    csv_escape(&r.file),
                    r.avg_fill_factor * 100.0,
                    r.avg_fragmentation * 100.0,
                    r.avg_garbage_ratio * 100.0,
                    r.index_count,
                    r.total_index_pages,
                    r.worst_bloat_grade.as_deref().unwrap_or(""),
                    r.worst_bloat_score
                        .map(|s| format!("{:.3}", s))
                        .unwrap_or_default()
                )?;
            } else {
                wprintln!(
                    writer,
                    "{},{:.1},{:.1},{:.1},{},{}",
                    csv_escape(&r.file),
                    r.avg_fill_factor * 100.0,
                    r.avg_fragmentation * 100.0,
                    r.avg_garbage_ratio * 100.0,
                    r.index_count,
                    r.total_index_pages
                )?;
            }
        }
    } else {
        wprintln!(writer, "Directory Health: {}\n", opts.datadir)?;
        if do_bloat {
            wprintln!(
                writer,
                "  {:<40} {:>6} {:>6} {:>6} {:>8} {:>6} {:>6}",
                "File",
                "Fill%",
                "Frag%",
                "Garb%",
                "Indexes",
                "Pages",
                "Bloat"
            )?;
        } else {
            wprintln!(
                writer,
                "  {:<40} {:>6} {:>6} {:>6} {:>8} {:>6}",
                "File",
                "Fill%",
                "Frag%",
                "Garb%",
                "Indexes",
                "Pages"
            )?;
        }

        for r in &results {
            if let Some(ref err) = r.error {
                wprintln!(
                    writer,
                    "  {:<40} {}",
                    r.file,
                    format!("ERROR: {}", err).yellow()
                )?;
            } else if do_bloat {
                let grade_str = r.worst_bloat_grade.as_deref().unwrap_or("-");
                let grade_colored = match grade_str {
                    "A" | "B" => grade_str.green().to_string(),
                    "C" => grade_str.yellow().to_string(),
                    "D" | "F" => grade_str.red().to_string(),
                    _ => grade_str.to_string(),
                };
                wprintln!(
                    writer,
                    "  {:<40} {:>5.1}% {:>5.1}% {:>5.1}% {:>7} {:>5} {:>5}",
                    r.file,
                    r.avg_fill_factor * 100.0,
                    r.avg_fragmentation * 100.0,
                    r.avg_garbage_ratio * 100.0,
                    r.index_count,
                    r.total_index_pages,
                    grade_colored
                )?;
            } else {
                wprintln!(
                    writer,
                    "  {:<40} {:>5.1}% {:>5.1}% {:>5.1}% {:>7} {:>5}",
                    r.file,
                    r.avg_fill_factor * 100.0,
                    r.avg_fragmentation * 100.0,
                    r.avg_garbage_ratio * 100.0,
                    r.index_count,
                    r.total_index_pages
                )?;
            }
        }

        wprintln!(writer)?;
        wprintln!(
            writer,
            "Summary: {} files, avg fill {:.1}%, avg frag {:.1}%, avg garbage {:.1}%",
            total_files,
            avg_fill * 100.0,
            avg_frag * 100.0,
            avg_garbage * 100.0
        )?;
    }

    Ok(())
}
1023
/// Runs the checksum-mismatch audit: collects every failing page across
/// all files and prints the full list in text, JSON, or CSV format.
///
/// Returns `Err(IdbError::Parse)` when any mismatches were found so the
/// caller exits non-zero; `Ok(())` otherwise.
fn execute_mismatch(
    opts: &AuditOptions,
    ibd_files: &[std::path::PathBuf],
    datadir: &Path,
    writer: &mut dyn Write,
) -> Result<(), IdbError> {
    // Progress bar only in the human-readable output mode.
    let pb = if !opts.json && !opts.csv {
        Some(create_progress_bar(ibd_files.len() as u64, "files"))
    } else {
        None
    };

    let page_size = opts.page_size;
    let keyring = opts.keyring.clone();
    let use_mmap = opts.mmap;

    let all_results: Vec<(Vec<MismatchEntry>, u64)> = ibd_files
        .par_iter()
        .map(|path| {
            let r = audit_file_mismatches(path, datadir, page_size, &keyring, use_mmap);
            if let Some(ref pb) = pb {
                pb.inc(1);
            }
            r
        })
        .collect();

    if let Some(ref pb) = pb {
        pb.finish_and_clear();
    }

    let total_files_scanned = ibd_files.len();
    let total_pages_scanned: u64 = all_results.iter().map(|(_, count)| count).sum();

    // Flatten and order deterministically by (file, page).
    let mut mismatches: Vec<MismatchEntry> = all_results
        .into_iter()
        .flat_map(|(entries, _)| entries)
        .collect();
    mismatches.sort_by(|a, b| (&a.file, a.page_number).cmp(&(&b.file, b.page_number)));

    if opts.json {
        let report = MismatchReport {
            datadir: opts.datadir.clone(),
            mismatches: mismatches.clone(),
            total_files_scanned,
            total_pages_scanned,
        };
        let json = serde_json::to_string_pretty(&report)
            .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
        wprintln!(writer, "{}", json)?;
    } else if opts.csv {
        wprintln!(
            writer,
            "file,page_number,stored_checksum,calculated_checksum,algorithm"
        )?;
        for m in &mismatches {
            wprintln!(
                writer,
                "{},{},{},{},{}",
                csv_escape(&m.file),
                m.page_number,
                m.stored_checksum,
                m.calculated_checksum,
                m.algorithm
            )?;
        }
    } else if mismatches.is_empty() {
        wprintln!(
            writer,
            "No checksum mismatches found ({} files, {} pages scanned).",
            total_files_scanned,
            total_pages_scanned
        )?;
    } else {
        wprintln!(
            writer,
            "{:<40} {:>6} {:>12} {:>12} {:>12}",
            "FILE",
            "PAGE",
            "STORED",
            "CALCULATED",
            "ALGORITHM"
        )?;
        for m in &mismatches {
            wprintln!(
                writer,
                "{:<40} {:>6} {:>12} {:>12} {:>12}",
                m.file,
                m.page_number,
                format!("0x{:08X}", m.stored_checksum),
                format!("0x{:08X}", m.calculated_checksum),
                m.algorithm
            )?;
        }
    }

    // Mismatches found: propagate as an error for a non-zero exit code.
    if !mismatches.is_empty() {
        return Err(IdbError::Parse(format!(
            "{} checksum mismatches found",
            mismatches.len()
        )));
    }

    Ok(())
}
1133
/// Rounds `v` to two decimal places using `f64::round` semantics
/// (ties away from zero).
fn round2(v: f64) -> f64 {
    let scaled = (v * 100.0).round();
    scaled / 100.0
}
1137
/// Ordinal severity of a bloat grade: "A" (best) maps to 0 through "F"
/// (worst) at 4. Unknown grade strings yield `None`.
fn bloat_grade_ord(grade: &str) -> Option<u8> {
    const GRADES: [&str; 5] = ["A", "B", "C", "D", "F"];
    GRADES.iter().position(|&g| g == grade).map(|i| i as u8)
}
1150
/// Bundled arguments for `print_prometheus_integrity`, avoiding a long
/// parameter list.
struct IntegrityPrometheusParams<'a> {
    datadir: &'a str,
    results: &'a [FileIntegrityResult],
    total_pages: u64,
    corrupt_pages: u64,
    integrity_pct: f64,
    duration_secs: f64,
}
1163
/// Writes the integrity-audit metrics in Prometheus exposition format:
/// per-file gauges (`innodb_pages`, `innodb_corrupt_pages`,
/// `innodb_empty_pages`, skipping errored files) followed by
/// directory-wide gauges (`innodb_audit_integrity_pct`,
/// `innodb_audit_pages`, `innodb_audit_corrupt_pages`,
/// `innodb_scan_duration_seconds`).
fn print_prometheus_integrity(
    writer: &mut dyn Write,
    params: &IntegrityPrometheusParams<'_>,
) -> Result<(), IdbError> {
    let datadir = params.datadir;
    let results = params.results;
    let total_pages = params.total_pages;
    let corrupt_pages = params.corrupt_pages;
    let integrity_pct = params.integrity_pct;
    let duration_secs = params.duration_secs;
    // Per-file: total pages.
    wprintln!(
        writer,
        "{}",
        prom::help_line("innodb_pages", "Total pages in tablespace")
    )?;
    wprintln!(writer, "{}", prom::type_line("innodb_pages", "gauge"))?;
    for r in results {
        if r.status == "error" {
            continue;
        }
        wprintln!(
            writer,
            "{}",
            prom::format_gauge_int(
                "innodb_pages",
                &[("datadir", datadir), ("file", &r.file)],
                r.total_pages
            )
        )?;
    }

    // Per-file: corrupt pages.
    wprintln!(
        writer,
        "{}",
        prom::help_line(
            "innodb_corrupt_pages",
            "Number of corrupt pages in tablespace"
        )
    )?;
    wprintln!(
        writer,
        "{}",
        prom::type_line("innodb_corrupt_pages", "gauge")
    )?;
    for r in results {
        if r.status == "error" {
            continue;
        }
        wprintln!(
            writer,
            "{}",
            prom::format_gauge_int(
                "innodb_corrupt_pages",
                &[("datadir", datadir), ("file", &r.file)],
                r.invalid_pages
            )
        )?;
    }

    // Per-file: empty pages.
    wprintln!(
        writer,
        "{}",
        prom::help_line("innodb_empty_pages", "Number of empty pages in tablespace")
    )?;
    wprintln!(writer, "{}", prom::type_line("innodb_empty_pages", "gauge"))?;
    for r in results {
        if r.status == "error" {
            continue;
        }
        wprintln!(
            writer,
            "{}",
            prom::format_gauge_int(
                "innodb_empty_pages",
                &[("datadir", datadir), ("file", &r.file)],
                r.empty_pages
            )
        )?;
    }

    // Directory-wide: integrity percentage.
    wprintln!(
        writer,
        "{}",
        prom::help_line(
            "innodb_audit_integrity_pct",
            "Directory-wide integrity percentage"
        )
    )?;
    wprintln!(
        writer,
        "{}",
        prom::type_line("innodb_audit_integrity_pct", "gauge")
    )?;
    wprintln!(
        writer,
        "{}",
        prom::format_gauge(
            "innodb_audit_integrity_pct",
            &[("datadir", datadir)],
            integrity_pct
        )
    )?;

    // Directory-wide: total pages scanned.
    wprintln!(
        writer,
        "{}",
        prom::help_line(
            "innodb_audit_pages",
            "Total pages scanned across data directory"
        )
    )?;
    wprintln!(writer, "{}", prom::type_line("innodb_audit_pages", "gauge"))?;
    wprintln!(
        writer,
        "{}",
        prom::format_gauge_int("innodb_audit_pages", &[("datadir", datadir)], total_pages)
    )?;

    // Directory-wide: total corrupt pages.
    wprintln!(
        writer,
        "{}",
        prom::help_line(
            "innodb_audit_corrupt_pages",
            "Total corrupt pages across data directory"
        )
    )?;
    wprintln!(
        writer,
        "{}",
        prom::type_line("innodb_audit_corrupt_pages", "gauge")
    )?;
    wprintln!(
        writer,
        "{}",
        prom::format_gauge_int(
            "innodb_audit_corrupt_pages",
            &[("datadir", datadir)],
            corrupt_pages
        )
    )?;

    // Directory-wide: scan duration.
    wprintln!(
        writer,
        "{}",
        prom::help_line(
            "innodb_scan_duration_seconds",
            "Time spent scanning the data directory"
        )
    )?;
    wprintln!(
        writer,
        "{}",
        prom::type_line("innodb_scan_duration_seconds", "gauge")
    )?;
    wprintln!(
        writer,
        "{}",
        prom::format_gauge(
            "innodb_scan_duration_seconds",
            &[("datadir", datadir)],
            duration_secs
        )
    )?;

    Ok(())
}
1338
/// Bundled arguments for `print_prometheus_health`, avoiding a long
/// parameter list.
struct HealthPrometheusParams<'a> {
    datadir: &'a str,
    results: &'a [FileHealthResult],
    total_files: usize,
    total_index_pages: u64,
    avg_fill: f64,
    avg_frag: f64,
    avg_garbage: f64,
    duration_secs: f64,
}
1349
1350fn print_prometheus_health(
1352 writer: &mut dyn Write,
1353 params: &HealthPrometheusParams<'_>,
1354) -> Result<(), IdbError> {
1355 let datadir = params.datadir;
1356 let results = params.results;
1357 let total_files = params.total_files;
1358 let total_index_pages = params.total_index_pages;
1359 let avg_fill = params.avg_fill;
1360 let avg_frag = params.avg_frag;
1361 let avg_garbage = params.avg_garbage;
1362 let duration_secs = params.duration_secs;
1363 wprintln!(
1365 writer,
1366 "{}",
1367 prom::help_line(
1368 "innodb_fill_factor",
1369 "Average B+Tree fill factor for tablespace"
1370 )
1371 )?;
1372 wprintln!(writer, "{}", prom::type_line("innodb_fill_factor", "gauge"))?;
1373 for r in results {
1374 if r.error.is_some() {
1375 continue;
1376 }
1377 wprintln!(
1378 writer,
1379 "{}",
1380 prom::format_gauge(
1381 "innodb_fill_factor",
1382 &[("datadir", datadir), ("file", &r.file)],
1383 r.avg_fill_factor
1384 )
1385 )?;
1386 }
1387
1388 wprintln!(
1390 writer,
1391 "{}",
1392 prom::help_line(
1393 "innodb_fragmentation_ratio",
1394 "Average fragmentation ratio for tablespace"
1395 )
1396 )?;
1397 wprintln!(
1398 writer,
1399 "{}",
1400 prom::type_line("innodb_fragmentation_ratio", "gauge")
1401 )?;
1402 for r in results {
1403 if r.error.is_some() {
1404 continue;
1405 }
1406 wprintln!(
1407 writer,
1408 "{}",
1409 prom::format_gauge(
1410 "innodb_fragmentation_ratio",
1411 &[("datadir", datadir), ("file", &r.file)],
1412 r.avg_fragmentation
1413 )
1414 )?;
1415 }
1416
1417 wprintln!(
1419 writer,
1420 "{}",
1421 prom::help_line(
1422 "innodb_garbage_ratio",
1423 "Average garbage ratio for tablespace"
1424 )
1425 )?;
1426 wprintln!(
1427 writer,
1428 "{}",
1429 prom::type_line("innodb_garbage_ratio", "gauge")
1430 )?;
1431 for r in results {
1432 if r.error.is_some() {
1433 continue;
1434 }
1435 wprintln!(
1436 writer,
1437 "{}",
1438 prom::format_gauge(
1439 "innodb_garbage_ratio",
1440 &[("datadir", datadir), ("file", &r.file)],
1441 r.avg_garbage_ratio
1442 )
1443 )?;
1444 }
1445
1446 wprintln!(
1448 writer,
1449 "{}",
1450 prom::help_line("innodb_index_pages", "Total INDEX pages in tablespace")
1451 )?;
1452 wprintln!(writer, "{}", prom::type_line("innodb_index_pages", "gauge"))?;
1453 for r in results {
1454 if r.error.is_some() {
1455 continue;
1456 }
1457 wprintln!(
1458 writer,
1459 "{}",
1460 prom::format_gauge_int(
1461 "innodb_index_pages",
1462 &[("datadir", datadir), ("file", &r.file)],
1463 r.total_index_pages
1464 )
1465 )?;
1466 }
1467
1468 let has_bloat = results.iter().any(|r| r.worst_bloat_score.is_some());
1470 if has_bloat {
1471 wprintln!(
1472 writer,
1473 "{}",
1474 prom::help_line(
1475 "innodb_bloat_score",
1476 "Worst bloat score across indexes in tablespace (0-1)"
1477 )
1478 )?;
1479 wprintln!(writer, "{}", prom::type_line("innodb_bloat_score", "gauge"))?;
1480 for r in results {
1481 if let Some(score) = r.worst_bloat_score {
1482 wprintln!(
1483 writer,
1484 "{}",
1485 prom::format_gauge(
1486 "innodb_bloat_score",
1487 &[
1488 ("datadir", datadir),
1489 ("file", &r.file),
1490 ("grade", r.worst_bloat_grade.as_deref().unwrap_or(""))
1491 ],
1492 score
1493 )
1494 )?;
1495 }
1496 }
1497 }
1498
1499 wprintln!(
1501 writer,
1502 "{}",
1503 prom::help_line("innodb_audit_files", "Total tablespace files scanned")
1504 )?;
1505 wprintln!(writer, "{}", prom::type_line("innodb_audit_files", "gauge"))?;
1506 wprintln!(
1507 writer,
1508 "{}",
1509 prom::format_gauge_int(
1510 "innodb_audit_files",
1511 &[("datadir", datadir)],
1512 total_files as u64
1513 )
1514 )?;
1515
1516 wprintln!(
1517 writer,
1518 "{}",
1519 prom::help_line(
1520 "innodb_audit_index_pages",
1521 "Total INDEX pages across data directory"
1522 )
1523 )?;
1524 wprintln!(
1525 writer,
1526 "{}",
1527 prom::type_line("innodb_audit_index_pages", "gauge")
1528 )?;
1529 wprintln!(
1530 writer,
1531 "{}",
1532 prom::format_gauge_int(
1533 "innodb_audit_index_pages",
1534 &[("datadir", datadir)],
1535 total_index_pages
1536 )
1537 )?;
1538
1539 wprintln!(
1540 writer,
1541 "{}",
1542 prom::help_line(
1543 "innodb_audit_avg_fill_factor",
1544 "Directory-wide average fill factor"
1545 )
1546 )?;
1547 wprintln!(
1548 writer,
1549 "{}",
1550 prom::type_line("innodb_audit_avg_fill_factor", "gauge")
1551 )?;
1552 wprintln!(
1553 writer,
1554 "{}",
1555 prom::format_gauge(
1556 "innodb_audit_avg_fill_factor",
1557 &[("datadir", datadir)],
1558 avg_fill
1559 )
1560 )?;
1561
1562 wprintln!(
1563 writer,
1564 "{}",
1565 prom::help_line(
1566 "innodb_audit_avg_fragmentation",
1567 "Directory-wide average fragmentation"
1568 )
1569 )?;
1570 wprintln!(
1571 writer,
1572 "{}",
1573 prom::type_line("innodb_audit_avg_fragmentation", "gauge")
1574 )?;
1575 wprintln!(
1576 writer,
1577 "{}",
1578 prom::format_gauge(
1579 "innodb_audit_avg_fragmentation",
1580 &[("datadir", datadir)],
1581 avg_frag
1582 )
1583 )?;
1584
1585 wprintln!(
1586 writer,
1587 "{}",
1588 prom::help_line(
1589 "innodb_audit_avg_garbage_ratio",
1590 "Directory-wide average garbage ratio"
1591 )
1592 )?;
1593 wprintln!(
1594 writer,
1595 "{}",
1596 prom::type_line("innodb_audit_avg_garbage_ratio", "gauge")
1597 )?;
1598 wprintln!(
1599 writer,
1600 "{}",
1601 prom::format_gauge(
1602 "innodb_audit_avg_garbage_ratio",
1603 &[("datadir", datadir)],
1604 avg_garbage
1605 )
1606 )?;
1607
1608 wprintln!(
1610 writer,
1611 "{}",
1612 prom::help_line(
1613 "innodb_scan_duration_seconds",
1614 "Time spent scanning the data directory"
1615 )
1616 )?;
1617 wprintln!(
1618 writer,
1619 "{}",
1620 prom::type_line("innodb_scan_duration_seconds", "gauge")
1621 )?;
1622 wprintln!(
1623 writer,
1624 "{}",
1625 prom::format_gauge(
1626 "innodb_scan_duration_seconds",
1627 &[("datadir", datadir)],
1628 duration_secs
1629 )
1630 )?;
1631
1632 Ok(())
1633}