use serde::Serialize;
use std::collections::BTreeMap;
use crate::innodb::constants::*;
use crate::innodb::index::IndexHeader;
use crate::innodb::page::FilHeader;
use crate::innodb::page_types::PageType;
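/// Per-page snapshot of the FIL-header and index-header fields needed for
/// health analysis of a single INDEX page.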
#[derive(Debug, Clone)]
pub struct IndexPageSnapshot {
pub page_number: u64,
pub index_id: u64,
pub level: u16,
pub heap_top: u16,
pub garbage: u16,
pub n_recs: u16,
pub prev: u32,
pub next: u32,
}
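/// Aggregated health metrics for one B-tree index, grouped by `index_id`.
/// The `Option` fields are populated only when deeper analysis (record
/// walking, bloat scoring, cardinality sampling) has been performed.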
#[derive(Debug, Clone, Serialize)]
pub struct IndexHealth {
pub index_id: u64,
#[serde(skip_serializing_if = "Option::is_none")]
pub index_name: Option<String>,
pub tree_depth: u16,
pub total_pages: u64,
pub leaf_pages: u64,
pub non_leaf_pages: u64,
pub total_records: u64,
pub avg_fill_factor: f64,
pub min_fill_factor: f64,
pub max_fill_factor: f64,
pub avg_garbage_ratio: f64,
pub total_garbage_bytes: u64,
pub fragmentation: f64,
pub empty_leaf_pages: u64,
#[serde(skip_serializing_if = "Option::is_none")]
pub delete_marked_records: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub total_walked_records: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub bloat: Option<BloatScore>,
#[serde(skip_serializing_if = "Option::is_none")]
pub cardinality: Option<CardinalityEstimate>,
}
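/// Tablespace-wide summary: page-type counts plus averages taken over all
/// analyzed INDEX pages.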
#[derive(Debug, Clone, Serialize)]
pub struct TablespaceHealth {
pub total_pages: u64,
pub index_pages: u64,
pub non_index_pages: u64,
pub empty_pages: u64,
pub page_size: u32,
pub avg_fill_factor: f64,
pub avg_garbage_ratio: f64,
pub avg_fragmentation: f64,
pub index_count: u64,
#[serde(skip_serializing_if = "is_zero_u64")]
pub rtree_pages: u64,
#[serde(skip_serializing_if = "is_zero_u64")]
pub lob_pages: u64,
#[serde(skip_serializing_if = "is_zero_u64")]
pub undo_pages: u64,
}
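/// Serde helper: omit zero-valued specialty page counters from serialized output.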
fn is_zero_u64(v: &u64) -> bool {
*v == 0
}
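/// Top-level report for one tablespace file: a summary plus per-index detail.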
#[derive(Debug, Clone, Serialize)]
pub struct HealthReport {
pub file: String,
pub summary: TablespaceHealth,
pub indexes: Vec<IndexHealth>,
}
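/// Parses the FIL and index headers of a raw page. Returns `None` when the
/// page is not an INDEX page or either header fails to parse.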
pub fn extract_index_page_snapshot(
page_data: &[u8],
page_number: u64,
) -> Option<IndexPageSnapshot> {
let fil = FilHeader::parse(page_data)?;
if fil.page_type != PageType::Index {
return None;
}
let idx = IndexHeader::parse(page_data)?;
Some(IndexPageSnapshot {
page_number,
index_id: idx.index_id,
level: idx.level,
heap_top: idx.heap_top,
garbage: idx.garbage,
n_recs: idx.n_recs,
prev: fil.prev_page,
next: fil.next_page,
})
}
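/// Fraction of usable page space occupied by live record data:
/// `(heap_top - PAGE_DATA_OFFSET - garbage) / (page_size - PAGE_DATA_OFFSET - SIZE_FIL_TRAILER)`,
/// clamped to `[0.0, 1.0]`.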
pub fn compute_fill_factor(heap_top: u16, garbage: u16, page_size: u32) -> f64 {
let usable = page_size as f64 - PAGE_DATA_OFFSET as f64 - SIZE_FIL_TRAILER as f64;
if usable <= 0.0 {
return 0.0;
}
let used = heap_top as f64 - PAGE_DATA_OFFSET as f64 - garbage as f64;
(used / usable).clamp(0.0, 1.0)
}
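/// Fraction of usable page space sitting in the page's garbage
/// (deleted-record) list, clamped to `[0.0, 1.0]`.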
pub fn compute_garbage_ratio(garbage: u16, page_size: u32) -> f64 {
let usable = page_size as f64 - PAGE_DATA_OFFSET as f64 - SIZE_FIL_TRAILER as f64;
if usable <= 0.0 {
return 0.0;
}
(garbage as f64 / usable).clamp(0.0, 1.0)
}
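/// Fraction of adjacent leaf-page transitions (in sorted page order) that
/// are not physically sequential: 0.0 means fully contiguous leaves, 1.0
/// means no two leaves are adjacent on disk. Returns 0.0 for one page or
/// fewer. Sorts the slice in place.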
pub fn compute_fragmentation(leaf_page_numbers: &mut [u64]) -> f64 {
if leaf_page_numbers.len() <= 1 {
return 0.0;
}
leaf_page_numbers.sort_unstable();
let transitions = leaf_page_numbers.len() - 1;
let non_sequential = leaf_page_numbers
.windows(2)
.filter(|w| w[1] != w[0] + 1)
.count();
non_sequential as f64 / transitions as f64
}
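/// Groups page snapshots by `index_id` and aggregates per-index metrics plus
/// a tablespace-wide summary. `tree_depth` is reported as the maximum page
/// level plus one; the R-tree, LOB, and undo counters are initialized to
/// zero here.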
pub fn analyze_health(
snapshots: Vec<IndexPageSnapshot>,
page_size: u32,
total_pages: u64,
empty_pages: u64,
file: &str,
) -> HealthReport {
let mut groups: BTreeMap<u64, Vec<IndexPageSnapshot>> = BTreeMap::new();
for snap in snapshots {
groups.entry(snap.index_id).or_default().push(snap);
}
let index_page_count: u64 = groups.values().map(|v| v.len() as u64).sum();
let non_index_pages = total_pages.saturating_sub(index_page_count + empty_pages);
let mut indexes = Vec::with_capacity(groups.len());
let mut all_fill_factors = Vec::new();
let mut all_garbage_ratios = Vec::new();
for (index_id, pages) in &groups {
let mut tree_depth: u16 = 0;
let mut leaf_pages: u64 = 0;
let mut non_leaf_pages: u64 = 0;
let mut total_records: u64 = 0;
let mut total_garbage_bytes: u64 = 0;
let mut empty_leaf_pages: u64 = 0;
let mut fill_factors = Vec::with_capacity(pages.len());
let mut garbage_ratios = Vec::with_capacity(pages.len());
let mut leaf_page_numbers = Vec::new();
for snap in pages {
let ff = compute_fill_factor(snap.heap_top, snap.garbage, page_size);
let gr = compute_garbage_ratio(snap.garbage, page_size);
fill_factors.push(ff);
garbage_ratios.push(gr);
all_fill_factors.push(ff);
all_garbage_ratios.push(gr);
if snap.level > tree_depth {
tree_depth = snap.level;
}
if snap.level == 0 {
leaf_pages += 1;
leaf_page_numbers.push(snap.page_number);
if snap.n_recs == 0 {
empty_leaf_pages += 1;
}
} else {
non_leaf_pages += 1;
}
total_records += snap.n_recs as u64;
total_garbage_bytes += snap.garbage as u64;
}
let avg_fill = if fill_factors.is_empty() {
0.0
} else {
fill_factors.iter().sum::<f64>() / fill_factors.len() as f64
};
let min_fill = fill_factors.iter().cloned().fold(f64::INFINITY, f64::min);
let max_fill = fill_factors
.iter()
.cloned()
.fold(f64::NEG_INFINITY, f64::max);
let avg_garbage = if garbage_ratios.is_empty() {
0.0
} else {
garbage_ratios.iter().sum::<f64>() / garbage_ratios.len() as f64
};
let fragmentation = compute_fragmentation(&mut leaf_page_numbers);
indexes.push(IndexHealth {
index_id: *index_id,
index_name: None,
tree_depth: tree_depth + 1,
total_pages: pages.len() as u64,
leaf_pages,
non_leaf_pages,
total_records,
avg_fill_factor: round2(avg_fill),
min_fill_factor: round2(if min_fill.is_infinite() {
0.0
} else {
min_fill
}),
max_fill_factor: round2(if max_fill.is_infinite() {
0.0
} else {
max_fill
}),
avg_garbage_ratio: round2(avg_garbage),
total_garbage_bytes,
fragmentation: round2(fragmentation),
empty_leaf_pages,
delete_marked_records: None,
total_walked_records: None,
bloat: None,
cardinality: None,
});
}
let avg_fill = if all_fill_factors.is_empty() {
0.0
} else {
round2(all_fill_factors.iter().sum::<f64>() / all_fill_factors.len() as f64)
};
let avg_garbage = if all_garbage_ratios.is_empty() {
0.0
} else {
round2(all_garbage_ratios.iter().sum::<f64>() / all_garbage_ratios.len() as f64)
};
let avg_frag = if indexes.is_empty() {
0.0
} else {
round2(indexes.iter().map(|i| i.fragmentation).sum::<f64>() / indexes.len() as f64)
};
HealthReport {
file: file.to_string(),
summary: TablespaceHealth {
total_pages,
index_pages: index_page_count,
non_index_pages,
empty_pages,
page_size,
avg_fill_factor: avg_fill,
avg_garbage_ratio: avg_garbage,
avg_fragmentation: avg_frag,
index_count: indexes.len() as u64,
rtree_pages: 0,
lob_pages: 0,
undo_pages: 0,
},
indexes,
}
}
fn round2(v: f64) -> f64 {
(v * 100.0).round() / 100.0
}
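// Bloat score component weights; they must sum to 1.0 (checked by
// `test_bloat_weights_sum_to_one`).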
const W_FILL: f64 = 0.30;
const W_GARBAGE: f64 = 0.25;
const W_FRAG: f64 = 0.25;
const W_DELETE: f64 = 0.20;
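/// Letter grade derived from the composite bloat score: `A` (healthy)
/// through `F` (severe bloat).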
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum BloatGrade {
A,
B,
C,
D,
F,
}
impl BloatGrade {
pub fn label(self) -> &'static str {
match self {
BloatGrade::A => "A",
BloatGrade::B => "B",
BloatGrade::C => "C",
BloatGrade::D => "D",
BloatGrade::F => "F",
}
}
}
impl std::fmt::Display for BloatGrade {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.label())
}
}
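/// Individual inputs to the bloat score, each normalized to `[0.0, 1.0]`.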
#[derive(Debug, Clone, Serialize)]
pub struct BloatComponents {
pub fill_factor_deficit: f64,
pub garbage_ratio: f64,
pub fragmentation: f64,
pub delete_mark_ratio: f64,
}
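/// Weighted bloat score with its letter grade, component breakdown, and an
/// optional remediation hint for grades C and below.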
#[derive(Debug, Clone, Serialize)]
pub struct BloatScore {
pub score: f64,
pub grade: BloatGrade,
pub components: BloatComponents,
#[serde(skip_serializing_if = "Option::is_none")]
pub recommendation: Option<String>,
}
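/// Combines the fill-factor deficit (`1 - avg_fill_factor`), garbage ratio,
/// fragmentation, and delete-marked record ratio into a weighted score in
/// `[0.0, 1.0]`, then maps fixed thresholds to a letter grade.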
pub fn score_bloat(health: &IndexHealth, delete_mark_ratio: f64) -> BloatScore {
let components = BloatComponents {
fill_factor_deficit: round2(1.0 - health.avg_fill_factor),
garbage_ratio: health.avg_garbage_ratio,
fragmentation: health.fragmentation,
delete_mark_ratio: round2(delete_mark_ratio),
};
let score = (W_FILL * components.fill_factor_deficit
+ W_GARBAGE * components.garbage_ratio
+ W_FRAG * components.fragmentation
+ W_DELETE * components.delete_mark_ratio)
.clamp(0.0, 1.0);
let score = round2(score);
let grade = if score < 0.10 {
BloatGrade::A
} else if score < 0.20 {
BloatGrade::B
} else if score < 0.35 {
BloatGrade::C
} else if score < 0.50 {
BloatGrade::D
} else {
BloatGrade::F
};
let recommendation = match grade {
BloatGrade::A | BloatGrade::B => None,
BloatGrade::C => Some("Consider OPTIMIZE TABLE during low-traffic period".to_string()),
BloatGrade::D => Some("OPTIMIZE TABLE recommended; significant bloat detected".to_string()),
BloatGrade::F => {
Some("OPTIMIZE TABLE strongly recommended; severe bloat detected".to_string())
}
};
BloatScore {
score,
grade,
components,
recommendation,
}
}
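/// Distinct-value estimate for a single column, produced by sampling a
/// subset of an index's leaf pages.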
#[derive(Debug, Clone, Serialize)]
pub struct CardinalityEstimate {
pub column_name: String,
pub estimated_distinct: u64,
pub sampled_records: u64,
pub sampled_pages: u64,
pub total_leaf_pages: u64,
pub confidence: f64,
}
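/// Estimates column cardinality by decoding records from up to `sample_size`
/// evenly spaced leaf pages, counting distinct values, and scaling linearly
/// by the fraction of leaf pages sampled (capped at the estimated total
/// record count). `confidence` is simply that sampled fraction. Returns
/// `None` when the inputs are empty or no page could be sampled.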
pub fn estimate_cardinality(
ts: &mut crate::innodb::tablespace::Tablespace,
leaf_pages: &[u64],
columns: &[crate::innodb::field_decode::ColumnStorageInfo],
column_name: &str,
page_size: u32,
sample_size: usize,
) -> Option<CardinalityEstimate> {
if leaf_pages.is_empty() || columns.is_empty() || sample_size == 0 {
return None;
}
let total_leaf = leaf_pages.len();
let actual_sample = sample_size.min(total_leaf);
let step = if actual_sample >= total_leaf {
1
} else {
total_leaf / actual_sample
};
let mut distinct_values = std::collections::HashSet::new();
let mut total_records_sampled = 0u64;
let mut pages_sampled = 0u64;
for i in (0..total_leaf).step_by(step) {
if pages_sampled >= actual_sample as u64 {
break;
}
let page_num = leaf_pages[i];
let page_data = match ts.read_page(page_num) {
Ok(d) => d,
Err(_) => continue,
};
let rows = crate::innodb::export::decode_page_records(
&page_data, columns, false, false, page_size,
);
for row in &rows {
total_records_sampled += 1;
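// Distinct counting keys on the first decoded column; `column_name` is
// only used to label the result, so callers are expected to order
// `columns` with the target column first.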
if let Some((_name, value)) = row.first() {
distinct_values.insert(format!("{:?}", value));
}
}
pages_sampled += 1;
}
if pages_sampled == 0 || total_records_sampled == 0 {
return None;
}
let distinct_in_sample = distinct_values.len() as u64;
let scale = total_leaf as f64 / pages_sampled as f64;
let estimated_total_records = (total_records_sampled as f64 * scale) as u64;
let raw_estimate = (distinct_in_sample as f64 * scale) as u64;
let estimated = raw_estimate.min(estimated_total_records);
let confidence = round2((pages_sampled as f64 / total_leaf as f64).min(1.0));
Some(CardinalityEstimate {
column_name: column_name.to_string(),
estimated_distinct: estimated,
sampled_records: total_records_sampled,
sampled_pages: pages_sampled,
total_leaf_pages: total_leaf as u64,
confidence,
})
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_fill_factor_full_page() {
let usable_end = (16384 - SIZE_FIL_TRAILER) as u16;
let ff = compute_fill_factor(usable_end, 0, 16384);
assert!((ff - 1.0).abs() < 0.001);
}
#[test]
fn test_fill_factor_half_page() {
let usable = 16384.0 - PAGE_DATA_OFFSET as f64 - SIZE_FIL_TRAILER as f64;
let half_heap_top = PAGE_DATA_OFFSET as u16 + (usable / 2.0) as u16;
let ff = compute_fill_factor(half_heap_top, 0, 16384);
assert!((ff - 0.5).abs() < 0.01);
}
#[test]
fn test_fill_factor_empty_page() {
let ff = compute_fill_factor(PAGE_DATA_OFFSET as u16, 0, 16384);
assert!(ff.abs() < 0.001);
}
#[test]
fn test_fill_factor_with_garbage() {
let usable = 16384.0 - PAGE_DATA_OFFSET as f64 - SIZE_FIL_TRAILER as f64;
let heap_top = PAGE_DATA_OFFSET as u16 + (usable * 0.75) as u16;
let garbage = (usable * 0.25) as u16;
let ff = compute_fill_factor(heap_top, garbage, 16384);
assert!((ff - 0.5).abs() < 0.01);
}
#[test]
fn test_garbage_ratio() {
let usable = 16384.0 - PAGE_DATA_OFFSET as f64 - SIZE_FIL_TRAILER as f64;
let garbage = (usable * 0.25) as u16;
let gr = compute_garbage_ratio(garbage, 16384);
assert!((gr - 0.25).abs() < 0.01);
}
#[test]
fn test_fragmentation_sequential() {
let mut pages = vec![1, 2, 3, 4, 5];
assert!(compute_fragmentation(&mut pages).abs() < 0.001);
}
#[test]
fn test_fragmentation_scattered() {
let mut pages = vec![1, 10, 20, 30, 40];
let frag = compute_fragmentation(&mut pages);
assert!((frag - 1.0).abs() < 0.001);
}
#[test]
fn test_fragmentation_single_page() {
let mut pages = vec![5];
assert!(compute_fragmentation(&mut pages).abs() < 0.001);
}
#[test]
fn test_fragmentation_empty() {
let mut pages: Vec<u64> = vec![];
assert!(compute_fragmentation(&mut pages).abs() < 0.001);
}
#[test]
fn test_fragmentation_partial() {
let mut pages = vec![1, 2, 5, 6];
let frag = compute_fragmentation(&mut pages);
assert!((frag - 1.0 / 3.0).abs() < 0.01);
}
#[test]
fn test_analyze_health_groups_by_index() {
let snapshots = vec![
IndexPageSnapshot {
page_number: 3,
index_id: 100,
level: 0,
heap_top: 8000,
garbage: 0,
n_recs: 50,
prev: FIL_NULL,
next: 4,
},
IndexPageSnapshot {
page_number: 4,
index_id: 100,
level: 0,
heap_top: 8000,
garbage: 0,
n_recs: 50,
prev: 3,
next: FIL_NULL,
},
IndexPageSnapshot {
page_number: 5,
index_id: 200,
level: 0,
heap_top: 4000,
garbage: 100,
n_recs: 20,
prev: FIL_NULL,
next: FIL_NULL,
},
];
let report = analyze_health(snapshots, 16384, 10, 2, "test.ibd");
assert_eq!(report.indexes.len(), 2);
assert_eq!(report.summary.index_count, 2);
assert_eq!(report.summary.index_pages, 3);
assert_eq!(report.summary.empty_pages, 2);
assert_eq!(report.summary.total_pages, 10);
let idx100 = report.indexes.iter().find(|i| i.index_id == 100).unwrap();
assert_eq!(idx100.total_pages, 2);
assert_eq!(idx100.leaf_pages, 2);
assert_eq!(idx100.total_records, 100);
assert_eq!(idx100.tree_depth, 1);
assert!(idx100.fragmentation.abs() < 0.001);
let idx200 = report.indexes.iter().find(|i| i.index_id == 200).unwrap();
assert_eq!(idx200.total_pages, 1);
assert_eq!(idx200.total_records, 20);
assert!(idx200.total_garbage_bytes > 0);
}
#[test]
fn test_analyze_health_empty() {
let report = analyze_health(vec![], 16384, 5, 5, "empty.ibd");
assert!(report.indexes.is_empty());
assert_eq!(report.summary.index_count, 0);
assert_eq!(report.summary.total_pages, 5);
assert_eq!(report.summary.empty_pages, 5);
}
#[test]
fn test_analyze_health_multi_level() {
let snapshots = vec![
IndexPageSnapshot {
page_number: 3,
index_id: 100,
level: 2,
heap_top: 300,
garbage: 0,
n_recs: 2,
prev: FIL_NULL,
next: FIL_NULL,
},
IndexPageSnapshot {
page_number: 4,
index_id: 100,
level: 1,
heap_top: 500,
garbage: 0,
n_recs: 5,
prev: FIL_NULL,
next: FIL_NULL,
},
IndexPageSnapshot {
page_number: 5,
index_id: 100,
level: 0,
heap_top: 8000,
garbage: 0,
n_recs: 50,
prev: FIL_NULL,
next: 6,
},
IndexPageSnapshot {
page_number: 6,
index_id: 100,
level: 0,
heap_top: 8000,
garbage: 0,
n_recs: 50,
prev: 5,
next: FIL_NULL,
},
];
let report = analyze_health(snapshots, 16384, 10, 0, "multi.ibd");
let idx = &report.indexes[0];
assert_eq!(idx.tree_depth, 3);
assert_eq!(idx.leaf_pages, 2);
assert_eq!(idx.non_leaf_pages, 2);
assert_eq!(idx.total_records, 107);
}
fn make_health(fill: f64, garbage: f64, frag: f64) -> IndexHealth {
IndexHealth {
index_id: 1,
index_name: None,
tree_depth: 1,
total_pages: 100,
leaf_pages: 90,
non_leaf_pages: 10,
total_records: 5000,
avg_fill_factor: fill,
min_fill_factor: fill,
max_fill_factor: fill,
avg_garbage_ratio: garbage,
total_garbage_bytes: 0,
fragmentation: frag,
empty_leaf_pages: 0,
delete_marked_records: None,
total_walked_records: None,
bloat: None,
cardinality: None,
}
}
#[test]
fn test_bloat_weights_sum_to_one() {
let total = W_FILL + W_GARBAGE + W_FRAG + W_DELETE;
assert!((total - 1.0).abs() < 0.001);
}
#[test]
fn test_score_bloat_healthy() {
let h = make_health(0.95, 0.01, 0.02);
let s = score_bloat(&h, 0.0);
assert_eq!(s.grade, BloatGrade::A);
assert!(s.score < 0.10);
assert!(s.recommendation.is_none());
}
#[test]
fn test_score_bloat_moderate() {
let h = make_health(0.60, 0.20, 0.30);
let s = score_bloat(&h, 0.15);
assert!(
s.grade == BloatGrade::C || s.grade == BloatGrade::D,
"Expected C or D, got {:?} (score={})",
s.grade,
s.score
);
assert!(s.recommendation.is_some());
}
#[test]
fn test_score_bloat_severe() {
let h = make_health(0.20, 0.60, 0.80);
let s = score_bloat(&h, 0.70);
assert_eq!(s.grade, BloatGrade::F);
assert!(s.score >= 0.50);
assert!(s.recommendation.is_some());
}
#[test]
fn test_bloat_grade_boundaries() {
let h_a = make_health(0.98, 0.0, 0.0);
assert_eq!(score_bloat(&h_a, 0.0).grade, BloatGrade::A);
let h_b = make_health(0.80, 0.10, 0.10);
let s_b = score_bloat(&h_b, 0.05);
assert!(
s_b.grade == BloatGrade::B || s_b.grade == BloatGrade::A,
"Expected A or B, got {:?} (score={})",
s_b.grade,
s_b.score
);
}
#[test]
fn test_score_bloat_no_deletes() {
let h = make_health(0.80, 0.10, 0.20);
let s = score_bloat(&h, 0.0);
assert_eq!(s.components.delete_mark_ratio, 0.0);
assert!(s.score > 0.0);
}
#[test]
fn test_cardinality_empty_pages() {
// `estimate_cardinality` requires a live `Tablespace`, so the empty-input
// guard (`leaf_pages.is_empty()` returning `None`) cannot be exercised
// here without a fixture file; kept as a placeholder.
}
}