use serde::Serialize;
use crate::innodb::export::{decode_page_records, extract_column_layout, extract_table_name};
use crate::innodb::field_decode::{self, ColumnStorageInfo, FieldValue};
use crate::innodb::index::IndexHeader;
use crate::innodb::page::FilHeader;
use crate::innodb::page_types::PageType;
use crate::innodb::record::{CompactRecordHeader, RecordType};
use crate::innodb::schema::SdiEnvelope;
use crate::innodb::sdi;
use crate::innodb::tablespace::Tablespace;
use crate::innodb::undo::{parse_undo_records, UndoRecordType, UndoState};
use crate::IdbError;
/// Where a recovered record was found. Serialized in `snake_case`
/// (e.g. `"delete_marked"`) for JSON output.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum RecoverySource {
    /// Record still linked in the page's record chain but flagged deleted.
    DeleteMarked,
    /// Record reclaimed from a page's free (garbage) list.
    FreeList,
    /// Delete-mark entry recovered from an undo-log page.
    UndoLog,
}
/// One potentially recoverable (deleted) row found during a scan.
#[derive(Debug, Clone, Serialize)]
pub struct UndeletedRecord {
    /// How this record was discovered.
    pub source: RecoverySource,
    /// Heuristic recovery confidence; 1.0 for records read straight off
    /// the record chain, lower values for heuristic decodes.
    pub confidence: f64,
    /// Transaction id from the hidden `DB_TRX_ID` column, when it could
    /// be converted to an integer. Omitted from JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub trx_id: Option<u64>,
    /// Page the record was found on.
    pub page_number: u64,
    /// Byte offset of the record origin within the page (0 when the
    /// decoder did not report one).
    pub offset: usize,
    /// Decoded user columns as `(name, value)` pairs.
    pub columns: Vec<(String, FieldValue)>,
    /// Raw hex fallback when field decoding failed entirely.
    /// Omitted from JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub raw_hex: Option<String>,
}
/// Per-source record counts for a completed undelete scan.
#[derive(Debug, Clone, Serialize)]
pub struct UndeleteSummary {
    /// Total records across all sources (after any filtering).
    pub total: usize,
    /// Records recovered from delete-marked chain entries.
    pub delete_marked: usize,
    /// Records recovered from page free lists.
    pub free_list: usize,
    /// Records recovered from undo logs.
    pub undo_log: usize,
}
/// Full result of an undelete scan: recovered rows plus table metadata.
#[derive(Debug, Clone, Serialize)]
pub struct UndeleteScanResult {
    /// Table name extracted from SDI, when available.
    /// Omitted from JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub table_name: Option<String>,
    /// User (non-system) column names.
    pub column_names: Vec<String>,
    /// Recovered records, sorted by descending confidence.
    pub records: Vec<UndeletedRecord>,
    /// Per-source counts over `records`.
    pub summary: UndeleteSummary,
}
/// Decode delete-marked records from a single clustered-index leaf page.
///
/// Each row is reported with full confidence (1.0) because it was
/// recovered through the normal record chain rather than heuristics.
/// Hidden system columns (`DB_TRX_ID`, `DB_ROLL_PTR`) are stripped from
/// the emitted column list; the transaction id is surfaced separately
/// when it can be converted to an integer.
pub fn scan_delete_marked_records(
    page_data: &[u8],
    page_number: u64,
    columns: &[ColumnStorageInfo],
    page_size: u32,
) -> Vec<UndeletedRecord> {
    let rows = decode_page_records(page_data, columns, true, true, page_size);
    let mut out = Vec::with_capacity(rows.len());
    for row in rows {
        // Extract the transaction id from the first DB_TRX_ID entry that
        // yields an integer-convertible value.
        let mut trx_id = None;
        for (name, value) in &row {
            if name != "DB_TRX_ID" {
                continue;
            }
            let parsed = match value {
                FieldValue::Uint(v) => Some(*v),
                FieldValue::Int(v) => Some(*v as u64),
                FieldValue::Hex(h) => {
                    u64::from_str_radix(h.trim_start_matches("0x"), 16).ok()
                }
                _ => None,
            };
            if parsed.is_some() {
                trx_id = parsed;
                break;
            }
        }
        // Drop the hidden system columns before reporting user data.
        let mut user_columns = row;
        user_columns.retain(|(name, _)| name != "DB_TRX_ID" && name != "DB_ROLL_PTR");
        out.push(UndeletedRecord {
            source: RecoverySource::DeleteMarked,
            confidence: 1.0,
            trx_id,
            page_number,
            // decode_page_records does not report per-record offsets.
            offset: 0,
            columns: user_columns,
            raw_hex: None,
        });
    }
    out
}
/// Walk a page's free (garbage) list and attempt to decode each reclaimed
/// record slot.
///
/// Only compact-format pages with a non-empty free list are handled. Each
/// candidate is decoded heuristically via `attempt_field_decode`, so the
/// reported confidence is always below 1.0. A visited set, an iteration
/// cap, and offset range checks guard against corrupt free-list pointers.
pub fn scan_free_list_records(
    page_data: &[u8],
    page_number: u64,
    columns: &[ColumnStorageInfo],
    page_size: u32,
) -> Vec<UndeletedRecord> {
    let mut records = Vec::new();
    let idx_hdr = match IndexHeader::parse(page_data) {
        Some(h) => h,
        None => return records,
    };
    // Nothing to do: no free list, or a non-compact page we cannot decode.
    if idx_hdr.free == 0 || !idx_hdr.is_compact() {
        return records;
    }
    let mut visited = std::collections::HashSet::new();
    let mut offset = idx_hdr.free as usize;
    let ps = page_size as usize;
    // Hard cap in case corruption produces a very long bogus chain.
    let max_iterations = 10_000;
    let mut iterations = 0;
    while offset > 0 && offset < ps && iterations < max_iterations {
        // A revisited offset means the list loops back on itself.
        if !visited.insert(offset) {
            break;
        }
        iterations += 1;
        // The 5-byte record header sits immediately before the record
        // origin, so the origin must be at least 5 bytes into the page.
        if offset < 5 || offset >= ps {
            break;
        }
        let hdr_start = offset - 5;
        if hdr_start + 5 > page_data.len() {
            break;
        }
        let hdr = match CompactRecordHeader::parse(&page_data[hdr_start..]) {
            Some(h) => h,
            None => break,
        };
        // Infimum/supremum on the free list indicates corruption; stop.
        if matches!(hdr.rec_type, RecordType::Infimum | RecordType::Supremum) {
            break;
        }
        let (decoded_cols, confidence, raw_hex) = attempt_field_decode(page_data, offset, columns);
        // Only report slots that yielded decoded columns or a hex fallback.
        if !decoded_cols.is_empty() || raw_hex.is_some() {
            records.push(UndeletedRecord {
                source: RecoverySource::FreeList,
                confidence,
                trx_id: None,
                page_number,
                offset,
                columns: decoded_cols,
                raw_hex,
            });
        }
        // next_offset is relative to the current record origin; 0 ends
        // the chain.
        let next_rel = hdr.next_offset;
        if next_rel == 0 {
            break;
        }
        let next_abs = offset as i64 + next_rel as i64;
        if next_abs <= 0 || next_abs as usize >= ps {
            break;
        }
        offset = next_abs as usize;
    }
    records
}
/// Heuristically decode the fields of a compact-format record at
/// `record_offset`.
///
/// Returns `(columns, confidence, raw_hex)`:
/// - on header-parse failure: no columns, 0.2 confidence, and a 64-byte
///   hex window of the raw bytes;
/// - otherwise the decoded `(name, value)` pairs with confidence 0.7 when
///   every user column decoded, 0.4 when some did, 0.2 when none.
fn attempt_field_decode(
    page_data: &[u8],
    record_offset: usize,
    columns: &[ColumnStorageInfo],
) -> (Vec<(String, FieldValue)>, f64, Option<String>) {
    let n_nullable = columns.iter().filter(|c| c.is_nullable).count();
    let n_variable = columns.iter().filter(|c| c.is_variable).count();
    // Parse the null bitmap and variable-length size array stored just
    // before the record header; without them field boundaries are unknown.
    let (nulls, var_lengths) = match crate::innodb::record::read_variable_field_lengths(
        page_data,
        record_offset,
        n_nullable,
        n_variable,
    ) {
        Some(r) => r,
        None => {
            // Header unreadable: hand back a raw hex window at low confidence.
            let hex = hex_at_offset(page_data, record_offset, 64);
            return (Vec::new(), 0.2, Some(hex));
        }
    };
    let mut row = Vec::new();
    // `pos` is the data cursor; null_idx/var_idx walk the parsed arrays.
    let mut pos = record_offset;
    let mut null_idx = 0;
    let mut var_idx = 0;
    let mut decoded_count = 0;
    let mut total_user_cols = 0;
    for col in columns {
        // System columns are skipped (not reported), advancing past their
        // fixed-size storage when the width is known.
        if col.is_system_column {
            if col.fixed_len > 0 {
                pos += col.fixed_len;
            }
            continue;
        }
        total_user_cols += 1;
        if col.is_nullable {
            // NULL field: emit Null and advance only the null bitmap.
            // var_idx is intentionally NOT advanced here — this code
            // assumes NULL variable-length fields have no length entry.
            if null_idx < nulls.len() && nulls[null_idx] {
                row.push((col.name.clone(), FieldValue::Null));
                null_idx += 1;
                decoded_count += 1;
                continue;
            }
            null_idx += 1;
        }
        if col.is_variable {
            // A missing length entry decodes as a zero-length field.
            let len = if var_idx < var_lengths.len() {
                var_lengths[var_idx]
            } else {
                0
            };
            var_idx += 1;
            // The 65536 cap guards against absurd lengths from a
            // misparsed header.
            if pos + len <= page_data.len() && len < 65536 {
                let val = field_decode::decode_field(&page_data[pos..pos + len], col);
                row.push((col.name.clone(), val));
                pos += len;
                decoded_count += 1;
            } else {
                row.push((col.name.clone(), FieldValue::Null));
            }
        } else {
            let len = col.fixed_len;
            if len > 0 && pos + len <= page_data.len() {
                let val = field_decode::decode_field(&page_data[pos..pos + len], col);
                row.push((col.name.clone(), val));
                pos += len;
                decoded_count += 1;
            } else {
                row.push((col.name.clone(), FieldValue::Null));
            }
        }
    }
    // Confidence scales with how much of the row decoded cleanly.
    let confidence = if total_user_cols == 0 {
        0.2
    } else if decoded_count == total_user_cols {
        0.7
    } else if decoded_count > 0 {
        0.4
    } else {
        0.2
    };
    (row, confidence, None)
}
/// Render up to `max_len` bytes of `data` starting at `offset` as a
/// lowercase hex string.
///
/// Returns an empty string when `offset` is at or past the end of `data`.
/// Uses a saturating add so a huge `max_len` cannot overflow `usize`
/// (the previous `offset + max_len` would panic in debug builds).
fn hex_at_offset(data: &[u8], offset: usize, max_len: usize) -> String {
    // Guard before computing `end` so out-of-range offsets are handled
    // uniformly without touching the slice.
    if offset >= data.len() {
        return String::new();
    }
    let end = offset.saturating_add(max_len).min(data.len());
    data[offset..end]
        .iter()
        .map(|b| format!("{:02x}", b))
        .collect()
}
/// Walk every undo-log page in `ts` and collect delete-mark undo records
/// belonging to `target_table_id`.
///
/// Primary-key bytes stored in each undo record are decoded using the
/// layouts in `pk_columns`; trailing fields without a known layout are
/// reported as hex under synthetic `pk_N` names. Confidence is derived
/// from the undo segment state, with active segments scoring highest.
pub fn scan_undo_for_deletes(
    ts: &mut Tablespace,
    target_table_id: u64,
    pk_columns: &[ColumnStorageInfo],
) -> Result<Vec<UndeletedRecord>, IdbError> {
    let mut out = Vec::new();
    ts.for_each_page(|page_num, page_data| {
        let Some(fil) = FilHeader::parse(page_data) else {
            return Ok(());
        };
        if fil.page_type != PageType::UndoLog {
            return Ok(());
        }
        // Segment state (when parseable) drives the confidence score below.
        let seg_state = crate::innodb::undo::UndoSegmentHeader::parse(page_data).map(|s| s.state);
        for undo_rec in &parse_undo_records(page_data) {
            if undo_rec.record_type != UndoRecordType::DelMarkRec
                || undo_rec.table_id != target_table_id
            {
                continue;
            }
            let mut cols = Vec::new();
            for (i, pk_bytes) in undo_rec.pk_fields.iter().enumerate() {
                let entry = match pk_columns.get(i) {
                    // Known layout: decode the raw bytes as a typed value.
                    Some(col) => (col.name.clone(), field_decode::decode_field(pk_bytes, col)),
                    // No layout for this position: fall back to a hex dump.
                    None => {
                        let hex: String =
                            pk_bytes.iter().map(|b| format!("{:02x}", b)).collect();
                        (format!("pk_{}", i), FieldValue::Hex(hex))
                    }
                };
                cols.push(entry);
            }
            let confidence = match seg_state {
                Some(UndoState::Active) => 0.3,
                Some(UndoState::Cached) | Some(UndoState::ToPurge) => 0.1,
                _ => 0.2,
            };
            out.push(UndeletedRecord {
                source: RecoverySource::UndoLog,
                confidence,
                trx_id: undo_rec.trx_id,
                page_number: page_num,
                offset: undo_rec.offset,
                columns: cols,
                raw_hex: None,
            });
        }
        Ok(())
    })?;
    Ok(out)
}
/// Recover the table's `se_private_id` (InnoDB table id) from the SDI
/// stored in the tablespace.
///
/// Returns `None` when no SDI pages exist, SDI extraction fails, or no
/// table-type SDI record (`sdi_type == 1`) carries a positive id.
///
/// Fix: the previous version used `serde_json::from_str(...).ok()?`,
/// which aborted the entire lookup on the first malformed SDI record.
/// A malformed record is now skipped so later records are still examined.
pub fn extract_table_id(ts: &mut Tablespace) -> Option<u64> {
    let sdi_pages = sdi::find_sdi_pages(ts).ok()?;
    if sdi_pages.is_empty() {
        return None;
    }
    let records = sdi::extract_sdi_from_pages(ts, &sdi_pages).ok()?;
    for rec in &records {
        // sdi_type 1 is the table object; other types are skipped.
        if rec.sdi_type != 1 {
            continue;
        }
        // Skip records whose JSON payload does not parse as an envelope.
        let Ok(envelope) = serde_json::from_str::<SdiEnvelope>(&rec.data) else {
            continue;
        };
        if envelope.dd_object.se_private_id > 0 {
            return Some(envelope.dd_object.se_private_id);
        }
    }
    None
}
/// Scan a tablespace (and optionally its undo tablespace) for recoverable
/// deleted records.
///
/// Pipeline: extract the column layout from SDI, collect clustered-index
/// leaf pages, harvest delete-marked and free-list records from each,
/// then optionally scan `undo_ts` for delete-mark undo records. Results
/// are filtered by `min_confidence` (and `min_trx_id` when given), sorted
/// by descending confidence, and summarized per source.
///
/// # Errors
/// Returns `IdbError::Parse` when the column layout cannot be extracted
/// (pre-8.0 tablespace or missing SDI); page-iteration errors propagate.
pub fn scan_undeleted(
    ts: &mut Tablespace,
    undo_ts: Option<&mut Tablespace>,
    min_confidence: f64,
    min_trx_id: Option<u64>,
    target_page: Option<u64>,
) -> Result<UndeleteScanResult, IdbError> {
    let table_name = extract_table_name(ts);
    let (columns, clustered_index_id) = extract_column_layout(ts).ok_or_else(|| {
        IdbError::Parse(
            "Cannot extract column layout from SDI (pre-8.0 tablespace or missing SDI)".to_string(),
        )
    })?;
    let page_size = ts.page_size();
    let col_names: Vec<String> = columns
        .iter()
        .filter(|c| !c.is_system_column)
        .map(|c| c.name.clone())
        .collect();
    let mut all_records = Vec::new();
    // Copy matching leaf pages out first: `ts` is mutably borrowed by
    // for_each_page and is needed again afterwards (extract_table_id).
    let mut leaf_pages: Vec<(u64, Vec<u8>)> = Vec::new();
    ts.for_each_page(|pn, pdata| {
        // Honor an explicit single-page filter when one was requested.
        if let Some(target) = target_page {
            if pn != target {
                return Ok(());
            }
        }
        let hdr = match FilHeader::parse(pdata) {
            Some(h) => h,
            None => return Ok(()),
        };
        if hdr.page_type != PageType::Index {
            return Ok(());
        }
        let idx_hdr = match IndexHeader::parse(pdata) {
            Some(h) => h,
            None => return Ok(()),
        };
        // Only clustered-index leaf pages hold full user records.
        if idx_hdr.index_id != clustered_index_id || !idx_hdr.is_leaf() {
            return Ok(());
        }
        leaf_pages.push((pn, pdata.to_vec()));
        Ok(())
    })?;
    for (pn, pdata) in &leaf_pages {
        let mut dm = scan_delete_marked_records(pdata, *pn, &columns, page_size);
        all_records.append(&mut dm);
        let mut fl = scan_free_list_records(pdata, *pn, &columns, page_size);
        all_records.append(&mut fl);
    }
    if let Some(uts) = undo_ts {
        // The table id ties undo entries back to this table.
        let table_id = extract_table_id(ts);
        if let Some(tid) = table_id {
            // Only the first non-nullable user column is passed as the PK
            // layout. NOTE(review): multi-column primary keys are
            // truncated to one column here — confirm this is intentional.
            let pk_cols: Vec<ColumnStorageInfo> = columns
                .iter()
                .filter(|c| !c.is_system_column && !c.is_nullable)
                .take(1)
                .cloned()
                .collect();
            let mut undo_recs = scan_undo_for_deletes(uts, tid, &pk_cols)?;
            all_records.append(&mut undo_recs);
        }
    }
    all_records.retain(|r| r.confidence >= min_confidence);
    // Note: trx_id filtering also drops records lacking a transaction id.
    if let Some(min_trx) = min_trx_id {
        all_records.retain(|r| r.trx_id.is_some_and(|t| t >= min_trx));
    }
    // Highest confidence first; ties broken by ascending page number.
    all_records.sort_by(|a, b| {
        b.confidence
            .partial_cmp(&a.confidence)
            .unwrap_or(std::cmp::Ordering::Equal)
            .then(a.page_number.cmp(&b.page_number))
    });
    let summary = UndeleteSummary {
        total: all_records.len(),
        delete_marked: all_records
            .iter()
            .filter(|r| matches!(r.source, RecoverySource::DeleteMarked))
            .count(),
        free_list: all_records
            .iter()
            .filter(|r| matches!(r.source, RecoverySource::FreeList))
            .count(),
        undo_log: all_records
            .iter()
            .filter(|r| matches!(r.source, RecoverySource::UndoLog))
            .count(),
    };
    Ok(UndeleteScanResult {
        table_name,
        column_names: col_names,
        records: all_records,
        summary,
    })
}
/// Scan an in-memory `.ibd` image for delete-marked and free-list records.
///
/// Returns `Ok(None)` when no column layout can be recovered from the SDI,
/// since records cannot be decoded without it. Undo-log scanning is not
/// performed here, so `summary.undo_log` is always zero.
pub fn scan_deleted_from_bytes(
    data: &[u8],
    target_page: Option<u64>,
) -> Result<Option<UndeleteScanResult>, IdbError> {
    let mut ts = Tablespace::from_bytes(data.to_vec())?;
    let table_name = extract_table_name(&mut ts);
    let Some((columns, clustered_index_id)) = extract_column_layout(&mut ts) else {
        return Ok(None);
    };
    let page_size = ts.page_size();
    let column_names: Vec<String> = columns
        .iter()
        .filter(|c| !c.is_system_column)
        .map(|c| c.name.clone())
        .collect();
    let mut found = Vec::new();
    ts.for_each_page(|pn, pdata| {
        // Honor an explicit single-page filter when one was requested.
        if target_page.is_some_and(|t| pn != t) {
            return Ok(());
        }
        let Some(fil) = FilHeader::parse(pdata) else {
            return Ok(());
        };
        if fil.page_type != PageType::Index {
            return Ok(());
        }
        let Some(idx) = IndexHeader::parse(pdata) else {
            return Ok(());
        };
        // Only clustered-index leaf pages hold full user records.
        if idx.index_id != clustered_index_id || !idx.is_leaf() {
            return Ok(());
        }
        found.extend(scan_delete_marked_records(pdata, pn, &columns, page_size));
        found.extend(scan_free_list_records(pdata, pn, &columns, page_size));
        Ok(())
    })?;
    // Highest confidence first; ties broken by ascending page number.
    found.sort_by(|a, b| {
        b.confidence
            .partial_cmp(&a.confidence)
            .unwrap_or(std::cmp::Ordering::Equal)
            .then(a.page_number.cmp(&b.page_number))
    });
    // Tally sources in a single pass.
    let mut delete_marked = 0;
    let mut free_list = 0;
    for rec in &found {
        match rec.source {
            RecoverySource::DeleteMarked => delete_marked += 1,
            RecoverySource::FreeList => free_list += 1,
            RecoverySource::UndoLog => {}
        }
    }
    let summary = UndeleteSummary {
        total: found.len(),
        delete_marked,
        free_list,
        undo_log: 0,
    };
    Ok(Some(UndeleteScanResult {
        table_name,
        column_names,
        records: found,
        summary,
    }))
}
/// Render a decoded field as a SQL literal.
///
/// Strings are single-quoted with embedded quotes doubled; hex payloads
/// use the `X'…'` blob-literal syntax.
pub fn field_value_to_sql(val: &FieldValue) -> String {
    match val {
        FieldValue::Null => String::from("NULL"),
        FieldValue::Int(i) => i.to_string(),
        FieldValue::Uint(u) => u.to_string(),
        FieldValue::Float(f) => f.to_string(),
        FieldValue::Double(d) => d.to_string(),
        FieldValue::Str(s) => {
            // Double any embedded single quotes per SQL escaping rules.
            let escaped = s.replace('\'', "''");
            format!("'{}'", escaped)
        }
        FieldValue::Hex(h) => format!("X'{}'", h),
    }
}
pub fn field_value_to_json(val: &FieldValue) -> serde_json::Value {
match val {
FieldValue::Null => serde_json::Value::Null,
FieldValue::Int(n) => serde_json::json!(*n),
FieldValue::Uint(n) => serde_json::json!(*n),
FieldValue::Float(f) => serde_json::json!(*f),
FieldValue::Double(d) => serde_json::json!(*d),
FieldValue::Str(s) => serde_json::json!(s),
FieldValue::Hex(h) => serde_json::json!(h),
}
}
#[cfg(test)]
mod tests {
    use super::*;
    // Enum variants serialize via #[serde(rename_all = "snake_case")].
    #[test]
    fn test_recovery_source_serialization() {
        let json = serde_json::to_string(&RecoverySource::DeleteMarked).unwrap();
        assert_eq!(json, "\"delete_marked\"");
    }
    #[test]
    fn test_undelete_summary_serialization() {
        let summary = UndeleteSummary {
            total: 5,
            delete_marked: 3,
            free_list: 2,
            undo_log: 0,
        };
        let json = serde_json::to_string(&summary).unwrap();
        assert!(json.contains("\"total\":5"));
        assert!(json.contains("\"delete_marked\":3"));
    }
    // None fields marked skip_serializing_if must be absent from JSON.
    #[test]
    fn test_undeleted_record_serialization() {
        let rec = UndeletedRecord {
            source: RecoverySource::FreeList,
            confidence: 0.7,
            trx_id: Some(42),
            page_number: 4,
            offset: 200,
            columns: vec![
                ("id".to_string(), FieldValue::Int(1)),
                ("name".to_string(), FieldValue::Str("test".to_string())),
            ],
            raw_hex: None,
        };
        let json = serde_json::to_string(&rec).unwrap();
        assert!(json.contains("\"free_list\""));
        assert!(json.contains("\"confidence\":0.7"));
        assert!(!json.contains("raw_hex"));
    }
    // Covers NULL, numeric passthrough, quote escaping, and hex literals.
    #[test]
    fn test_field_value_to_sql() {
        assert_eq!(field_value_to_sql(&FieldValue::Null), "NULL");
        assert_eq!(field_value_to_sql(&FieldValue::Int(42)), "42");
        assert_eq!(
            field_value_to_sql(&FieldValue::Str("hello".into())),
            "'hello'"
        );
        assert_eq!(
            field_value_to_sql(&FieldValue::Str("it's".into())),
            "'it''s'"
        );
        assert_eq!(
            field_value_to_sql(&FieldValue::Hex("DEADBEEF".into())),
            "X'DEADBEEF'"
        );
    }
    #[test]
    fn test_field_value_to_json() {
        assert_eq!(
            field_value_to_json(&FieldValue::Null),
            serde_json::Value::Null
        );
        assert_eq!(
            field_value_to_json(&FieldValue::Int(42)),
            serde_json::json!(42)
        );
        assert_eq!(
            field_value_to_json(&FieldValue::Str("test".into())),
            serde_json::json!("test")
        );
    }
    // Out-of-range offsets yield an empty string; ranges are clamped.
    #[test]
    fn test_hex_at_offset() {
        let data = [0xDE, 0xAD, 0xBE, 0xEF];
        assert_eq!(hex_at_offset(&data, 0, 4), "deadbeef");
        assert_eq!(hex_at_offset(&data, 2, 10), "beef");
        assert_eq!(hex_at_offset(&data, 10, 4), "");
    }
    // An all-zero page must produce no records from either scanner.
    #[test]
    fn test_scan_delete_marked_empty_page() {
        let page = vec![0u8; 16384];
        let cols = vec![];
        let result = scan_delete_marked_records(&page, 0, &cols, 16384);
        assert!(result.is_empty());
    }
    #[test]
    fn test_scan_free_list_no_index() {
        let page = vec![0u8; 16384];
        let cols = vec![];
        let result = scan_free_list_records(&page, 0, &cols, 16384);
        assert!(result.is_empty());
    }
    #[test]
    fn test_scan_result_full_serialization() {
        let result = UndeleteScanResult {
            table_name: Some("users".to_string()),
            column_names: vec!["id".to_string(), "name".to_string()],
            records: vec![],
            summary: UndeleteSummary {
                total: 0,
                delete_marked: 0,
                free_list: 0,
                undo_log: 0,
            },
        };
        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("\"table_name\":\"users\""));
        assert!(json.contains("\"column_names\""));
    }
}