1use serde::Serialize;
17
18use crate::innodb::export::{decode_page_records, extract_column_layout, extract_table_name};
19use crate::innodb::field_decode::{self, ColumnStorageInfo, FieldValue};
20use crate::innodb::index::IndexHeader;
21use crate::innodb::page::FilHeader;
22use crate::innodb::page_types::PageType;
23use crate::innodb::record::{CompactRecordHeader, RecordType};
24use crate::innodb::schema::SdiEnvelope;
25use crate::innodb::sdi;
26use crate::innodb::tablespace::Tablespace;
27use crate::innodb::undo::{parse_undo_records, UndoRecordType, UndoState};
28use crate::IdbError;
29
30#[derive(Debug, Clone, Serialize)]
36#[serde(rename_all = "snake_case")]
37pub enum RecoverySource {
38 DeleteMarked,
40 FreeList,
42 UndoLog,
44}
45
46#[derive(Debug, Clone, Serialize)]
48pub struct UndeletedRecord {
49 pub source: RecoverySource,
51 pub confidence: f64,
53 #[serde(skip_serializing_if = "Option::is_none")]
55 pub trx_id: Option<u64>,
56 pub page_number: u64,
58 pub offset: usize,
60 pub columns: Vec<(String, FieldValue)>,
62 #[serde(skip_serializing_if = "Option::is_none")]
64 pub raw_hex: Option<String>,
65}
66
67#[derive(Debug, Clone, Serialize)]
69pub struct UndeleteSummary {
70 pub total: usize,
72 pub delete_marked: usize,
74 pub free_list: usize,
76 pub undo_log: usize,
78}
79
80#[derive(Debug, Clone, Serialize)]
82pub struct UndeleteScanResult {
83 #[serde(skip_serializing_if = "Option::is_none")]
85 pub table_name: Option<String>,
86 pub column_names: Vec<String>,
88 pub records: Vec<UndeletedRecord>,
90 pub summary: UndeleteSummary,
92}
93
94pub fn scan_delete_marked_records(
103 page_data: &[u8],
104 page_number: u64,
105 columns: &[ColumnStorageInfo],
106 page_size: u32,
107) -> Vec<UndeletedRecord> {
108 let rows = decode_page_records(page_data, columns, true, true, page_size);
109
110 rows.into_iter()
111 .map(|row| {
112 let trx_id = row.iter().find_map(|(name, val)| {
114 if name == "DB_TRX_ID" {
115 match val {
116 FieldValue::Uint(v) => Some(*v),
117 FieldValue::Int(v) => Some(*v as u64),
118 FieldValue::Hex(h) => {
119 u64::from_str_radix(h.trim_start_matches("0x"), 16).ok()
120 }
121 _ => None,
122 }
123 } else {
124 None
125 }
126 });
127
128 let user_columns: Vec<(String, FieldValue)> = row
130 .into_iter()
131 .filter(|(name, _)| name != "DB_TRX_ID" && name != "DB_ROLL_PTR")
132 .collect();
133
134 UndeletedRecord {
135 source: RecoverySource::DeleteMarked,
136 confidence: 1.0,
137 trx_id,
138 page_number,
139 offset: 0,
140 columns: user_columns,
141 raw_hex: None,
142 }
143 })
144 .collect()
145}
146
147pub fn scan_free_list_records(
157 page_data: &[u8],
158 page_number: u64,
159 columns: &[ColumnStorageInfo],
160 page_size: u32,
161) -> Vec<UndeletedRecord> {
162 let mut records = Vec::new();
163
164 let idx_hdr = match IndexHeader::parse(page_data) {
165 Some(h) => h,
166 None => return records,
167 };
168
169 if idx_hdr.free == 0 || !idx_hdr.is_compact() {
170 return records;
171 }
172
173 let mut visited = std::collections::HashSet::new();
174 let mut offset = idx_hdr.free as usize;
175 let ps = page_size as usize;
176 let max_iterations = 10_000;
177 let mut iterations = 0;
178
179 while offset > 0 && offset < ps && iterations < max_iterations {
180 if !visited.insert(offset) {
181 break; }
183 iterations += 1;
184
185 if offset < 5 || offset >= ps {
187 break;
188 }
189 let hdr_start = offset - 5;
190 if hdr_start + 5 > page_data.len() {
191 break;
192 }
193
194 let hdr = match CompactRecordHeader::parse(&page_data[hdr_start..]) {
195 Some(h) => h,
196 None => break,
197 };
198
199 if matches!(hdr.rec_type, RecordType::Infimum | RecordType::Supremum) {
202 break;
203 }
204
205 let (decoded_cols, confidence, raw_hex) = attempt_field_decode(page_data, offset, columns);
207
208 if !decoded_cols.is_empty() || raw_hex.is_some() {
209 records.push(UndeletedRecord {
210 source: RecoverySource::FreeList,
211 confidence,
212 trx_id: None,
213 page_number,
214 offset,
215 columns: decoded_cols,
216 raw_hex,
217 });
218 }
219
220 let next_rel = hdr.next_offset;
222 if next_rel == 0 {
223 break;
224 }
225 let next_abs = offset as i64 + next_rel as i64;
226 if next_abs <= 0 || next_abs as usize >= ps {
227 break;
228 }
229 offset = next_abs as usize;
230 }
231
232 records
233}
234
235fn attempt_field_decode(
237 page_data: &[u8],
238 record_offset: usize,
239 columns: &[ColumnStorageInfo],
240) -> (Vec<(String, FieldValue)>, f64, Option<String>) {
241 let n_nullable = columns.iter().filter(|c| c.is_nullable).count();
242 let n_variable = columns.iter().filter(|c| c.is_variable).count();
243
244 let (nulls, var_lengths) = match crate::innodb::record::read_variable_field_lengths(
245 page_data,
246 record_offset,
247 n_nullable,
248 n_variable,
249 ) {
250 Some(r) => r,
251 None => {
252 let hex = hex_at_offset(page_data, record_offset, 64);
254 return (Vec::new(), 0.2, Some(hex));
255 }
256 };
257
258 let mut row = Vec::new();
259 let mut pos = record_offset;
260 let mut null_idx = 0;
261 let mut var_idx = 0;
262 let mut decoded_count = 0;
263 let mut total_user_cols = 0;
264
265 for col in columns {
266 if col.is_system_column {
267 if col.fixed_len > 0 {
268 pos += col.fixed_len;
269 }
270 continue;
271 }
272 total_user_cols += 1;
273
274 if col.is_nullable {
275 if null_idx < nulls.len() && nulls[null_idx] {
276 row.push((col.name.clone(), FieldValue::Null));
277 null_idx += 1;
278 decoded_count += 1;
279 continue;
280 }
281 null_idx += 1;
282 }
283
284 if col.is_variable {
285 let len = if var_idx < var_lengths.len() {
286 var_lengths[var_idx]
287 } else {
288 0
289 };
290 var_idx += 1;
291
292 if pos + len <= page_data.len() && len < 65536 {
293 let val = field_decode::decode_field(&page_data[pos..pos + len], col);
294 row.push((col.name.clone(), val));
295 pos += len;
296 decoded_count += 1;
297 } else {
298 row.push((col.name.clone(), FieldValue::Null));
299 }
300 } else {
301 let len = col.fixed_len;
302 if len > 0 && pos + len <= page_data.len() {
303 let val = field_decode::decode_field(&page_data[pos..pos + len], col);
304 row.push((col.name.clone(), val));
305 pos += len;
306 decoded_count += 1;
307 } else {
308 row.push((col.name.clone(), FieldValue::Null));
309 }
310 }
311 }
312
313 let confidence = if total_user_cols == 0 {
314 0.2
315 } else if decoded_count == total_user_cols {
316 0.7
317 } else if decoded_count > 0 {
318 0.4
319 } else {
320 0.2
321 };
322
323 (row, confidence, None)
324}
325
/// Renders up to `max_len` bytes of `data`, starting at `offset`, as a
/// lowercase hex string with no separators.
///
/// The window is clamped to the end of `data`. An `offset` at or past the
/// end yields an empty string. Arbitrarily large `offset` / `max_len` values
/// are safe: the bounds are computed without adding the two together, so
/// the arithmetic cannot overflow.
fn hex_at_offset(data: &[u8], offset: usize, max_len: usize) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let tail = match data.get(offset..) {
        Some(tail) => tail,
        None => return String::new(),
    };
    let window = &tail[..max_len.min(tail.len())];

    // Two hex digits per byte, written straight into one buffer.
    let mut out = String::with_capacity(window.len() * 2);
    for &byte in window {
        out.push(HEX_DIGITS[(byte >> 4) as usize] as char);
        out.push(HEX_DIGITS[(byte & 0x0f) as usize] as char);
    }
    out
}
338
339pub fn scan_undo_for_deletes(
347 ts: &mut Tablespace,
348 target_table_id: u64,
349 pk_columns: &[ColumnStorageInfo],
350) -> Result<Vec<UndeletedRecord>, IdbError> {
351 let mut records = Vec::new();
352
353 ts.for_each_page(|page_num, page_data| {
354 let hdr = match FilHeader::parse(page_data) {
355 Some(h) => h,
356 None => return Ok(()),
357 };
358
359 if hdr.page_type != PageType::UndoLog {
360 return Ok(());
361 }
362
363 let seg_state = crate::innodb::undo::UndoSegmentHeader::parse(page_data).map(|s| s.state);
365
366 let undo_recs = parse_undo_records(page_data);
367
368 for urec in &undo_recs {
369 if urec.record_type != UndoRecordType::DelMarkRec {
370 continue;
371 }
372 if urec.table_id != target_table_id {
373 continue;
374 }
375
376 let mut cols = Vec::new();
378 for (i, pk_bytes) in urec.pk_fields.iter().enumerate() {
379 let col_name = if i < pk_columns.len() {
380 pk_columns[i].name.clone()
381 } else {
382 format!("pk_{}", i)
383 };
384
385 let val = if i < pk_columns.len() {
386 field_decode::decode_field(pk_bytes, &pk_columns[i])
387 } else {
388 FieldValue::Hex(
389 pk_bytes
390 .iter()
391 .map(|b| format!("{:02x}", b))
392 .collect::<Vec<_>>()
393 .join(""),
394 )
395 };
396
397 cols.push((col_name, val));
398 }
399
400 let confidence = match seg_state {
401 Some(UndoState::Active) => 0.3,
402 Some(UndoState::Cached) | Some(UndoState::ToPurge) => 0.1,
403 _ => 0.2,
404 };
405
406 records.push(UndeletedRecord {
407 source: RecoverySource::UndoLog,
408 confidence,
409 trx_id: urec.trx_id,
410 page_number: page_num,
411 offset: urec.offset,
412 columns: cols,
413 raw_hex: None,
414 });
415 }
416
417 Ok(())
418 })?;
419
420 Ok(records)
421}
422
423pub fn extract_table_id(ts: &mut Tablespace) -> Option<u64> {
429 let sdi_pages = sdi::find_sdi_pages(ts).ok()?;
430 if sdi_pages.is_empty() {
431 return None;
432 }
433 let records = sdi::extract_sdi_from_pages(ts, &sdi_pages).ok()?;
434
435 for rec in &records {
436 if rec.sdi_type == 1 {
437 let envelope: SdiEnvelope = serde_json::from_str(&rec.data).ok()?;
438 if envelope.dd_object.se_private_id > 0 {
439 return Some(envelope.dd_object.se_private_id);
440 }
441 }
442 }
443 None
444}
445
446pub fn scan_undeleted(
457 ts: &mut Tablespace,
458 undo_ts: Option<&mut Tablespace>,
459 min_confidence: f64,
460 min_trx_id: Option<u64>,
461 target_page: Option<u64>,
462) -> Result<UndeleteScanResult, IdbError> {
463 let table_name = extract_table_name(ts);
464
465 let (columns, clustered_index_id) = extract_column_layout(ts).ok_or_else(|| {
467 IdbError::Parse(
468 "Cannot extract column layout from SDI (pre-8.0 tablespace or missing SDI)".to_string(),
469 )
470 })?;
471
472 let page_size = ts.page_size();
473 let col_names: Vec<String> = columns
474 .iter()
475 .filter(|c| !c.is_system_column)
476 .map(|c| c.name.clone())
477 .collect();
478
479 let mut all_records = Vec::new();
480
481 let mut leaf_pages: Vec<(u64, Vec<u8>)> = Vec::new();
483 ts.for_each_page(|pn, pdata| {
484 if let Some(target) = target_page {
485 if pn != target {
486 return Ok(());
487 }
488 }
489 let hdr = match FilHeader::parse(pdata) {
490 Some(h) => h,
491 None => return Ok(()),
492 };
493 if hdr.page_type != PageType::Index {
494 return Ok(());
495 }
496 let idx_hdr = match IndexHeader::parse(pdata) {
497 Some(h) => h,
498 None => return Ok(()),
499 };
500 if idx_hdr.index_id != clustered_index_id || !idx_hdr.is_leaf() {
501 return Ok(());
502 }
503 leaf_pages.push((pn, pdata.to_vec()));
504 Ok(())
505 })?;
506
507 for (pn, pdata) in &leaf_pages {
509 let mut dm = scan_delete_marked_records(pdata, *pn, &columns, page_size);
510 all_records.append(&mut dm);
511
512 let mut fl = scan_free_list_records(pdata, *pn, &columns, page_size);
513 all_records.append(&mut fl);
514 }
515
516 if let Some(uts) = undo_ts {
518 let table_id = extract_table_id(ts);
519 if let Some(tid) = table_id {
520 let pk_cols: Vec<ColumnStorageInfo> = columns
522 .iter()
523 .filter(|c| !c.is_system_column && !c.is_nullable)
524 .take(1) .cloned()
526 .collect();
527
528 let mut undo_recs = scan_undo_for_deletes(uts, tid, &pk_cols)?;
529 all_records.append(&mut undo_recs);
530 }
531 }
532
533 all_records.retain(|r| r.confidence >= min_confidence);
535 if let Some(min_trx) = min_trx_id {
536 all_records.retain(|r| r.trx_id.is_some_and(|t| t >= min_trx));
537 }
538
539 all_records.sort_by(|a, b| {
541 b.confidence
542 .partial_cmp(&a.confidence)
543 .unwrap_or(std::cmp::Ordering::Equal)
544 .then(a.page_number.cmp(&b.page_number))
545 });
546
547 let summary = UndeleteSummary {
548 total: all_records.len(),
549 delete_marked: all_records
550 .iter()
551 .filter(|r| matches!(r.source, RecoverySource::DeleteMarked))
552 .count(),
553 free_list: all_records
554 .iter()
555 .filter(|r| matches!(r.source, RecoverySource::FreeList))
556 .count(),
557 undo_log: all_records
558 .iter()
559 .filter(|r| matches!(r.source, RecoverySource::UndoLog))
560 .count(),
561 };
562
563 Ok(UndeleteScanResult {
564 table_name,
565 column_names: col_names,
566 records: all_records,
567 summary,
568 })
569}
570
571pub fn scan_deleted_from_bytes(
580 data: &[u8],
581 target_page: Option<u64>,
582) -> Result<Option<UndeleteScanResult>, IdbError> {
583 let mut ts = Tablespace::from_bytes(data.to_vec())?;
584
585 let table_name = extract_table_name(&mut ts);
586
587 let (columns, clustered_index_id) = match extract_column_layout(&mut ts) {
588 Some(pair) => pair,
589 None => return Ok(None),
590 };
591
592 let page_size = ts.page_size();
593 let col_names: Vec<String> = columns
594 .iter()
595 .filter(|c| !c.is_system_column)
596 .map(|c| c.name.clone())
597 .collect();
598
599 let mut all_records = Vec::new();
600
601 ts.for_each_page(|pn, pdata| {
602 if let Some(target) = target_page {
603 if pn != target {
604 return Ok(());
605 }
606 }
607 let hdr = match FilHeader::parse(pdata) {
608 Some(h) => h,
609 None => return Ok(()),
610 };
611 if hdr.page_type != PageType::Index {
612 return Ok(());
613 }
614 let idx_hdr = match IndexHeader::parse(pdata) {
615 Some(h) => h,
616 None => return Ok(()),
617 };
618 if idx_hdr.index_id != clustered_index_id || !idx_hdr.is_leaf() {
619 return Ok(());
620 }
621
622 let mut dm = scan_delete_marked_records(pdata, pn, &columns, page_size);
623 all_records.append(&mut dm);
624
625 let mut fl = scan_free_list_records(pdata, pn, &columns, page_size);
626 all_records.append(&mut fl);
627
628 Ok(())
629 })?;
630
631 all_records.sort_by(|a, b| {
632 b.confidence
633 .partial_cmp(&a.confidence)
634 .unwrap_or(std::cmp::Ordering::Equal)
635 .then(a.page_number.cmp(&b.page_number))
636 });
637
638 let summary = UndeleteSummary {
639 total: all_records.len(),
640 delete_marked: all_records
641 .iter()
642 .filter(|r| matches!(r.source, RecoverySource::DeleteMarked))
643 .count(),
644 free_list: all_records
645 .iter()
646 .filter(|r| matches!(r.source, RecoverySource::FreeList))
647 .count(),
648 undo_log: 0,
649 };
650
651 Ok(Some(UndeleteScanResult {
652 table_name,
653 column_names: col_names,
654 records: all_records,
655 summary,
656 }))
657}
658
659pub fn field_value_to_sql(val: &FieldValue) -> String {
665 match val {
666 FieldValue::Null => "NULL".to_string(),
667 FieldValue::Int(n) => n.to_string(),
668 FieldValue::Uint(n) => n.to_string(),
669 FieldValue::Float(f) => f.to_string(),
670 FieldValue::Double(d) => d.to_string(),
671 FieldValue::Str(s) => format!("'{}'", s.replace('\'', "''")),
672 FieldValue::Hex(h) => format!("X'{}'", h),
673 }
674}
675
676pub fn field_value_to_json(val: &FieldValue) -> serde_json::Value {
678 match val {
679 FieldValue::Null => serde_json::Value::Null,
680 FieldValue::Int(n) => serde_json::json!(*n),
681 FieldValue::Uint(n) => serde_json::json!(*n),
682 FieldValue::Float(f) => serde_json::json!(*f),
683 FieldValue::Double(d) => serde_json::json!(*d),
684 FieldValue::Str(s) => serde_json::json!(s),
685 FieldValue::Hex(h) => serde_json::json!(h),
686 }
687}
688
#[cfg(test)]
mod tests {
    use super::*;

    /// Size of the all-zero page used by the scanner smoke tests.
    const BLANK_PAGE_LEN: usize = 16384;

    /// Enum variants serialize in snake_case.
    #[test]
    fn test_recovery_source_serialization() {
        let encoded = serde_json::to_string(&RecoverySource::DeleteMarked).unwrap();
        assert_eq!(encoded, r#""delete_marked""#);
    }

    /// Summary counters appear under their field names.
    #[test]
    fn test_undelete_summary_serialization() {
        let encoded = serde_json::to_string(&UndeleteSummary {
            total: 5,
            delete_marked: 3,
            free_list: 2,
            undo_log: 0,
        })
        .unwrap();

        for fragment in [r#""total":5"#, r#""delete_marked":3"#] {
            assert!(encoded.contains(fragment));
        }
    }

    /// A record without a hex dump omits the `raw_hex` key entirely.
    #[test]
    fn test_undeleted_record_serialization() {
        let record = UndeletedRecord {
            source: RecoverySource::FreeList,
            confidence: 0.7,
            trx_id: Some(42),
            page_number: 4,
            offset: 200,
            columns: vec![
                ("id".to_string(), FieldValue::Int(1)),
                ("name".to_string(), FieldValue::Str("test".to_string())),
            ],
            raw_hex: None,
        };
        let encoded = serde_json::to_string(&record).unwrap();

        assert!(encoded.contains(r#""free_list""#));
        assert!(encoded.contains(r#""confidence":0.7"#));
        assert!(!encoded.contains("raw_hex"));
    }

    /// SQL literal rendering for the common value kinds.
    #[test]
    fn test_field_value_to_sql() {
        let cases = [
            (FieldValue::Null, "NULL"),
            (FieldValue::Int(42), "42"),
            (FieldValue::Str("hello".into()), "'hello'"),
            (FieldValue::Str("it's".into()), "'it''s'"),
            (FieldValue::Hex("DEADBEEF".into()), "X'DEADBEEF'"),
        ];

        for (value, expected) in &cases {
            assert_eq!(field_value_to_sql(value), *expected);
        }
    }

    /// JSON conversion for null, integer and string values.
    #[test]
    fn test_field_value_to_json() {
        let cases = [
            (FieldValue::Null, serde_json::Value::Null),
            (FieldValue::Int(42), serde_json::json!(42)),
            (FieldValue::Str("test".into()), serde_json::json!("test")),
        ];

        for (value, expected) in &cases {
            assert_eq!(&field_value_to_json(value), expected);
        }
    }

    /// The hex window is clamped to the data and empty past its end.
    #[test]
    fn test_hex_at_offset() {
        let bytes = [0xDE, 0xAD, 0xBE, 0xEF];

        for (offset, max_len, expected) in [(0, 4, "deadbeef"), (2, 10, "beef"), (10, 4, "")] {
            assert_eq!(hex_at_offset(&bytes, offset, max_len), expected);
        }
    }

    /// An all-zero page with no column layout yields no delete-marked rows.
    #[test]
    fn test_scan_delete_marked_empty_page() {
        let blank = vec![0u8; BLANK_PAGE_LEN];
        let recovered = scan_delete_marked_records(&blank, 0, &[], 16384);
        assert!(recovered.is_empty());
    }

    /// An all-zero page has no free list to walk.
    #[test]
    fn test_scan_free_list_no_index() {
        let blank = vec![0u8; BLANK_PAGE_LEN];
        let recovered = scan_free_list_records(&blank, 0, &[], 16384);
        assert!(recovered.is_empty());
    }

    /// A full scan result serializes its table name and column list.
    #[test]
    fn test_scan_result_full_serialization() {
        let empty_summary = UndeleteSummary {
            total: 0,
            delete_marked: 0,
            free_list: 0,
            undo_log: 0,
        };
        let scan = UndeleteScanResult {
            table_name: Some("users".to_string()),
            column_names: vec!["id".to_string(), "name".to_string()],
            records: vec![],
            summary: empty_summary,
        };
        let encoded = serde_json::to_string(&scan).unwrap();

        assert!(encoded.contains(r#""table_name":"users""#));
        assert!(encoded.contains(r#""column_names""#));
    }
}