1use std::io::Write;
2use std::sync::Arc;
3
4use byteorder::{BigEndian, ByteOrder};
5use colored::Colorize;
6use serde::Serialize;
7
8use crate::cli::wprintln;
9use crate::innodb::checksum::{validate_checksum, ChecksumAlgorithm};
10use crate::innodb::constants::*;
11use crate::innodb::page::FilHeader;
12use crate::innodb::page_types::PageType;
13use crate::innodb::write;
14use crate::util::audit::AuditLogger;
15use crate::IdbError;
16
/// User-supplied options for the `defrag` subcommand.
pub struct DefragOptions {
    /// Path to the source tablespace (.ibd) file.
    pub file: String,
    /// Path where the defragmented tablespace is written.
    pub output: String,
    /// Emit per-page progress (e.g. removed-corrupt notices) in text mode.
    pub verbose: bool,
    /// Emit the final report as JSON instead of human-readable text.
    pub json: bool,
    /// Explicit page size override; `None` = auto-detect from the file.
    pub page_size: Option<u32>,
    /// Optional keyring path used to set up decryption for encrypted tablespaces.
    pub keyring: Option<String>,
    /// Open the source file via memory-mapping instead of buffered reads.
    pub mmap: bool,
    /// Optional audit logger; file writes are recorded best-effort when set.
    pub audit_logger: Option<Arc<AuditLogger>>,
}
36
/// JSON report produced when `--json` is requested.
/// Field names form the JSON schema — do not rename without a serde alias.
#[derive(Serialize)]
struct DefragReport {
    /// Source file path (`DefragOptions::file`).
    source: String,
    /// Output file path (`DefragOptions::output`).
    output: String,
    /// Page count of the source tablespace.
    source_pages: u64,
    /// Page count actually written to the output.
    output_pages: u64,
    /// Number of INDEX pages carried over (and reordered).
    index_pages: u64,
    /// Pages dropped because they were all zero bytes.
    empty_removed: u64,
    /// Pages dropped due to unparsable headers or failed checksums.
    corrupt_removed: u64,
    /// Checksum re-validation result of the written output file.
    post_validation: PostValidation,
}
48
/// Checksum re-validation counts for the freshly written output file.
#[derive(Serialize)]
struct PostValidation {
    /// Total pages re-read from the output file.
    total: u64,
    /// Pages whose checksum validated successfully.
    valid: u64,
}
54
/// An INDEX page captured from the source file, with the sort keys used to
/// decide its new physical position in the defragmented output.
struct IndexPageInfo {
    /// Page number in the source file (stable tie-break for sorting).
    original_page_num: u64,
    /// B-tree index id read from the page header (primary sort key).
    index_id: u64,
    /// B-tree level (leaf = 0) read from the page header (secondary sort key).
    level: u16,
    /// Full raw page bytes; mutated in place (offset/prev/next/checksum).
    data: Vec<u8>,
}
62
63pub fn execute(opts: &DefragOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
66 let mut ts = crate::cli::open_tablespace(&opts.file, opts.page_size, opts.mmap)?;
67
68 if let Some(ref keyring_path) = opts.keyring {
69 crate::cli::setup_decryption(&mut ts, keyring_path)?;
70 }
71
72 let page_size = ts.page_size();
73 let page_count = ts.page_count();
74 let vendor_info = ts.vendor_info().clone();
75 let all_data = ts.read_all_pages()?;
76 let ps = page_size as usize;
77
78 if !opts.json {
79 wprintln!(
80 writer,
81 "Defragmenting {} ({} pages)...",
82 opts.file,
83 page_count
84 )?;
85 }
86
87 let page0_data = if ps <= all_data.len() {
89 &all_data[..ps]
90 } else {
91 return Err(IdbError::Parse(
92 "File too small to contain page 0".to_string(),
93 ));
94 };
95
96 let space_id = BigEndian::read_u32(&page0_data[FIL_PAGE_SPACE_ID..]);
97 let fsp_offset = FIL_PAGE_DATA;
98 let flags = BigEndian::read_u32(&page0_data[fsp_offset + FSP_SPACE_FLAGS..]);
99
100 let algorithm = write::detect_algorithm(page0_data, page_size, Some(&vendor_info));
102 let algorithm = if algorithm == ChecksumAlgorithm::None {
103 ChecksumAlgorithm::Crc32c
104 } else {
105 algorithm
106 };
107
108 let mut index_pages: Vec<IndexPageInfo> = Vec::new();
110 let mut other_pages: Vec<Vec<u8>> = Vec::new();
111 let mut empty_removed = 0u64;
112 let mut corrupt_removed = 0u64;
113 let mut max_lsn = 0u64;
114
115 for page_num in 1..page_count {
116 let offset = page_num as usize * ps;
118 if offset + ps > all_data.len() {
119 break;
120 }
121 let page_data = &all_data[offset..offset + ps];
122
123 if page_data.iter().all(|&b| b == 0) {
125 empty_removed += 1;
126 continue;
127 }
128
129 let header = match FilHeader::parse(page_data) {
131 Some(h) => h,
132 None => {
133 corrupt_removed += 1;
134 continue;
135 }
136 };
137
138 let csum = validate_checksum(page_data, page_size, Some(&vendor_info));
140 if !csum.valid {
141 corrupt_removed += 1;
142 if opts.verbose && !opts.json {
143 wprintln!(
144 writer,
145 "Page {:>4}: {} (corrupt checksum)",
146 page_num,
147 "removed".red()
148 )?;
149 }
150 continue;
151 }
152
153 if header.lsn > max_lsn {
154 max_lsn = header.lsn;
155 }
156
157 if header.page_type == PageType::Index {
158 let ph = FIL_PAGE_DATA;
160 let index_id = BigEndian::read_u64(&page_data[ph + PAGE_INDEX_ID..]);
161 let level = BigEndian::read_u16(&page_data[ph + PAGE_LEVEL..]);
162
163 index_pages.push(IndexPageInfo {
164 original_page_num: page_num,
165 index_id,
166 level,
167 data: page_data.to_vec(),
168 });
169 } else {
170 other_pages.push(page_data.to_vec());
171 }
172 }
173
174 index_pages.sort_by(|a, b| {
176 a.index_id
177 .cmp(&b.index_id)
178 .then(a.level.cmp(&b.level))
179 .then(a.original_page_num.cmp(&b.original_page_num))
180 });
181
182 let total_output = 1 + index_pages.len() + other_pages.len();
184 let page0 = write::build_fsp_page(
185 space_id,
186 total_output as u32,
187 flags,
188 max_lsn,
189 page_size,
190 algorithm,
191 );
192
193 let mut output_pages: Vec<Vec<u8>> = Vec::with_capacity(total_output);
194 output_pages.push(page0);
195
196 let index_start_page = 1u32;
198 for (i, idx) in index_pages.iter_mut().enumerate() {
199 let new_page_num = index_start_page + i as u32;
200 BigEndian::write_u32(&mut idx.data[FIL_PAGE_OFFSET..], new_page_num);
201 }
202
203 let mut group_start = 0usize;
205 while group_start < index_pages.len() {
206 let group_index_id = index_pages[group_start].index_id;
207 let group_level = index_pages[group_start].level;
208
209 let mut group_end = group_start + 1;
211 while group_end < index_pages.len()
212 && index_pages[group_end].index_id == group_index_id
213 && index_pages[group_end].level == group_level
214 {
215 group_end += 1;
216 }
217
218 #[allow(clippy::needless_range_loop)]
220 for j in group_start..group_end {
221 let prev = if j == group_start {
222 FIL_NULL
223 } else {
224 index_start_page + j as u32 - 1
225 };
226 let next = if j == group_end - 1 {
227 FIL_NULL
228 } else {
229 index_start_page + j as u32 + 1
230 };
231 BigEndian::write_u32(&mut index_pages[j].data[FIL_PAGE_PREV..], prev);
232 BigEndian::write_u32(&mut index_pages[j].data[FIL_PAGE_NEXT..], next);
233 }
234
235 group_start = group_end;
236 }
237
238 for idx in &mut index_pages {
240 write::fix_page_checksum(&mut idx.data, page_size, algorithm);
241 output_pages.push(idx.data.clone());
242 }
243
244 let other_start_page = index_start_page + index_pages.len() as u32;
246 for (i, mut page) in other_pages.into_iter().enumerate() {
247 let new_page_num = other_start_page + i as u32;
248 BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], new_page_num);
249 BigEndian::write_u32(&mut page[FIL_PAGE_PREV..], FIL_NULL);
251 BigEndian::write_u32(&mut page[FIL_PAGE_NEXT..], FIL_NULL);
252 write::fix_page_checksum(&mut page, page_size, algorithm);
253 output_pages.push(page);
254 }
255
256 write::write_tablespace(&opts.output, &output_pages)?;
258 if let Some(ref logger) = opts.audit_logger {
259 let _ = logger.log_file_write(&opts.output, "defrag", output_pages.len() as u64);
260 }
261
262 let output_count = output_pages.len() as u64;
264 let mut valid_count = 0u64;
265 for i in 0..output_count {
266 let page = write::read_page_raw(&opts.output, i, page_size)?;
267 if validate_checksum(&page, page_size, Some(&vendor_info)).valid {
268 valid_count += 1;
269 }
270 }
271
272 let index_count = index_pages.len() as u64;
273
274 if opts.json {
275 let report = DefragReport {
276 source: opts.file.clone(),
277 output: opts.output.clone(),
278 source_pages: page_count,
279 output_pages: output_count,
280 index_pages: index_count,
281 empty_removed,
282 corrupt_removed,
283 post_validation: PostValidation {
284 total: output_count,
285 valid: valid_count,
286 },
287 };
288 let json = serde_json::to_string_pretty(&report)
289 .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
290 wprintln!(writer, "{}", json)?;
291 } else {
292 wprintln!(writer)?;
293 wprintln!(writer, "Defrag Summary:")?;
294 wprintln!(
295 writer,
296 " Source: {} ({} pages)",
297 opts.file,
298 page_count
299 )?;
300 wprintln!(
301 writer,
302 " Output: {} ({} pages)",
303 opts.output,
304 output_count
305 )?;
306 wprintln!(writer, " INDEX pages: {}", index_count)?;
307 if empty_removed > 0 {
308 wprintln!(writer, " Empty removed: {}", empty_removed)?;
309 }
310 if corrupt_removed > 0 {
311 wprintln!(
312 writer,
313 " Corrupt removed: {}",
314 format!("{}", corrupt_removed).red()
315 )?;
316 }
317 wprintln!(
318 writer,
319 " Post-validation: {}/{} valid checksums",
320 valid_count,
321 output_count
322 )?;
323 }
324
325 Ok(())
326}