//! `inno defrag` subcommand implementation (idb/cli/defrag.rs).
1use std::io::Write;
2use std::sync::Arc;
3
4use byteorder::{BigEndian, ByteOrder};
5use colored::Colorize;
6use serde::Serialize;
7
8use crate::cli::wprintln;
9use crate::innodb::checksum::{validate_checksum, ChecksumAlgorithm};
10use crate::innodb::constants::*;
11use crate::innodb::page::FilHeader;
12use crate::innodb::page_types::PageType;
13use crate::innodb::write;
14use crate::util::audit::AuditLogger;
15use crate::IdbError;
16
/// Options for the `inno defrag` subcommand.
pub struct DefragOptions {
    /// Path to the source InnoDB tablespace file (.ibd).
    pub file: String,
    /// Path to the output file (always creates a new file; the source file
    /// is never modified in place).
    pub output: String,
    /// Show per-page details (e.g. which corrupt pages were removed).
    pub verbose: bool,
    /// Emit the defrag report as JSON instead of human-readable text.
    pub json: bool,
    /// Override the auto-detected page size, in bytes.
    pub page_size: Option<u32>,
    /// Path to a MySQL keyring file for decrypting encrypted tablespaces.
    pub keyring: Option<String>,
    /// Use memory-mapped I/O for file access.
    pub mmap: bool,
    /// Audit logger for recording write operations; the output-file write is
    /// logged on a best-effort basis (logging errors are ignored).
    pub audit_logger: Option<Arc<AuditLogger>>,
}
36
/// JSON-serializable summary of a defrag run (emitted when `json` is set).
#[derive(Serialize)]
struct DefragReport {
    // Source tablespace path.
    source: String,
    // Output tablespace path.
    output: String,
    // Page count of the source file.
    source_pages: u64,
    // Page count of the written output (page 0 + INDEX pages + other pages).
    output_pages: u64,
    // Number of INDEX pages carried over (sorted) into the output.
    index_pages: u64,
    // Pages dropped because they were entirely zero bytes.
    empty_removed: u64,
    // Pages dropped due to an unparsable header or an invalid checksum.
    corrupt_removed: u64,
    // Checksum re-validation results for the written output file.
    post_validation: PostValidation,
}
48
/// Checksum re-validation counts gathered by re-reading the output file.
#[derive(Serialize)]
struct PostValidation {
    // Total pages re-read from the output file.
    total: u64,
    // Pages whose checksum validated successfully.
    valid: u64,
}
54
/// Information about an INDEX page used for sorting.
struct IndexPageInfo {
    // Page number in the source file; used as the final sort tie-breaker so
    // the original relative order is preserved within a group.
    original_page_num: u64,
    // B+Tree index id read from the page header (PAGE_INDEX_ID field).
    index_id: u64,
    // B+Tree level read from the page header (PAGE_LEVEL field).
    level: u16,
    // Full page contents; mutated later to renumber and re-chain the page.
    data: Vec<u8>,
}
62
63/// Defragment a tablespace: remove empty/corrupt pages, sort INDEX pages,
64/// fix prev/next chains, and write a clean output file.
65pub fn execute(opts: &DefragOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
66    let mut ts = crate::cli::open_tablespace(&opts.file, opts.page_size, opts.mmap)?;
67
68    if let Some(ref keyring_path) = opts.keyring {
69        crate::cli::setup_decryption(&mut ts, keyring_path)?;
70    }
71
72    let page_size = ts.page_size();
73    let page_count = ts.page_count();
74    let vendor_info = ts.vendor_info().clone();
75    let all_data = ts.read_all_pages()?;
76    let ps = page_size as usize;
77
78    if !opts.json {
79        wprintln!(
80            writer,
81            "Defragmenting {} ({} pages)...",
82            opts.file,
83            page_count
84        )?;
85    }
86
87    // Read page 0 metadata
88    let page0_data = if ps <= all_data.len() {
89        &all_data[..ps]
90    } else {
91        return Err(IdbError::Parse(
92            "File too small to contain page 0".to_string(),
93        ));
94    };
95
96    let space_id = BigEndian::read_u32(&page0_data[FIL_PAGE_SPACE_ID..]);
97    let fsp_offset = FIL_PAGE_DATA;
98    let flags = BigEndian::read_u32(&page0_data[fsp_offset + FSP_SPACE_FLAGS..]);
99
100    // Detect algorithm
101    let algorithm = write::detect_algorithm(page0_data, page_size, Some(&vendor_info));
102    let algorithm = if algorithm == ChecksumAlgorithm::None {
103        ChecksumAlgorithm::Crc32c
104    } else {
105        algorithm
106    };
107
108    // Classify pages
109    let mut index_pages: Vec<IndexPageInfo> = Vec::new();
110    let mut other_pages: Vec<Vec<u8>> = Vec::new();
111    let mut empty_removed = 0u64;
112    let mut corrupt_removed = 0u64;
113    let mut max_lsn = 0u64;
114
115    for page_num in 1..page_count {
116        // Skip page 0 — we'll build a new one
117        let offset = page_num as usize * ps;
118        if offset + ps > all_data.len() {
119            break;
120        }
121        let page_data = &all_data[offset..offset + ps];
122
123        // Empty page?
124        if page_data.iter().all(|&b| b == 0) {
125            empty_removed += 1;
126            continue;
127        }
128
129        // Parse header
130        let header = match FilHeader::parse(page_data) {
131            Some(h) => h,
132            None => {
133                corrupt_removed += 1;
134                continue;
135            }
136        };
137
138        // Validate checksum
139        let csum = validate_checksum(page_data, page_size, Some(&vendor_info));
140        if !csum.valid {
141            corrupt_removed += 1;
142            if opts.verbose && !opts.json {
143                wprintln!(
144                    writer,
145                    "Page {:>4}: {} (corrupt checksum)",
146                    page_num,
147                    "removed".red()
148                )?;
149            }
150            continue;
151        }
152
153        if header.lsn > max_lsn {
154            max_lsn = header.lsn;
155        }
156
157        if header.page_type == PageType::Index {
158            // Extract index_id and level from the INDEX page header
159            let ph = FIL_PAGE_DATA;
160            let index_id = BigEndian::read_u64(&page_data[ph + PAGE_INDEX_ID..]);
161            let level = BigEndian::read_u16(&page_data[ph + PAGE_LEVEL..]);
162
163            index_pages.push(IndexPageInfo {
164                original_page_num: page_num,
165                index_id,
166                level,
167                data: page_data.to_vec(),
168            });
169        } else {
170            other_pages.push(page_data.to_vec());
171        }
172    }
173
174    // Sort INDEX pages by (index_id, level, original_page_num)
175    index_pages.sort_by(|a, b| {
176        a.index_id
177            .cmp(&b.index_id)
178            .then(a.level.cmp(&b.level))
179            .then(a.original_page_num.cmp(&b.original_page_num))
180    });
181
182    // Build output: page 0 + INDEX pages (sorted) + other pages (original order)
183    let total_output = 1 + index_pages.len() + other_pages.len();
184    let page0 = write::build_fsp_page(
185        space_id,
186        total_output as u32,
187        flags,
188        max_lsn,
189        page_size,
190        algorithm,
191    );
192
193    let mut output_pages: Vec<Vec<u8>> = Vec::with_capacity(total_output);
194    output_pages.push(page0);
195
196    // Assign new page numbers to INDEX pages and fix prev/next chains
197    let index_start_page = 1u32;
198    for (i, idx) in index_pages.iter_mut().enumerate() {
199        let new_page_num = index_start_page + i as u32;
200        BigEndian::write_u32(&mut idx.data[FIL_PAGE_OFFSET..], new_page_num);
201    }
202
203    // Fix prev/next chain pointers within each (index_id, level) group
204    let mut group_start = 0usize;
205    while group_start < index_pages.len() {
206        let group_index_id = index_pages[group_start].index_id;
207        let group_level = index_pages[group_start].level;
208
209        // Find end of group
210        let mut group_end = group_start + 1;
211        while group_end < index_pages.len()
212            && index_pages[group_end].index_id == group_index_id
213            && index_pages[group_end].level == group_level
214        {
215            group_end += 1;
216        }
217
218        // Set prev/next within the group
219        #[allow(clippy::needless_range_loop)]
220        for j in group_start..group_end {
221            let prev = if j == group_start {
222                FIL_NULL
223            } else {
224                index_start_page + j as u32 - 1
225            };
226            let next = if j == group_end - 1 {
227                FIL_NULL
228            } else {
229                index_start_page + j as u32 + 1
230            };
231            BigEndian::write_u32(&mut index_pages[j].data[FIL_PAGE_PREV..], prev);
232            BigEndian::write_u32(&mut index_pages[j].data[FIL_PAGE_NEXT..], next);
233        }
234
235        group_start = group_end;
236    }
237
238    // Recalculate checksums for INDEX pages and add to output
239    for idx in &mut index_pages {
240        write::fix_page_checksum(&mut idx.data, page_size, algorithm);
241        output_pages.push(idx.data.clone());
242    }
243
244    // Assign new page numbers to other pages and add to output
245    let other_start_page = index_start_page + index_pages.len() as u32;
246    for (i, mut page) in other_pages.into_iter().enumerate() {
247        let new_page_num = other_start_page + i as u32;
248        BigEndian::write_u32(&mut page[FIL_PAGE_OFFSET..], new_page_num);
249        // Clear prev/next for non-INDEX pages (they're not part of B+Tree chains)
250        BigEndian::write_u32(&mut page[FIL_PAGE_PREV..], FIL_NULL);
251        BigEndian::write_u32(&mut page[FIL_PAGE_NEXT..], FIL_NULL);
252        write::fix_page_checksum(&mut page, page_size, algorithm);
253        output_pages.push(page);
254    }
255
256    // Write output
257    write::write_tablespace(&opts.output, &output_pages)?;
258    if let Some(ref logger) = opts.audit_logger {
259        let _ = logger.log_file_write(&opts.output, "defrag", output_pages.len() as u64);
260    }
261
262    // Post-validate
263    let output_count = output_pages.len() as u64;
264    let mut valid_count = 0u64;
265    for i in 0..output_count {
266        let page = write::read_page_raw(&opts.output, i, page_size)?;
267        if validate_checksum(&page, page_size, Some(&vendor_info)).valid {
268            valid_count += 1;
269        }
270    }
271
272    let index_count = index_pages.len() as u64;
273
274    if opts.json {
275        let report = DefragReport {
276            source: opts.file.clone(),
277            output: opts.output.clone(),
278            source_pages: page_count,
279            output_pages: output_count,
280            index_pages: index_count,
281            empty_removed,
282            corrupt_removed,
283            post_validation: PostValidation {
284                total: output_count,
285                valid: valid_count,
286            },
287        };
288        let json = serde_json::to_string_pretty(&report)
289            .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
290        wprintln!(writer, "{}", json)?;
291    } else {
292        wprintln!(writer)?;
293        wprintln!(writer, "Defrag Summary:")?;
294        wprintln!(
295            writer,
296            "  Source:          {} ({} pages)",
297            opts.file,
298            page_count
299        )?;
300        wprintln!(
301            writer,
302            "  Output:          {} ({} pages)",
303            opts.output,
304            output_count
305        )?;
306        wprintln!(writer, "  INDEX pages:     {}", index_count)?;
307        if empty_removed > 0 {
308            wprintln!(writer, "  Empty removed:   {}", empty_removed)?;
309        }
310        if corrupt_removed > 0 {
311            wprintln!(
312                writer,
313                "  Corrupt removed: {}",
314                format!("{}", corrupt_removed).red()
315            )?;
316        }
317        wprintln!(
318            writer,
319            "  Post-validation: {}/{} valid checksums",
320            valid_count,
321            output_count
322        )?;
323    }
324
325    Ok(())
326}