Skip to main content

idb/cli/
corrupt.rs

1use std::fs::{File, OpenOptions};
2use std::io::{Seek, SeekFrom, Write};
3
4use colored::Colorize;
5use rand::Rng;
6use serde::Serialize;
7
8use crate::cli::wprintln;
9use crate::innodb::checksum::{validate_checksum, ChecksumAlgorithm};
10use crate::innodb::constants::{SIZE_FIL_HEAD, SIZE_FIL_TRAILER};
11use crate::innodb::tablespace::Tablespace;
12use crate::util::hex::format_bytes;
13use crate::IdbError;
14
/// Options for the `inno corrupt` subcommand.
///
/// Targeting precedence, as implemented by `execute`: `offset` (absolute
/// mode) wins over everything else; otherwise `header` is checked before
/// `records`; when none of the three is set the write starts at the first
/// byte of the selected page.
#[derive(Debug, Clone)]
pub struct CorruptOptions {
    /// Path to the InnoDB tablespace file (.ibd).
    pub file: String,
    /// Page number to corrupt (random page chosen when not specified).
    pub page: Option<u64>,
    /// Number of random bytes to write.
    pub bytes: usize,
    /// Target the FIL header area (first 38 bytes of the page).
    pub header: bool,
    /// Target the record data area (after page header, before trailer).
    pub records: bool,
    /// Absolute byte offset to corrupt (bypasses page calculation).
    pub offset: Option<u64>,
    /// Show before/after checksum comparison.
    pub verify: bool,
    /// Emit output as JSON.
    pub json: bool,
    /// Override the auto-detected page size.
    pub page_size: Option<u32>,
}
36
/// JSON document emitted for a corruption run when `--json` is active.
///
/// Built by `output_json_with_verify`. Fields are serialized in declaration
/// order, so reordering them changes the emitted document.
#[derive(Serialize)]
struct CorruptResultJson {
    /// Path of the file that was written to (copied from the options).
    file: String,
    /// Absolute byte offset in the file where the write started.
    offset: u64,
    /// Page that was targeted; `None` in absolute-offset mode.
    page: Option<u64>,
    /// Number of random bytes written.
    bytes_written: usize,
    /// The written bytes, rendered as text by `format_bytes`.
    data: String,
    /// Before/after checksum comparison; omitted from the output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    verify: Option<VerifyResultJson>,
}
47
/// Before/after checksum comparison for the corrupted page (`--verify`).
#[derive(Serialize)]
struct VerifyResultJson {
    /// Page number whose checksum was validated.
    page: u64,
    /// Checksum state read before the random bytes were written.
    before: ChecksumInfoJson,
    /// Checksum state read back after the write.
    after: ChecksumInfoJson,
}
54
/// JSON view of one checksum validation result, produced by `checksum_to_json`.
#[derive(Serialize)]
struct ChecksumInfoJson {
    /// Whether the validation reported the page checksum as valid.
    valid: bool,
    /// Lower-case algorithm name: `crc32c`, `innodb`, `mariadb_full_crc32` or `none`.
    algorithm: String,
    /// Checksum value reported as stored in the page.
    stored_checksum: u32,
    /// Checksum value reported as calculated from the page contents.
    calculated_checksum: u32,
}
62
63/// Inject random bytes into an InnoDB tablespace file to simulate corruption.
64///
65/// Generates cryptographically random bytes and writes them into the file at a
66/// calculated or explicit offset. This is designed for testing checksum
67/// validation (`inno checksum`), InnoDB crash recovery, and backup-restore
68/// verification workflows.
69///
70/// Three targeting modes are available:
71///
72/// - **Header mode** (`-k`): Writes into the 38-byte FIL header area (bytes
73///   0–37 of the page), which will corrupt page metadata like the checksum,
74///   page number, LSN, or space ID.
75/// - **Records mode** (`-r`): Writes into the user data area (after the page
76///   header and before the FIL trailer), corrupting actual row or index data
77///   without necessarily invalidating the stored checksum.
78/// - **Offset mode** (`--offset`): Writes at an absolute file byte position,
79///   bypassing page calculations entirely. Note that `--verify` is unavailable
80///   in this mode since there is no page context.
81///
82/// If no page number is specified, one is chosen at random. With `--verify`,
83/// the page's checksum is validated before and after the write, showing a
84/// before/after comparison to confirm that corruption was successfully applied.
85pub fn execute(opts: &CorruptOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
86    // Absolute offset mode: bypass page calculation entirely
87    if let Some(abs_offset) = opts.offset {
88        return corrupt_at_offset(opts, abs_offset, writer);
89    }
90
91    // Open tablespace to get page size and count
92    let ts = match opts.page_size {
93        Some(ps) => Tablespace::open_with_page_size(&opts.file, ps)?,
94        None => Tablespace::open(&opts.file)?,
95    };
96
97    let page_size = ts.page_size() as usize;
98    let page_count = ts.page_count();
99
100    let mut rng = rand::rng();
101
102    // Choose page
103    let page_num = match opts.page {
104        Some(p) => {
105            if p >= page_count {
106                return Err(IdbError::Argument(format!(
107                    "Page {} out of range (tablespace has {} pages)",
108                    p, page_count
109                )));
110            }
111            p
112        }
113        None => {
114            let p = rng.random_range(0..page_count);
115            if !opts.json {
116                wprintln!(
117                    writer,
118                    "No page specified. Choosing random page {}.",
119                    format!("{}", p).yellow()
120                )?;
121            }
122            p
123        }
124    };
125
126    let byte_start = page_num * page_size as u64;
127
128    // Calculate the offset to corrupt within the page
129    let corrupt_offset = if opts.header {
130        // Corrupt within the FIL header area (first 38 bytes)
131        let header_offset = rng.random_range(0..SIZE_FIL_HEAD as u64);
132        byte_start + header_offset
133    } else if opts.records {
134        // Corrupt within the record data area (after page header, before trailer)
135        let user_data_start = 120u64; // matches Perl USER_DATA_START
136        let max_offset = page_size as u64 - user_data_start - SIZE_FIL_TRAILER as u64;
137        let record_offset = rng.random_range(0..max_offset);
138        byte_start + user_data_start + record_offset
139    } else {
140        // Default: corrupt at page start
141        byte_start
142    };
143
144    // Generate random bytes (full bytes, not nibbles like the Perl version)
145    let random_data: Vec<u8> = (0..opts.bytes).map(|_| rng.random::<u8>()).collect();
146
147    // Read pre-corruption page data for --verify
148    let pre_checksum = if opts.verify {
149        let pre_data = read_page_bytes(&opts.file, page_num, page_size as u32)?;
150        Some(validate_checksum(&pre_data, page_size as u32, None))
151    } else {
152        None
153    };
154
155    if opts.json {
156        // Write the corruption first, then verify
157        write_corruption(&opts.file, corrupt_offset, &random_data)?;
158        let verify_json = if opts.verify {
159            let post_data = read_page_bytes(&opts.file, page_num, page_size as u32)?;
160            let post_result = validate_checksum(&post_data, page_size as u32, None);
161            let pre = pre_checksum.expect("pre_checksum set when --verify is active");
162            Some(VerifyResultJson {
163                page: page_num,
164                before: checksum_to_json(&pre),
165                after: checksum_to_json(&post_result),
166            })
167        } else {
168            None
169        };
170        return output_json_with_verify(
171            opts,
172            corrupt_offset,
173            Some(page_num),
174            &random_data,
175            verify_json,
176            writer,
177        );
178    }
179
180    wprintln!(
181        writer,
182        "Writing {} bytes of random data to {} at offset {} (page {})...",
183        opts.bytes,
184        opts.file,
185        corrupt_offset,
186        format!("{}", page_num).yellow()
187    )?;
188
189    write_corruption(&opts.file, corrupt_offset, &random_data)?;
190
191    wprintln!(writer, "Data written: {}", format_bytes(&random_data).red())?;
192    wprintln!(writer, "Completed.")?;
193
194    // --verify: show before/after checksum comparison
195    if opts.verify {
196        let post_data = read_page_bytes(&opts.file, page_num, page_size as u32)?;
197        let post_result = validate_checksum(&post_data, page_size as u32, None);
198        let pre = pre_checksum.expect("pre_checksum set when --verify is active");
199        wprintln!(writer)?;
200        wprintln!(writer, "{}:", "Verification".bold())?;
201        wprintln!(
202            writer,
203            "  Before: {} (algorithm={:?}, stored={}, calculated={})",
204            if pre.valid {
205                "OK".green().to_string()
206            } else {
207                "INVALID".red().to_string()
208            },
209            pre.algorithm,
210            pre.stored_checksum,
211            pre.calculated_checksum
212        )?;
213        wprintln!(
214            writer,
215            "  After:  {} (algorithm={:?}, stored={}, calculated={})",
216            if post_result.valid {
217                "OK".green().to_string()
218            } else {
219                "INVALID".red().to_string()
220            },
221            post_result.algorithm,
222            post_result.stored_checksum,
223            post_result.calculated_checksum
224        )?;
225    }
226
227    Ok(())
228}
229
230fn corrupt_at_offset(
231    opts: &CorruptOptions,
232    abs_offset: u64,
233    writer: &mut dyn Write,
234) -> Result<(), IdbError> {
235    // Validate offset is within file
236    let file_size = File::open(&opts.file)
237        .map_err(|e| IdbError::Io(format!("Cannot open {}: {}", opts.file, e)))?
238        .metadata()
239        .map_err(|e| IdbError::Io(format!("Cannot stat {}: {}", opts.file, e)))?
240        .len();
241
242    if abs_offset >= file_size {
243        return Err(IdbError::Argument(format!(
244            "Offset {} is beyond file size {}",
245            abs_offset, file_size
246        )));
247    }
248
249    let mut rng = rand::rng();
250    let random_data: Vec<u8> = (0..opts.bytes).map(|_| rng.random::<u8>()).collect();
251
252    // Write the corruption
253    write_corruption(&opts.file, abs_offset, &random_data)?;
254
255    if opts.json {
256        return output_json_with_verify(opts, abs_offset, None, &random_data, None, writer);
257    }
258
259    wprintln!(
260        writer,
261        "Writing {} bytes of random data to {} at offset {}...",
262        opts.bytes,
263        opts.file,
264        abs_offset
265    )?;
266
267    wprintln!(writer, "Data written: {}", format_bytes(&random_data).red())?;
268    wprintln!(writer, "Completed.")?;
269
270    if opts.verify {
271        wprintln!(
272            writer,
273            "Note: --verify is not available in absolute offset mode (no page context)."
274        )?;
275    }
276
277    Ok(())
278}
279
280fn write_corruption(file_path: &str, offset: u64, data: &[u8]) -> Result<(), IdbError> {
281    let mut file = OpenOptions::new()
282        .write(true)
283        .open(file_path)
284        .map_err(|e| IdbError::Io(format!("Cannot open {} for writing: {}", file_path, e)))?;
285
286    file.seek(SeekFrom::Start(offset))
287        .map_err(|e| IdbError::Io(format!("Cannot seek to offset {}: {}", offset, e)))?;
288
289    file.write_all(data)
290        .map_err(|e| IdbError::Io(format!("Cannot write corruption data: {}", e)))?;
291
292    Ok(())
293}
294
295fn output_json_with_verify(
296    opts: &CorruptOptions,
297    offset: u64,
298    page: Option<u64>,
299    data: &[u8],
300    verify: Option<VerifyResultJson>,
301    writer: &mut dyn Write,
302) -> Result<(), IdbError> {
303    let result = CorruptResultJson {
304        file: opts.file.clone(),
305        offset,
306        page,
307        bytes_written: data.len(),
308        data: format_bytes(data),
309        verify,
310    };
311
312    let json = serde_json::to_string_pretty(&result)
313        .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
314    wprintln!(writer, "{}", json)?;
315
316    Ok(())
317}
318
319fn read_page_bytes(file_path: &str, page_num: u64, page_size: u32) -> Result<Vec<u8>, IdbError> {
320    use std::io::Read;
321    let offset = page_num * page_size as u64;
322    let mut f = File::open(file_path)
323        .map_err(|e| IdbError::Io(format!("Cannot open {}: {}", file_path, e)))?;
324    f.seek(SeekFrom::Start(offset))
325        .map_err(|e| IdbError::Io(format!("Cannot seek to offset {}: {}", offset, e)))?;
326    let mut buf = vec![0u8; page_size as usize];
327    f.read_exact(&mut buf)
328        .map_err(|e| IdbError::Io(format!("Cannot read page {}: {}", page_num, e)))?;
329    Ok(buf)
330}
331
332fn checksum_to_json(result: &crate::innodb::checksum::ChecksumResult) -> ChecksumInfoJson {
333    let algorithm_name = match result.algorithm {
334        ChecksumAlgorithm::Crc32c => "crc32c",
335        ChecksumAlgorithm::InnoDB => "innodb",
336        ChecksumAlgorithm::MariaDbFullCrc32 => "mariadb_full_crc32",
337        ChecksumAlgorithm::None => "none",
338    };
339    ChecksumInfoJson {
340        valid: result.valid,
341        algorithm: algorithm_name.to_string(),
342        stored_checksum: result.stored_checksum,
343        calculated_checksum: result.calculated_checksum,
344    }
345}