// idb/cli/corrupt.rs

use std::fs::{File, OpenOptions};
use std::io::{Seek, SeekFrom, Write};

use colored::Colorize;
use rand::Rng;
use serde::Serialize;

use crate::cli::wprintln;
use crate::innodb::checksum::{validate_checksum, ChecksumAlgorithm, ChecksumResult};
use crate::innodb::constants::{SIZE_FIL_HEAD, SIZE_FIL_TRAILER};
use crate::innodb::tablespace::Tablespace;
use crate::util::hex::format_bytes;
use crate::IdbError;
14
/// Options for the `inno corrupt` subcommand.
///
/// Targeting precedence as applied by `execute`: an explicit `offset` wins
/// over everything else; otherwise `header` is checked before `records`, and
/// with neither flag set the write starts at the first byte of the page.
#[derive(Debug, Clone)]
pub struct CorruptOptions {
    /// Path to the InnoDB tablespace file (.ibd).
    pub file: String,
    /// Page number to corrupt (random page chosen when not specified).
    pub page: Option<u64>,
    /// Number of random bytes to write.
    pub bytes: usize,
    /// Target the FIL header area (first 38 bytes of the page).
    pub header: bool,
    /// Target the record data area (after page header, before trailer).
    pub records: bool,
    /// Absolute byte offset to corrupt (bypasses page calculation).
    pub offset: Option<u64>,
    /// Show before/after checksum comparison.
    pub verify: bool,
    /// Emit output as JSON.
    pub json: bool,
    /// Override the auto-detected page size.
    pub page_size: Option<u32>,
}
36
37#[derive(Serialize)]
38struct CorruptResultJson {
39    file: String,
40    offset: u64,
41    page: Option<u64>,
42    bytes_written: usize,
43    data: String,
44    #[serde(skip_serializing_if = "Option::is_none")]
45    verify: Option<VerifyResultJson>,
46}
47
48#[derive(Serialize)]
49struct VerifyResultJson {
50    page: u64,
51    before: ChecksumInfoJson,
52    after: ChecksumInfoJson,
53}
54
55#[derive(Serialize)]
56struct ChecksumInfoJson {
57    valid: bool,
58    algorithm: String,
59    stored_checksum: u32,
60    calculated_checksum: u32,
61}
62
63/// Inject random bytes into an InnoDB tablespace file to simulate corruption.
64///
65/// Generates cryptographically random bytes and writes them into the file at a
66/// calculated or explicit offset. This is designed for testing checksum
67/// validation (`inno checksum`), InnoDB crash recovery, and backup-restore
68/// verification workflows.
69///
70/// Three targeting modes are available:
71///
72/// - **Header mode** (`-k`): Writes into the 38-byte FIL header area (bytes
73///   0–37 of the page), which will corrupt page metadata like the checksum,
74///   page number, LSN, or space ID.
75/// - **Records mode** (`-r`): Writes into the user data area (after the page
76///   header and before the FIL trailer), corrupting actual row or index data
77///   without necessarily invalidating the stored checksum.
78/// - **Offset mode** (`--offset`): Writes at an absolute file byte position,
79///   bypassing page calculations entirely. Note that `--verify` is unavailable
80///   in this mode since there is no page context.
81///
82/// If no page number is specified, one is chosen at random. With `--verify`,
83/// the page's checksum is validated before and after the write, showing a
84/// before/after comparison to confirm that corruption was successfully applied.
85pub fn execute(opts: &CorruptOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
86    // Absolute offset mode: bypass page calculation entirely
87    if let Some(abs_offset) = opts.offset {
88        return corrupt_at_offset(opts, abs_offset, writer);
89    }
90
91    // Open tablespace to get page size and count
92    let ts = match opts.page_size {
93        Some(ps) => Tablespace::open_with_page_size(&opts.file, ps)?,
94        None => Tablespace::open(&opts.file)?,
95    };
96
97    let page_size = ts.page_size() as usize;
98    let page_count = ts.page_count();
99
100    let mut rng = rand::rng();
101
102    // Choose page
103    let page_num = match opts.page {
104        Some(p) => {
105            if p >= page_count {
106                return Err(IdbError::Argument(format!(
107                    "Page {} out of range (tablespace has {} pages)",
108                    p, page_count
109                )));
110            }
111            p
112        }
113        None => {
114            let p = rng.random_range(0..page_count);
115            if !opts.json {
116                wprintln!(
117                    writer,
118                    "No page specified. Choosing random page {}.",
119                    format!("{}", p).yellow()
120                )?;
121            }
122            p
123        }
124    };
125
126    let byte_start = page_num * page_size as u64;
127
128    // Calculate the offset to corrupt within the page
129    let corrupt_offset = if opts.header {
130        // Corrupt within the FIL header area (first 38 bytes)
131        let header_offset = rng.random_range(0..SIZE_FIL_HEAD as u64);
132        byte_start + header_offset
133    } else if opts.records {
134        // Corrupt within the record data area (after page header, before trailer)
135        let user_data_start = 120u64; // matches Perl USER_DATA_START
136        let max_offset = page_size as u64 - user_data_start - SIZE_FIL_TRAILER as u64;
137        let record_offset = rng.random_range(0..max_offset);
138        byte_start + user_data_start + record_offset
139    } else {
140        // Default: corrupt at page start
141        byte_start
142    };
143
144    // Generate random bytes (full bytes, not nibbles like the Perl version)
145    let random_data: Vec<u8> = (0..opts.bytes).map(|_| rng.random::<u8>()).collect();
146
147    // Read pre-corruption page data for --verify
148    let pre_checksum = if opts.verify {
149        let pre_data = read_page_bytes(&opts.file, page_num, page_size as u32)?;
150        Some(validate_checksum(&pre_data, page_size as u32))
151    } else {
152        None
153    };
154
155    if opts.json {
156        // Write the corruption first, then verify
157        write_corruption(&opts.file, corrupt_offset, &random_data)?;
158        let verify_json = if opts.verify {
159            let post_data = read_page_bytes(&opts.file, page_num, page_size as u32)?;
160            let post_result = validate_checksum(&post_data, page_size as u32);
161            let pre = pre_checksum.unwrap();
162            Some(VerifyResultJson {
163                page: page_num,
164                before: checksum_to_json(&pre),
165                after: checksum_to_json(&post_result),
166            })
167        } else {
168            None
169        };
170        return output_json_with_verify(opts, corrupt_offset, Some(page_num), &random_data, verify_json, writer);
171    }
172
173    wprintln!(
174        writer,
175        "Writing {} bytes of random data to {} at offset {} (page {})...",
176        opts.bytes,
177        opts.file,
178        corrupt_offset,
179        format!("{}", page_num).yellow()
180    )?;
181
182    write_corruption(&opts.file, corrupt_offset, &random_data)?;
183
184    wprintln!(writer, "Data written: {}", format_bytes(&random_data).red())?;
185    wprintln!(writer, "Completed.")?;
186
187    // --verify: show before/after checksum comparison
188    if opts.verify {
189        let post_data = read_page_bytes(&opts.file, page_num, page_size as u32)?;
190        let post_result = validate_checksum(&post_data, page_size as u32);
191        let pre = pre_checksum.unwrap();
192        wprintln!(writer)?;
193        wprintln!(writer, "{}:", "Verification".bold())?;
194        wprintln!(
195            writer,
196            "  Before: {} (algorithm={:?}, stored={}, calculated={})",
197            if pre.valid { "OK".green().to_string() } else { "INVALID".red().to_string() },
198            pre.algorithm, pre.stored_checksum, pre.calculated_checksum
199        )?;
200        wprintln!(
201            writer,
202            "  After:  {} (algorithm={:?}, stored={}, calculated={})",
203            if post_result.valid { "OK".green().to_string() } else { "INVALID".red().to_string() },
204            post_result.algorithm, post_result.stored_checksum, post_result.calculated_checksum
205        )?;
206    }
207
208    Ok(())
209}
210
211fn corrupt_at_offset(opts: &CorruptOptions, abs_offset: u64, writer: &mut dyn Write) -> Result<(), IdbError> {
212    // Validate offset is within file
213    let file_size = File::open(&opts.file)
214        .map_err(|e| IdbError::Io(format!("Cannot open {}: {}", opts.file, e)))?
215        .metadata()
216        .map_err(|e| IdbError::Io(format!("Cannot stat {}: {}", opts.file, e)))?
217        .len();
218
219    if abs_offset >= file_size {
220        return Err(IdbError::Argument(format!(
221            "Offset {} is beyond file size {}",
222            abs_offset, file_size
223        )));
224    }
225
226    let mut rng = rand::rng();
227    let random_data: Vec<u8> = (0..opts.bytes).map(|_| rng.random::<u8>()).collect();
228
229    // Write the corruption
230    write_corruption(&opts.file, abs_offset, &random_data)?;
231
232    if opts.json {
233        return output_json_with_verify(opts, abs_offset, None, &random_data, None, writer);
234    }
235
236    wprintln!(
237        writer,
238        "Writing {} bytes of random data to {} at offset {}...",
239        opts.bytes, opts.file, abs_offset
240    )?;
241
242    wprintln!(writer, "Data written: {}", format_bytes(&random_data).red())?;
243    wprintln!(writer, "Completed.")?;
244
245    if opts.verify {
246        wprintln!(writer, "Note: --verify is not available in absolute offset mode (no page context).")?;
247    }
248
249    Ok(())
250}
251
252fn write_corruption(file_path: &str, offset: u64, data: &[u8]) -> Result<(), IdbError> {
253    let mut file = OpenOptions::new()
254        .write(true)
255        .open(file_path)
256        .map_err(|e| IdbError::Io(format!("Cannot open {} for writing: {}", file_path, e)))?;
257
258    file.seek(SeekFrom::Start(offset))
259        .map_err(|e| IdbError::Io(format!("Cannot seek to offset {}: {}", offset, e)))?;
260
261    file.write_all(data)
262        .map_err(|e| IdbError::Io(format!("Cannot write corruption data: {}", e)))?;
263
264    Ok(())
265}
266
267fn output_json_with_verify(
268    opts: &CorruptOptions,
269    offset: u64,
270    page: Option<u64>,
271    data: &[u8],
272    verify: Option<VerifyResultJson>,
273    writer: &mut dyn Write,
274) -> Result<(), IdbError> {
275    let result = CorruptResultJson {
276        file: opts.file.clone(),
277        offset,
278        page,
279        bytes_written: data.len(),
280        data: format_bytes(data),
281        verify,
282    };
283
284    let json = serde_json::to_string_pretty(&result)
285        .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
286    wprintln!(writer, "{}", json)?;
287
288    Ok(())
289}
290
291fn read_page_bytes(file_path: &str, page_num: u64, page_size: u32) -> Result<Vec<u8>, IdbError> {
292    use std::io::Read;
293    let offset = page_num * page_size as u64;
294    let mut f = File::open(file_path)
295        .map_err(|e| IdbError::Io(format!("Cannot open {}: {}", file_path, e)))?;
296    f.seek(SeekFrom::Start(offset))
297        .map_err(|e| IdbError::Io(format!("Cannot seek to offset {}: {}", offset, e)))?;
298    let mut buf = vec![0u8; page_size as usize];
299    f.read_exact(&mut buf)
300        .map_err(|e| IdbError::Io(format!("Cannot read page {}: {}", page_num, e)))?;
301    Ok(buf)
302}
303
304fn checksum_to_json(result: &crate::innodb::checksum::ChecksumResult) -> ChecksumInfoJson {
305    let algorithm_name = match result.algorithm {
306        ChecksumAlgorithm::Crc32c => "crc32c",
307        ChecksumAlgorithm::InnoDB => "innodb",
308        ChecksumAlgorithm::None => "none",
309    };
310    ChecksumInfoJson {
311        valid: result.valid,
312        algorithm: algorithm_name.to_string(),
313        stored_checksum: result.stored_checksum,
314        calculated_checksum: result.calculated_checksum,
315    }
316}