Skip to main content

idb/cli/
corrupt.rs

1use std::fs::{File, OpenOptions};
2use std::io::{Seek, SeekFrom, Write};
3use std::sync::Arc;
4
5use colored::Colorize;
6
7use serde::Serialize;
8
9use crate::cli::wprintln;
10use crate::innodb::checksum::{validate_checksum, ChecksumAlgorithm};
11use crate::innodb::constants::{SIZE_FIL_HEAD, SIZE_FIL_TRAILER, SIZE_PAGE_DEFAULT};
12use crate::util::audit::AuditLogger;
13use crate::util::hex::format_bytes;
14use crate::IdbError;
15
16/// Options for the `inno corrupt` subcommand.
17pub struct CorruptOptions {
18    /// Path to the InnoDB tablespace file (.ibd).
19    pub file: String,
20    /// Page number to corrupt (random page chosen when not specified).
21    pub page: Option<u64>,
22    /// Number of random bytes to write.
23    pub bytes: usize,
24    /// Target the FIL header area (first 38 bytes of the page).
25    pub header: bool,
26    /// Target the record data area (after page header, before trailer).
27    pub records: bool,
28    /// Absolute byte offset to corrupt (bypasses page calculation).
29    pub offset: Option<u64>,
30    /// Show before/after checksum comparison.
31    pub verify: bool,
32    /// Emit output as JSON.
33    pub json: bool,
34    /// Override the auto-detected page size.
35    pub page_size: Option<u32>,
36    /// Use memory-mapped I/O for file access.
37    pub mmap: bool,
38    /// Audit logger for recording write operations.
39    pub audit_logger: Option<Arc<AuditLogger>>,
40}
41
42#[derive(Serialize)]
43struct CorruptResultJson {
44    file: String,
45    offset: u64,
46    page: Option<u64>,
47    bytes_written: usize,
48    data: String,
49    #[serde(skip_serializing_if = "Option::is_none")]
50    verify: Option<VerifyResultJson>,
51}
52
53#[derive(Serialize)]
54struct VerifyResultJson {
55    page: u64,
56    before: ChecksumInfoJson,
57    after: ChecksumInfoJson,
58}
59
60#[derive(Serialize)]
61struct ChecksumInfoJson {
62    valid: bool,
63    algorithm: String,
64    stored_checksum: u32,
65    calculated_checksum: u32,
66}
67
68/// Inject random bytes into an InnoDB tablespace file to simulate corruption.
69///
70/// Generates cryptographically random bytes and writes them into the file at a
71/// calculated or explicit offset. This is designed for testing checksum
72/// validation (`inno checksum`), InnoDB crash recovery, and backup-restore
73/// verification workflows.
74///
75/// Three targeting modes are available:
76///
77/// - **Header mode** (`-k`): Writes into the 38-byte FIL header area (bytes
78///   0–37 of the page), which will corrupt page metadata like the checksum,
79///   page number, LSN, or space ID.
80/// - **Records mode** (`-r`): Writes into the user data area (after the page
81///   header and before the FIL trailer), corrupting actual row or index data
82///   without necessarily invalidating the stored checksum.
83/// - **Offset mode** (`--offset`): Writes at an absolute file byte position,
84///   bypassing page calculations entirely. Note that `--verify` is unavailable
85///   in this mode since there is no page context.
86///
87/// If no page number is specified, one is chosen at random. With `--verify`,
88/// the page's checksum is validated before and after the write, showing a
89/// before/after comparison to confirm that corruption was successfully applied.
90pub fn execute(opts: &CorruptOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
91    // Absolute offset mode: bypass page calculation entirely
92    if let Some(abs_offset) = opts.offset {
93        return corrupt_at_offset(opts, abs_offset, writer);
94    }
95
96    // Open tablespace to get page size and count
97    let ts = crate::cli::open_tablespace(&opts.file, opts.page_size, opts.mmap)?;
98
99    let page_size = ts.page_size() as usize;
100    let page_count = ts.page_count();
101
102    // Choose page
103    let page_num = match opts.page {
104        Some(p) => {
105            if p >= page_count {
106                return Err(IdbError::Argument(format!(
107                    "Page {} out of range (tablespace has {} pages)",
108                    p, page_count
109                )));
110            }
111            p
112        }
113        None => {
114            let p = rand::random_range(0..page_count);
115            if !opts.json {
116                wprintln!(
117                    writer,
118                    "No page specified. Choosing random page {}.",
119                    format!("{}", p).yellow()
120                )?;
121            }
122            p
123        }
124    };
125
126    let byte_start = page_num * page_size as u64;
127
128    // Calculate the offset to corrupt within the page
129    let corrupt_offset = if opts.header {
130        // Corrupt within the FIL header area (first 38 bytes)
131        let max_start = SIZE_FIL_HEAD.saturating_sub(opts.bytes) as u64;
132        let header_offset = rand::random_range(0..max_start.max(1));
133        byte_start + header_offset
134    } else if opts.records {
135        // Corrupt within the record data area (after page header, before trailer)
136        let user_data_start = 120u64; // matches Perl USER_DATA_START
137        let max_offset = page_size as u64 - user_data_start - SIZE_FIL_TRAILER as u64;
138        let record_offset = rand::random_range(0..max_offset);
139        byte_start + user_data_start + record_offset
140    } else {
141        // Default: corrupt at page start
142        byte_start
143    };
144
145    // Generate random bytes (full bytes, not nibbles like the Perl version)
146    let random_data: Vec<u8> = (0..opts.bytes).map(|_| rand::random::<u8>()).collect();
147
148    // Read pre-corruption page data for --verify
149    let pre_checksum = if opts.verify {
150        let pre_data = read_page_bytes(&opts.file, page_num, page_size as u32)?;
151        Some(validate_checksum(&pre_data, page_size as u32, None))
152    } else {
153        None
154    };
155
156    if opts.json {
157        // Write the corruption first, then verify
158        write_corruption(&opts.file, corrupt_offset, &random_data)?;
159        if let Some(ref logger) = opts.audit_logger {
160            let _ = logger.log_page_write(&opts.file, page_num, "corrupt", None, None);
161        }
162        let verify_json = if opts.verify {
163            let post_data = read_page_bytes(&opts.file, page_num, page_size as u32)?;
164            let post_result = validate_checksum(&post_data, page_size as u32, None);
165            let pre = pre_checksum.expect("pre_checksum set when --verify is active");
166            Some(VerifyResultJson {
167                page: page_num,
168                before: checksum_to_json(&pre),
169                after: checksum_to_json(&post_result),
170            })
171        } else {
172            None
173        };
174        return output_json_with_verify(
175            opts,
176            corrupt_offset,
177            Some(page_num),
178            &random_data,
179            verify_json,
180            writer,
181        );
182    }
183
184    wprintln!(
185        writer,
186        "Writing {} bytes of random data to {} at offset {} (page {})...",
187        opts.bytes,
188        opts.file,
189        corrupt_offset,
190        format!("{}", page_num).yellow()
191    )?;
192
193    write_corruption(&opts.file, corrupt_offset, &random_data)?;
194    if let Some(ref logger) = opts.audit_logger {
195        let _ = logger.log_page_write(&opts.file, page_num, "corrupt", None, None);
196    }
197
198    wprintln!(writer, "Data written: {}", format_bytes(&random_data).red())?;
199    wprintln!(writer, "Completed.")?;
200
201    // --verify: show before/after checksum comparison
202    if opts.verify {
203        let post_data = read_page_bytes(&opts.file, page_num, page_size as u32)?;
204        let post_result = validate_checksum(&post_data, page_size as u32, None);
205        let pre = pre_checksum.expect("pre_checksum set when --verify is active");
206        wprintln!(writer)?;
207        wprintln!(writer, "{}:", "Verification".bold())?;
208        wprintln!(
209            writer,
210            "  Before: {} (algorithm={:?}, stored={}, calculated={})",
211            if pre.valid {
212                "OK".green().to_string()
213            } else {
214                "INVALID".red().to_string()
215            },
216            pre.algorithm,
217            pre.stored_checksum,
218            pre.calculated_checksum
219        )?;
220        wprintln!(
221            writer,
222            "  After:  {} (algorithm={:?}, stored={}, calculated={})",
223            if post_result.valid {
224                "OK".green().to_string()
225            } else {
226                "INVALID".red().to_string()
227            },
228            post_result.algorithm,
229            post_result.stored_checksum,
230            post_result.calculated_checksum
231        )?;
232    }
233
234    Ok(())
235}
236
237fn corrupt_at_offset(
238    opts: &CorruptOptions,
239    abs_offset: u64,
240    writer: &mut dyn Write,
241) -> Result<(), IdbError> {
242    // Validate offset is within file
243    let file_size = File::open(&opts.file)
244        .map_err(|e| IdbError::Io(format!("Cannot open {}: {}", opts.file, e)))?
245        .metadata()
246        .map_err(|e| IdbError::Io(format!("Cannot stat {}: {}", opts.file, e)))?
247        .len();
248
249    if abs_offset >= file_size {
250        return Err(IdbError::Argument(format!(
251            "Offset {} is beyond file size {}",
252            abs_offset, file_size
253        )));
254    }
255
256    let random_data: Vec<u8> = (0..opts.bytes).map(|_| rand::random::<u8>()).collect();
257
258    // Write the corruption
259    write_corruption(&opts.file, abs_offset, &random_data)?;
260    if let Some(ref logger) = opts.audit_logger {
261        let ps = opts.page_size.unwrap_or(SIZE_PAGE_DEFAULT) as u64;
262        let page_num = abs_offset / ps;
263        let _ = logger.log_page_write(&opts.file, page_num, "corrupt_offset", None, None);
264    }
265
266    if opts.json {
267        return output_json_with_verify(opts, abs_offset, None, &random_data, None, writer);
268    }
269
270    wprintln!(
271        writer,
272        "Writing {} bytes of random data to {} at offset {}...",
273        opts.bytes,
274        opts.file,
275        abs_offset
276    )?;
277
278    wprintln!(writer, "Data written: {}", format_bytes(&random_data).red())?;
279    wprintln!(writer, "Completed.")?;
280
281    if opts.verify {
282        wprintln!(
283            writer,
284            "Note: --verify is not available in absolute offset mode (no page context)."
285        )?;
286    }
287
288    Ok(())
289}
290
291fn write_corruption(file_path: &str, offset: u64, data: &[u8]) -> Result<(), IdbError> {
292    let mut file = OpenOptions::new()
293        .write(true)
294        .open(file_path)
295        .map_err(|e| IdbError::Io(format!("Cannot open {} for writing: {}", file_path, e)))?;
296
297    file.seek(SeekFrom::Start(offset))
298        .map_err(|e| IdbError::Io(format!("Cannot seek to offset {}: {}", offset, e)))?;
299
300    file.write_all(data)
301        .map_err(|e| IdbError::Io(format!("Cannot write corruption data: {}", e)))?;
302
303    Ok(())
304}
305
306fn output_json_with_verify(
307    opts: &CorruptOptions,
308    offset: u64,
309    page: Option<u64>,
310    data: &[u8],
311    verify: Option<VerifyResultJson>,
312    writer: &mut dyn Write,
313) -> Result<(), IdbError> {
314    let result = CorruptResultJson {
315        file: opts.file.clone(),
316        offset,
317        page,
318        bytes_written: data.len(),
319        data: format_bytes(data),
320        verify,
321    };
322
323    let json = serde_json::to_string_pretty(&result)
324        .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
325    wprintln!(writer, "{}", json)?;
326
327    Ok(())
328}
329
330fn read_page_bytes(file_path: &str, page_num: u64, page_size: u32) -> Result<Vec<u8>, IdbError> {
331    use std::io::Read;
332    let offset = page_num * page_size as u64;
333    let mut f = File::open(file_path)
334        .map_err(|e| IdbError::Io(format!("Cannot open {}: {}", file_path, e)))?;
335    f.seek(SeekFrom::Start(offset))
336        .map_err(|e| IdbError::Io(format!("Cannot seek to offset {}: {}", offset, e)))?;
337    let mut buf = vec![0u8; page_size as usize];
338    f.read_exact(&mut buf)
339        .map_err(|e| IdbError::Io(format!("Cannot read page {}: {}", page_num, e)))?;
340    Ok(buf)
341}
342
343fn checksum_to_json(result: &crate::innodb::checksum::ChecksumResult) -> ChecksumInfoJson {
344    let algorithm_name = match result.algorithm {
345        ChecksumAlgorithm::Crc32c => "crc32c",
346        ChecksumAlgorithm::InnoDB => "innodb",
347        ChecksumAlgorithm::MariaDbFullCrc32 => "mariadb_full_crc32",
348        ChecksumAlgorithm::None => "none",
349    };
350    ChecksumInfoJson {
351        valid: result.valid,
352        algorithm: algorithm_name.to_string(),
353        stored_checksum: result.stored_checksum,
354        calculated_checksum: result.calculated_checksum,
355    }
356}