//! idb/cli/find.rs — implementation of the `inno find` subcommand: locate a
//! specific page number across the tablespace files in a MySQL data directory,
//! or scan them for pages with checksum mismatches (`--corrupt`).
1use std::io::Write;
2use std::path::Path;
3
4use rayon::prelude::*;
5use serde::Serialize;
6
7use crate::cli::{create_progress_bar, wprintln};
8use crate::innodb::checksum::validate_checksum;
9use crate::innodb::corruption::classify_corruption;
10use crate::innodb::page::FilHeader;
11use crate::util::fs::find_tablespace_files;
12use crate::IdbError;
13
/// Options for the `inno find` subcommand.
///
/// Exactly one of `page` or `corrupt` must be set; `execute` validates this
/// before dispatching to the page-number search or the corruption scan.
pub struct FindOptions {
    /// MySQL data directory path to search.
    pub datadir: String,
    /// Page number to search for across all tablespace files.
    pub page: Option<u64>,
    /// Optional checksum filter — only match pages with this stored checksum.
    /// Rejected when `corrupt` is set (see `execute`).
    pub checksum: Option<u32>,
    /// Optional space ID filter — only match pages in this tablespace.
    pub space_id: Option<u32>,
    /// Scan for pages with checksum mismatches instead of a page number.
    pub corrupt: bool,
    /// Stop searching after the first match.
    pub first: bool,
    /// Emit output as JSON.
    pub json: bool,
    /// Override the auto-detected page size.
    pub page_size: Option<u32>,
    /// Number of threads for parallel processing (0 = auto-detect).
    // NOTE(review): not read anywhere in this module; presumably consumed by
    // global rayon thread-pool setup elsewhere — confirm.
    pub threads: usize,
    /// Use memory-mapped I/O for file access.
    pub mmap: bool,
    /// Maximum directory recursion depth (None = default 2, Some(0) = unlimited).
    pub depth: Option<u32>,
}
39
40// -----------------------------------------------------------------------
41// Page-number search structs
42// -----------------------------------------------------------------------
43
/// JSON payload emitted for a page-number search (`--page`).
#[derive(Serialize)]
struct FindResultJson {
    /// Data directory that was searched (as given on the command line).
    datadir: String,
    /// Page number that was searched for.
    target_page: u64,
    /// All matching pages, in file order (at most one with `--first`).
    matches: Vec<FindMatchJson>,
    /// Number of files that could be opened and were searched.
    files_searched: usize,
}
51
/// One matching page found by the page-number search.
#[derive(Serialize, Clone)]
struct FindMatchJson {
    /// Path of the containing .ibd file, relative to the datadir when possible.
    file: String,
    /// Page number taken from the FIL header.
    page_number: u64,
    /// Stored checksum from the FIL header.
    checksum: u32,
    /// Space ID from the FIL header.
    space_id: u32,
}
59
60// -----------------------------------------------------------------------
61// Corrupt search structs
62// -----------------------------------------------------------------------
63
/// JSON payload emitted for a corruption scan (`--corrupt`).
#[derive(Serialize)]
struct FindCorruptResultJson {
    /// Data directory that was scanned (as given on the command line).
    datadir: String,
    /// All corrupt pages found, in file order (at most one with `--first`).
    corrupt_pages: Vec<FindCorruptMatchJson>,
    /// Number of files that could be opened and were scanned.
    files_searched: usize,
    /// Convenience count: `corrupt_pages.len()`.
    total_corrupt: usize,
}
71
/// One corrupt page found by the corruption scan.
#[derive(Serialize, Clone)]
struct FindCorruptMatchJson {
    /// Path of the containing .ibd file, relative to the datadir when possible.
    file: String,
    /// Page number (by file offset, not from the FIL header).
    page_number: u64,
    /// Checksum stored on the page.
    stored_checksum: u32,
    /// Checksum recomputed from the page contents.
    calculated_checksum: u32,
    /// Debug-formatted name of the checksum algorithm that was tried.
    algorithm: String,
    /// Human-readable corruption pattern from `classify_corruption`.
    corruption_pattern: String,
}
81
82// -----------------------------------------------------------------------
83// Page-number search implementation
84// -----------------------------------------------------------------------
85
86/// Search a single tablespace file for pages matching the target page number.
87#[allow(clippy::too_many_arguments)]
88fn search_file(
89    ibd_path: &Path,
90    datadir: &Path,
91    target_page: u64,
92    checksum_filter: Option<u32>,
93    space_id_filter: Option<u32>,
94    page_size_override: Option<u32>,
95    first: bool,
96    use_mmap: bool,
97) -> (Vec<FindMatchJson>, bool) {
98    let display_path = ibd_path.strip_prefix(datadir).unwrap_or(ibd_path);
99
100    let path_str = ibd_path.to_string_lossy();
101    let ts_result = crate::cli::open_tablespace(&path_str, page_size_override, use_mmap);
102    let mut ts = match ts_result {
103        Ok(t) => t,
104        Err(_) => return (Vec::new(), false),
105    };
106
107    let all_data = match ts.read_all_pages() {
108        Ok(d) => d,
109        Err(_) => return (Vec::new(), true),
110    };
111
112    let page_size = ts.page_size() as usize;
113    let page_count = ts.page_count();
114
115    let file_matches: Vec<FindMatchJson> = (0..page_count)
116        .into_par_iter()
117        .filter_map(|page_num| {
118            let offset = page_num as usize * page_size;
119            if offset + page_size > all_data.len() {
120                return None;
121            }
122            let page_data = &all_data[offset..offset + page_size];
123
124            let header = FilHeader::parse(page_data)?;
125
126            if header.page_number as u64 != target_page {
127                return None;
128            }
129
130            if let Some(expected_csum) = checksum_filter {
131                if header.checksum != expected_csum {
132                    return None;
133                }
134            }
135
136            if let Some(expected_sid) = space_id_filter {
137                if header.space_id != expected_sid {
138                    return None;
139                }
140            }
141
142            Some(FindMatchJson {
143                file: display_path.display().to_string(),
144                page_number: header.page_number as u64,
145                checksum: header.checksum,
146                space_id: header.space_id,
147            })
148        })
149        .collect();
150
151    let matches = if first {
152        file_matches.into_iter().take(1).collect()
153    } else {
154        file_matches
155    };
156    (matches, true)
157}
158
159fn execute_find_page(opts: &FindOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
160    let target_page = opts.page.unwrap();
161    let datadir = Path::new(&opts.datadir);
162
163    let ibd_files = find_tablespace_files(datadir, &["ibd"], opts.depth)?;
164
165    if ibd_files.is_empty() {
166        if opts.json {
167            let result = FindResultJson {
168                datadir: opts.datadir.clone(),
169                target_page,
170                matches: Vec::new(),
171                files_searched: 0,
172            };
173            let json = serde_json::to_string_pretty(&result)
174                .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
175            wprintln!(writer, "{}", json)?;
176        } else {
177            wprintln!(writer, "No .ibd files found in {}", opts.datadir)?;
178        }
179        return Ok(());
180    }
181
182    let pb = if !opts.json {
183        Some(create_progress_bar(ibd_files.len() as u64, "files"))
184    } else {
185        None
186    };
187
188    let checksum_filter = opts.checksum;
189    let space_id_filter = opts.space_id;
190    let page_size_override = opts.page_size;
191    let first = opts.first;
192    let use_mmap = opts.mmap;
193
194    let all_results: Vec<(Vec<FindMatchJson>, bool)> = ibd_files
195        .par_iter()
196        .map(|ibd_path| {
197            let result = search_file(
198                ibd_path,
199                datadir,
200                target_page,
201                checksum_filter,
202                space_id_filter,
203                page_size_override,
204                first,
205                use_mmap,
206            );
207            if let Some(ref pb) = pb {
208                pb.inc(1);
209            }
210            result
211        })
212        .collect();
213
214    if let Some(ref pb) = pb {
215        pb.finish_and_clear();
216    }
217
218    let mut matches: Vec<FindMatchJson> = Vec::new();
219    let files_searched: usize = all_results.iter().filter(|(_, opened)| *opened).count();
220
221    for (file_matches, _opened) in &all_results {
222        for m in file_matches {
223            if !opts.json {
224                wprintln!(
225                    writer,
226                    "Found page {} in {} (checksum: {}, space_id: {})",
227                    target_page,
228                    m.file,
229                    m.checksum,
230                    m.space_id
231                )?;
232            }
233            matches.push(m.clone());
234            if opts.first {
235                break;
236            }
237        }
238        if opts.first && !matches.is_empty() {
239            break;
240        }
241    }
242
243    if opts.json {
244        let result = FindResultJson {
245            datadir: opts.datadir.clone(),
246            target_page,
247            matches,
248            files_searched,
249        };
250        let json = serde_json::to_string_pretty(&result)
251            .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
252        wprintln!(writer, "{}", json)?;
253    } else if matches.is_empty() {
254        wprintln!(writer, "Page {} not found in any .ibd file.", target_page)?;
255    } else {
256        wprintln!(writer)?;
257        wprintln!(
258            writer,
259            "Found {} match(es) in {} file(s) searched.",
260            matches.len(),
261            files_searched
262        )?;
263    }
264
265    Ok(())
266}
267
268// -----------------------------------------------------------------------
269// Corrupt search implementation
270// -----------------------------------------------------------------------
271
272/// Search a single tablespace file for pages with invalid checksums.
273fn search_file_corrupt(
274    ibd_path: &Path,
275    datadir: &Path,
276    space_id_filter: Option<u32>,
277    page_size_override: Option<u32>,
278    first: bool,
279    use_mmap: bool,
280) -> (Vec<FindCorruptMatchJson>, bool) {
281    let display_path = ibd_path.strip_prefix(datadir).unwrap_or(ibd_path);
282
283    let path_str = ibd_path.to_string_lossy();
284    let ts_result = crate::cli::open_tablespace(&path_str, page_size_override, use_mmap);
285    let mut ts = match ts_result {
286        Ok(t) => t,
287        Err(_) => return (Vec::new(), false),
288    };
289
290    let page_size = ts.page_size();
291    let page_count = ts.page_count();
292    let vendor_info = ts.vendor_info().clone();
293
294    let all_data = match ts.read_all_pages() {
295        Ok(d) => d,
296        Err(_) => return (Vec::new(), true),
297    };
298
299    let ps = page_size as usize;
300    let file_str = display_path.display().to_string();
301
302    let file_matches: Vec<FindCorruptMatchJson> = (0..page_count)
303        .into_par_iter()
304        .filter_map(|page_num| {
305            let offset = page_num as usize * ps;
306            if offset + ps > all_data.len() {
307                return None;
308            }
309            let page_data = &all_data[offset..offset + ps];
310
311            // Skip empty pages
312            if page_data.iter().all(|&b| b == 0) {
313                return None;
314            }
315
316            // Apply space_id filter if specified
317            if let Some(expected_sid) = space_id_filter {
318                if let Some(header) = FilHeader::parse(page_data) {
319                    if header.space_id != expected_sid {
320                        return None;
321                    }
322                }
323            }
324
325            let csum = validate_checksum(page_data, page_size, Some(&vendor_info));
326            if csum.valid {
327                return None;
328            }
329
330            let pattern = classify_corruption(page_data, page_size);
331
332            Some(FindCorruptMatchJson {
333                file: file_str.clone(),
334                page_number: page_num,
335                stored_checksum: csum.stored_checksum,
336                calculated_checksum: csum.calculated_checksum,
337                algorithm: format!("{:?}", csum.algorithm),
338                corruption_pattern: pattern.name().to_string(),
339            })
340        })
341        .collect();
342
343    let matches = if first {
344        file_matches.into_iter().take(1).collect()
345    } else {
346        file_matches
347    };
348    (matches, true)
349}
350
351fn execute_find_corrupt(opts: &FindOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
352    let datadir = Path::new(&opts.datadir);
353
354    let ibd_files = find_tablespace_files(datadir, &["ibd"], opts.depth)?;
355
356    if ibd_files.is_empty() {
357        if opts.json {
358            let result = FindCorruptResultJson {
359                datadir: opts.datadir.clone(),
360                corrupt_pages: Vec::new(),
361                files_searched: 0,
362                total_corrupt: 0,
363            };
364            let json = serde_json::to_string_pretty(&result)
365                .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
366            wprintln!(writer, "{}", json)?;
367        } else {
368            wprintln!(writer, "No .ibd files found in {}", opts.datadir)?;
369        }
370        return Ok(());
371    }
372
373    let pb = if !opts.json {
374        Some(create_progress_bar(ibd_files.len() as u64, "files"))
375    } else {
376        None
377    };
378
379    let space_id_filter = opts.space_id;
380    let page_size_override = opts.page_size;
381    let first = opts.first;
382    let use_mmap = opts.mmap;
383
384    let all_results: Vec<(Vec<FindCorruptMatchJson>, bool)> = ibd_files
385        .par_iter()
386        .map(|ibd_path| {
387            let result = search_file_corrupt(
388                ibd_path,
389                datadir,
390                space_id_filter,
391                page_size_override,
392                first,
393                use_mmap,
394            );
395            if let Some(ref pb) = pb {
396                pb.inc(1);
397            }
398            result
399        })
400        .collect();
401
402    if let Some(ref pb) = pb {
403        pb.finish_and_clear();
404    }
405
406    let mut corrupt_pages: Vec<FindCorruptMatchJson> = Vec::new();
407    let files_searched: usize = all_results.iter().filter(|(_, opened)| *opened).count();
408
409    for (file_matches, _opened) in &all_results {
410        for m in file_matches {
411            if !opts.json {
412                wprintln!(
413                    writer,
414                    "Corrupt page {} in {} (stored: 0x{:08x}, calculated: 0x{:08x}, algo: {}, pattern: {})",
415                    m.page_number,
416                    m.file,
417                    m.stored_checksum,
418                    m.calculated_checksum,
419                    m.algorithm,
420                    m.corruption_pattern
421                )?;
422            }
423            corrupt_pages.push(m.clone());
424            if opts.first {
425                break;
426            }
427        }
428        if opts.first && !corrupt_pages.is_empty() {
429            break;
430        }
431    }
432
433    if opts.json {
434        let total_corrupt = corrupt_pages.len();
435        let result = FindCorruptResultJson {
436            datadir: opts.datadir.clone(),
437            corrupt_pages,
438            files_searched,
439            total_corrupt,
440        };
441        let json = serde_json::to_string_pretty(&result)
442            .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
443        wprintln!(writer, "{}", json)?;
444    } else if corrupt_pages.is_empty() {
445        wprintln!(
446            writer,
447            "No corrupt pages found in {} file(s) searched.",
448            files_searched
449        )?;
450    } else {
451        wprintln!(writer)?;
452        wprintln!(
453            writer,
454            "Found {} corrupt page(s) in {} file(s) searched.",
455            corrupt_pages.len(),
456            files_searched
457        )?;
458    }
459
460    Ok(())
461}
462
463// -----------------------------------------------------------------------
464// Entry point
465// -----------------------------------------------------------------------
466
467/// Search a MySQL data directory for pages matching a given page number,
468/// or scan for pages with checksum mismatches (`--corrupt`).
469pub fn execute(opts: &FindOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
470    // Validate mutually exclusive modes
471    if opts.corrupt && opts.page.is_some() {
472        return Err(IdbError::Argument(
473            "--corrupt and --page are mutually exclusive".to_string(),
474        ));
475    }
476    if !opts.corrupt && opts.page.is_none() {
477        return Err(IdbError::Argument(
478            "Either --page or --corrupt must be specified".to_string(),
479        ));
480    }
481    if opts.corrupt && opts.checksum.is_some() {
482        return Err(IdbError::Argument(
483            "--checksum is not compatible with --corrupt".to_string(),
484        ));
485    }
486
487    let datadir = Path::new(&opts.datadir);
488    if !datadir.is_dir() {
489        return Err(IdbError::Argument(format!(
490            "Data directory does not exist: {}",
491            opts.datadir
492        )));
493    }
494
495    if opts.corrupt {
496        execute_find_corrupt(opts, writer)
497    } else {
498        execute_find_page(opts, writer)
499    }
500}