1use std::io::Write;
2use std::path::Path;
3
4use rayon::prelude::*;
5use serde::Serialize;
6
7use crate::cli::{create_progress_bar, wprintln};
8use crate::innodb::checksum::validate_checksum;
9use crate::innodb::corruption::classify_corruption;
10use crate::innodb::page::FilHeader;
11use crate::util::fs::find_tablespace_files;
12use crate::IdbError;
13
/// Options for the `find` subcommand.
pub struct FindOptions {
    /// Root data directory scanned for `.ibd` tablespace files.
    pub datadir: String,
    /// Page number to search for (mutually exclusive with `corrupt`).
    pub page: Option<u64>,
    /// If set, only report pages whose stored FIL-header checksum equals this value.
    pub checksum: Option<u32>,
    /// If set, only report pages whose FIL-header space ID equals this value.
    pub space_id: Option<u32>,
    /// Search for checksum-invalid (corrupt) pages instead of a page number.
    pub corrupt: bool,
    /// Stop after the first match (per file and overall).
    pub first: bool,
    /// Emit machine-readable JSON instead of human-readable text.
    pub json: bool,
    /// Page-size override forwarded to the tablespace opener.
    pub page_size: Option<u32>,
    /// Worker thread count. NOTE(review): not read in this module — presumably
    /// consumed elsewhere to configure the rayon pool; confirm at the call site.
    pub threads: usize,
    /// Open tablespaces via memory-mapping instead of buffered reads.
    pub mmap: bool,
    /// Directory recursion depth limit passed to `find_tablespace_files`.
    pub depth: Option<u32>,
}
39
/// JSON output document for a page-number search (`--page`).
#[derive(Serialize)]
struct FindResultJson {
    // Datadir as given on the command line.
    datadir: String,
    // The page number that was searched for.
    target_page: u64,
    // All matching pages found across all files.
    matches: Vec<FindMatchJson>,
    // Number of files that could actually be opened and searched.
    files_searched: usize,
}
51
/// One matching page in a page-number search.
#[derive(Serialize, Clone)]
struct FindMatchJson {
    // Path relative to the datadir when possible, absolute otherwise.
    file: String,
    // Page number from the FIL header.
    page_number: u64,
    // Stored checksum from the FIL header.
    checksum: u32,
    // Tablespace ID from the FIL header.
    space_id: u32,
}
59
/// JSON output document for a corrupt-page search (`--corrupt`).
#[derive(Serialize)]
struct FindCorruptResultJson {
    // Datadir as given on the command line.
    datadir: String,
    // All checksum-invalid pages found across all files.
    corrupt_pages: Vec<FindCorruptMatchJson>,
    // Number of files that could actually be opened and searched.
    files_searched: usize,
    // Convenience count, equal to `corrupt_pages.len()`.
    total_corrupt: usize,
}
71
/// One checksum-invalid page found by a corrupt-page search.
#[derive(Serialize, Clone)]
struct FindCorruptMatchJson {
    // Path relative to the datadir when possible, absolute otherwise.
    file: String,
    // Zero-based page number within the file.
    page_number: u64,
    // Checksum stored in the page.
    stored_checksum: u32,
    // Checksum recomputed during validation.
    calculated_checksum: u32,
    // Debug-formatted name of the checksum algorithm that was tried.
    algorithm: String,
    // Human-readable corruption classification from `classify_corruption`.
    corruption_pattern: String,
}
81
82#[allow(clippy::too_many_arguments)]
88fn search_file(
89 ibd_path: &Path,
90 datadir: &Path,
91 target_page: u64,
92 checksum_filter: Option<u32>,
93 space_id_filter: Option<u32>,
94 page_size_override: Option<u32>,
95 first: bool,
96 use_mmap: bool,
97) -> (Vec<FindMatchJson>, bool) {
98 let display_path = ibd_path.strip_prefix(datadir).unwrap_or(ibd_path);
99
100 let path_str = ibd_path.to_string_lossy();
101 let ts_result = crate::cli::open_tablespace(&path_str, page_size_override, use_mmap);
102 let mut ts = match ts_result {
103 Ok(t) => t,
104 Err(_) => return (Vec::new(), false),
105 };
106
107 let all_data = match ts.read_all_pages() {
108 Ok(d) => d,
109 Err(_) => return (Vec::new(), true),
110 };
111
112 let page_size = ts.page_size() as usize;
113 let page_count = ts.page_count();
114
115 let file_matches: Vec<FindMatchJson> = (0..page_count)
116 .into_par_iter()
117 .filter_map(|page_num| {
118 let offset = page_num as usize * page_size;
119 if offset + page_size > all_data.len() {
120 return None;
121 }
122 let page_data = &all_data[offset..offset + page_size];
123
124 let header = FilHeader::parse(page_data)?;
125
126 if header.page_number as u64 != target_page {
127 return None;
128 }
129
130 if let Some(expected_csum) = checksum_filter {
131 if header.checksum != expected_csum {
132 return None;
133 }
134 }
135
136 if let Some(expected_sid) = space_id_filter {
137 if header.space_id != expected_sid {
138 return None;
139 }
140 }
141
142 Some(FindMatchJson {
143 file: display_path.display().to_string(),
144 page_number: header.page_number as u64,
145 checksum: header.checksum,
146 space_id: header.space_id,
147 })
148 })
149 .collect();
150
151 let matches = if first {
152 file_matches.into_iter().take(1).collect()
153 } else {
154 file_matches
155 };
156 (matches, true)
157}
158
/// Searches every `.ibd` file under the datadir for `opts.page` and writes
/// the results — human-readable text or a JSON document — to `writer`.
fn execute_find_page(opts: &FindOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
    // Invariant: `execute()` only dispatches here when --page was supplied.
    let target_page = opts.page.unwrap();
    let datadir = Path::new(&opts.datadir);

    let ibd_files = find_tablespace_files(datadir, &["ibd"], opts.depth)?;

    // No candidate files: in JSON mode still emit a well-formed (empty)
    // document so consumers can rely on the schema.
    if ibd_files.is_empty() {
        if opts.json {
            let result = FindResultJson {
                datadir: opts.datadir.clone(),
                target_page,
                matches: Vec::new(),
                files_searched: 0,
            };
            let json = serde_json::to_string_pretty(&result)
                .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
            wprintln!(writer, "{}", json)?;
        } else {
            wprintln!(writer, "No .ibd files found in {}", opts.datadir)?;
        }
        return Ok(());
    }

    // Progress bar only in text mode, so JSON output stays machine-readable.
    let pb = if !opts.json {
        Some(create_progress_bar(ibd_files.len() as u64, "files"))
    } else {
        None
    };

    // Copy filter options out of `opts` so the parallel closure captures
    // plain `Copy` values rather than borrowing the whole struct.
    let checksum_filter = opts.checksum;
    let space_id_filter = opts.space_id;
    let page_size_override = opts.page_size;
    let first = opts.first;
    let use_mmap = opts.mmap;

    // One (matches, opened) pair per file, files searched in parallel.
    let all_results: Vec<(Vec<FindMatchJson>, bool)> = ibd_files
        .par_iter()
        .map(|ibd_path| {
            let result = search_file(
                ibd_path,
                datadir,
                target_page,
                checksum_filter,
                space_id_filter,
                page_size_override,
                first,
                use_mmap,
            );
            if let Some(ref pb) = pb {
                pb.inc(1);
            }
            result
        })
        .collect();

    if let Some(ref pb) = pb {
        pb.finish_and_clear();
    }

    let mut matches: Vec<FindMatchJson> = Vec::new();
    // Only files that could actually be opened count as "searched".
    let files_searched: usize = all_results.iter().filter(|(_, opened)| *opened).count();

    for (file_matches, _opened) in &all_results {
        for m in file_matches {
            if !opts.json {
                wprintln!(
                    writer,
                    "Found page {} in {} (checksum: {}, space_id: {})",
                    target_page,
                    m.file,
                    m.checksum,
                    m.space_id
                )?;
            }
            matches.push(m.clone());
            // With --first, stop after the first match in this file...
            if opts.first {
                break;
            }
        }
        // ...and stop scanning further files once any match was recorded.
        if opts.first && !matches.is_empty() {
            break;
        }
    }

    if opts.json {
        let result = FindResultJson {
            datadir: opts.datadir.clone(),
            target_page,
            matches,
            files_searched,
        };
        let json = serde_json::to_string_pretty(&result)
            .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
        wprintln!(writer, "{}", json)?;
    } else if matches.is_empty() {
        wprintln!(writer, "Page {} not found in any .ibd file.", target_page)?;
    } else {
        wprintln!(writer)?;
        wprintln!(
            writer,
            "Found {} match(es) in {} file(s) searched.",
            matches.len(),
            files_searched
        )?;
    }

    Ok(())
}
267
268fn search_file_corrupt(
274 ibd_path: &Path,
275 datadir: &Path,
276 space_id_filter: Option<u32>,
277 page_size_override: Option<u32>,
278 first: bool,
279 use_mmap: bool,
280) -> (Vec<FindCorruptMatchJson>, bool) {
281 let display_path = ibd_path.strip_prefix(datadir).unwrap_or(ibd_path);
282
283 let path_str = ibd_path.to_string_lossy();
284 let ts_result = crate::cli::open_tablespace(&path_str, page_size_override, use_mmap);
285 let mut ts = match ts_result {
286 Ok(t) => t,
287 Err(_) => return (Vec::new(), false),
288 };
289
290 let page_size = ts.page_size();
291 let page_count = ts.page_count();
292 let vendor_info = ts.vendor_info().clone();
293
294 let all_data = match ts.read_all_pages() {
295 Ok(d) => d,
296 Err(_) => return (Vec::new(), true),
297 };
298
299 let ps = page_size as usize;
300 let file_str = display_path.display().to_string();
301
302 let file_matches: Vec<FindCorruptMatchJson> = (0..page_count)
303 .into_par_iter()
304 .filter_map(|page_num| {
305 let offset = page_num as usize * ps;
306 if offset + ps > all_data.len() {
307 return None;
308 }
309 let page_data = &all_data[offset..offset + ps];
310
311 if page_data.iter().all(|&b| b == 0) {
313 return None;
314 }
315
316 if let Some(expected_sid) = space_id_filter {
318 if let Some(header) = FilHeader::parse(page_data) {
319 if header.space_id != expected_sid {
320 return None;
321 }
322 }
323 }
324
325 let csum = validate_checksum(page_data, page_size, Some(&vendor_info));
326 if csum.valid {
327 return None;
328 }
329
330 let pattern = classify_corruption(page_data, page_size);
331
332 Some(FindCorruptMatchJson {
333 file: file_str.clone(),
334 page_number: page_num,
335 stored_checksum: csum.stored_checksum,
336 calculated_checksum: csum.calculated_checksum,
337 algorithm: format!("{:?}", csum.algorithm),
338 corruption_pattern: pattern.name().to_string(),
339 })
340 })
341 .collect();
342
343 let matches = if first {
344 file_matches.into_iter().take(1).collect()
345 } else {
346 file_matches
347 };
348 (matches, true)
349}
350
/// Searches every `.ibd` file under the datadir for checksum-invalid pages
/// and writes the results — human-readable text or a JSON document — to
/// `writer`.
fn execute_find_corrupt(opts: &FindOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
    let datadir = Path::new(&opts.datadir);

    let ibd_files = find_tablespace_files(datadir, &["ibd"], opts.depth)?;

    // No candidate files: in JSON mode still emit a well-formed (empty)
    // document so consumers can rely on the schema.
    if ibd_files.is_empty() {
        if opts.json {
            let result = FindCorruptResultJson {
                datadir: opts.datadir.clone(),
                corrupt_pages: Vec::new(),
                files_searched: 0,
                total_corrupt: 0,
            };
            let json = serde_json::to_string_pretty(&result)
                .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
            wprintln!(writer, "{}", json)?;
        } else {
            wprintln!(writer, "No .ibd files found in {}", opts.datadir)?;
        }
        return Ok(());
    }

    // Progress bar only in text mode, so JSON output stays machine-readable.
    let pb = if !opts.json {
        Some(create_progress_bar(ibd_files.len() as u64, "files"))
    } else {
        None
    };

    // Copy filter options out of `opts` so the parallel closure captures
    // plain `Copy` values rather than borrowing the whole struct.
    let space_id_filter = opts.space_id;
    let page_size_override = opts.page_size;
    let first = opts.first;
    let use_mmap = opts.mmap;

    // One (matches, opened) pair per file, files searched in parallel.
    let all_results: Vec<(Vec<FindCorruptMatchJson>, bool)> = ibd_files
        .par_iter()
        .map(|ibd_path| {
            let result = search_file_corrupt(
                ibd_path,
                datadir,
                space_id_filter,
                page_size_override,
                first,
                use_mmap,
            );
            if let Some(ref pb) = pb {
                pb.inc(1);
            }
            result
        })
        .collect();

    if let Some(ref pb) = pb {
        pb.finish_and_clear();
    }

    let mut corrupt_pages: Vec<FindCorruptMatchJson> = Vec::new();
    // Only files that could actually be opened count as "searched".
    let files_searched: usize = all_results.iter().filter(|(_, opened)| *opened).count();

    for (file_matches, _opened) in &all_results {
        for m in file_matches {
            if !opts.json {
                wprintln!(
                    writer,
                    "Corrupt page {} in {} (stored: 0x{:08x}, calculated: 0x{:08x}, algo: {}, pattern: {})",
                    m.page_number,
                    m.file,
                    m.stored_checksum,
                    m.calculated_checksum,
                    m.algorithm,
                    m.corruption_pattern
                )?;
            }
            corrupt_pages.push(m.clone());
            // With --first, stop after the first corrupt page in this file...
            if opts.first {
                break;
            }
        }
        // ...and stop scanning further files once any page was recorded.
        if opts.first && !corrupt_pages.is_empty() {
            break;
        }
    }

    if opts.json {
        let total_corrupt = corrupt_pages.len();
        let result = FindCorruptResultJson {
            datadir: opts.datadir.clone(),
            corrupt_pages,
            files_searched,
            total_corrupt,
        };
        let json = serde_json::to_string_pretty(&result)
            .map_err(|e| IdbError::Parse(format!("JSON serialization error: {}", e)))?;
        wprintln!(writer, "{}", json)?;
    } else if corrupt_pages.is_empty() {
        wprintln!(
            writer,
            "No corrupt pages found in {} file(s) searched.",
            files_searched
        )?;
    } else {
        wprintln!(writer)?;
        wprintln!(
            writer,
            "Found {} corrupt page(s) in {} file(s) searched.",
            corrupt_pages.len(),
            files_searched
        )?;
    }

    Ok(())
}
462
463pub fn execute(opts: &FindOptions, writer: &mut dyn Write) -> Result<(), IdbError> {
470 if opts.corrupt && opts.page.is_some() {
472 return Err(IdbError::Argument(
473 "--corrupt and --page are mutually exclusive".to_string(),
474 ));
475 }
476 if !opts.corrupt && opts.page.is_none() {
477 return Err(IdbError::Argument(
478 "Either --page or --corrupt must be specified".to_string(),
479 ));
480 }
481 if opts.corrupt && opts.checksum.is_some() {
482 return Err(IdbError::Argument(
483 "--checksum is not compatible with --corrupt".to_string(),
484 ));
485 }
486
487 let datadir = Path::new(&opts.datadir);
488 if !datadir.is_dir() {
489 return Err(IdbError::Argument(format!(
490 "Data directory does not exist: {}",
491 opts.datadir
492 )));
493 }
494
495 if opts.corrupt {
496 execute_find_corrupt(opts, writer)
497 } else {
498 execute_find_page(opts, writer)
499 }
500}