1#![deny(clippy::pedantic)]
2
3use std::collections::BTreeMap;
4use std::fs::File;
5use std::io::{self, BufRead, BufReader, IsTerminal, Read};
6use std::path::{Path, PathBuf};
7
8use blake2::{Blake2b512, Blake2s256};
9use digest::Digest;
10use indicatif::{ParallelProgressIterator, ProgressBar, ProgressStyle};
11use rayon::prelude::*;
12use walkdir::WalkDir;
13
/// Digest algorithm used to hash candidate files.
///
/// Each variant selects one streaming digest implementation in
/// `compute_hash`; all files in a single search run use the same algorithm.
#[derive(Copy, Clone, Debug)]
pub enum Algorithm {
    Md5,
    Sha1,
    Sha256,
    Sha512,
    Sha3_256,
    Sha3_512,
    Blake2s,
    Blake2b,
    Blake3,
}
27
/// A digest to hunt for, optionally restricted to a specific file name.
#[derive(Clone, Debug)]
pub struct Target {
    // Raw digest bytes compared against each candidate file's computed hash.
    pub hash: Vec<u8>,
    // When `Some`, only files whose name equals this string are hashed for
    // this target; when `None`, every walked file is a candidate.
    pub name: Option<String>,
}
34
/// Parameters controlling a single `search` run.
pub struct SearchConfig {
    // Root directory to walk; canonicalized before scanning.
    pub dir: PathBuf,
    // Digest algorithm applied to every hashed file.
    pub algorithm: Algorithm,
    // Targets to match; `search` rejects an empty list with `InvalidInput`.
    pub targets: Vec<Target>,
    // Optional rayon global thread-pool size; `None` keeps rayon's default.
    pub threads: Option<usize>,
}
42
/// One file whose digest matched one target.
///
/// A file matching several targets yields several `MatchResult`s.
#[derive(Clone, Debug)]
pub struct MatchResult {
    // Path of the matching file (normalized via `normalize_path`).
    pub path: PathBuf,
    // The target that matched (cloned from `SearchConfig::targets`).
    pub target: Target,
}
49
/// Summary of a completed `search` run.
#[derive(Debug)]
pub struct SearchReport {
    // Every (file, target) digest match found.
    pub matches: Vec<MatchResult>,
    // Count of all walked files, including skipped and failed ones.
    pub total_files_checked: usize,
    // Files that could not be opened or read during hashing.
    pub failed_files: Vec<FileCheckFailure>,
}
57
/// A file that could not be hashed, with the stringified I/O error.
#[derive(Debug)]
pub struct FileCheckFailure {
    // Path of the file that failed (normalized via `normalize_path`).
    pub path: PathBuf,
    // Display form of the underlying `io::Error`.
    pub error: String,
}
64
65pub fn search(config: &SearchConfig) -> io::Result<SearchReport> {
103 if config.targets.is_empty() {
104 return Err(io::Error::new(
105 io::ErrorKind::InvalidInput,
106 "at least one target is required",
107 ));
108 }
109
110 if let Some(threads) = config.threads {
111 status_message(&format!("configuring {threads} hashing threads"));
112 rayon::ThreadPoolBuilder::new()
113 .num_threads(threads)
114 .build_global()
115 .map_err(io::Error::other)?;
116 }
117
118 status_message("preparing targets");
119 let (name_map, hash_only) = split_targets(&config.targets);
120 status_message("scanning directory tree");
121 let search_root = config.dir.canonicalize()?;
122
123 let file_paths: Vec<PathBuf> = WalkDir::new(&search_root)
124 .follow_links(false)
125 .into_iter()
126 .par_bridge()
127 .filter_map(Result::ok)
128 .filter(|entry| entry.file_type().is_file())
129 .map(|entry| entry.path().to_path_buf())
130 .collect();
131
132 status_message(&format!(
133 "found {} file(s); hashing in progress",
134 file_paths.len()
135 ));
136 let progress = progress_bar(file_paths.len());
137 let results = file_paths
138 .par_iter()
139 .progress_with(progress.clone())
140 .filter_map(|path| {
141 let file_name = path
142 .file_name()
143 .map(|value| value.to_string_lossy().to_string())
144 .unwrap_or_default();
145 let name_targets = name_map.get(&file_name).cloned().unwrap_or_default();
146 let needs_hash = !name_targets.is_empty() || !hash_only.is_empty();
147 if !needs_hash {
148 return Some(ResultEntry::SkippedNameMismatch);
149 }
150 let hash = match compute_hash(path, config.algorithm) {
151 Ok(value) => value,
152 Err(err) => return Some(ResultEntry::Error {
153 path: path.clone(),
154 err,
155 }),
156 };
157 let mut matches = Vec::new();
158 for idx in name_targets.iter().chain(hash_only.iter()) {
159 if config.targets[*idx].hash == hash {
160 matches.push(*idx);
161 }
162 }
163 Some(ResultEntry::Hashed {
164 path: path.clone(),
165 matches,
166 })
167 })
168 .collect::<Vec<_>>();
169 progress.finish_with_message("scan complete");
170
171 status_message("summarizing results");
172 let mut output = Vec::new();
173 let mut failures = Vec::new();
174 let mut total_files_checked = 0usize;
175 for result in results {
176 total_files_checked += 1;
177 match result {
178 ResultEntry::Hashed { path, matches } => {
179 for idx in matches {
180 output.push(MatchResult {
181 path: normalize_path(&path),
182 target: config.targets[idx].clone(),
183 });
184 }
185 }
186 ResultEntry::Error { path, err } => {
187 failures.push(FileCheckFailure {
188 path: normalize_path(&path),
189 error: err.to_string(),
190 });
191 eprintln!("failed to hash {}: {err}", path.display());
192 }
193 ResultEntry::SkippedNameMismatch => {}
194 }
195 }
196
197 Ok(SearchReport {
198 matches: output,
199 total_files_checked,
200 failed_files: failures,
201 })
202}
203
/// Prints a progress/status line to stderr, but only when stderr is an
/// interactive terminal (stays quiet when output is piped or redirected).
fn status_message(message: &str) {
    let interactive = io::stderr().is_terminal();
    if interactive {
        eprintln!("{message}");
    }
}
209
210fn progress_bar(total_files: usize) -> ProgressBar {
211 if !io::stderr().is_terminal() {
212 return ProgressBar::hidden();
213 }
214
215 let progress = ProgressBar::new(total_files as u64);
216 let style = ProgressStyle::with_template(
217 "{spinner:.green} [{elapsed_precise}] {msg} {bar:40.cyan/blue} {pos}/{len} files ({eta})",
218 )
219 .unwrap_or_else(|_| ProgressStyle::default_bar())
220 .progress_chars("=>-");
221 progress.set_style(style);
222 progress.set_message("hashing");
223 progress
224}
225
/// Strips the `\\?\` extended-length prefix that `Path::canonicalize` adds
/// on Windows so reported paths look like conventional paths.
///
/// `\\?\C:\dir` becomes `C:\dir`, and `\\?\UNC\server\share` becomes
/// `\\server\share`; anything without the prefix is returned unchanged.
#[cfg(windows)]
fn normalize_path(path: &Path) -> PathBuf {
    let path_str = path.to_string_lossy();
    if let Some(stripped) = path_str.strip_prefix(r"\\?\") {
        if let Some(unc_path) = stripped.strip_prefix("UNC\\") {
            // Inline capture keeps `clippy::uninlined_format_args` (pedantic,
            // denied crate-wide) satisfied.
            PathBuf::from(format!(r"\\{unc_path}"))
        } else {
            PathBuf::from(stripped)
        }
    } else {
        path.to_path_buf()
    }
}
239
/// Non-Windows paths need no prefix stripping; hand back an owned copy.
#[cfg(not(windows))]
fn normalize_path(path: &Path) -> PathBuf {
    PathBuf::from(path)
}
244
245pub fn load_batch(path: &Path) -> io::Result<Vec<Target>> {
275 let file = File::open(path)?;
276 let reader = BufReader::new(file);
277 let mut targets = Vec::new();
278 for (line_number, line) in reader.lines().enumerate() {
279 let line = line?;
280 let line = line.trim();
281 if line.is_empty() || line.starts_with('#') {
282 continue;
283 }
284 let mut parts = line.split_whitespace();
285 let hash = parts
286 .next()
287 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing hash"))?;
288 let name = parts.next().map(std::string::ToString::to_string);
289 if parts.next().is_some() {
290 return Err(io::Error::new(
291 io::ErrorKind::InvalidData,
292 format!("line {}: too many fields", line_number + 1),
293 ));
294 }
295 let hash = parse_hex(hash).map_err(|err| {
296 io::Error::new(
297 io::ErrorKind::InvalidData,
298 format!("line {}: {err}", line_number + 1),
299 )
300 })?;
301 targets.push(Target { hash, name });
302 }
303 Ok(targets)
304}
305
306pub fn parse_hex(input: &str) -> io::Result<Vec<u8>> {
324 let cleaned = input.trim();
325 hex::decode(cleaned).map_err(|err| io::Error::new(io::ErrorKind::InvalidInput, err))
326}
327
// Per-file outcome produced by the parallel hashing stage.
enum ResultEntry {
    // File was hashed; `matches` holds indices into `SearchConfig::targets`
    // whose digest equals this file's digest.
    Hashed { path: PathBuf, matches: Vec<usize> },
    // Opening or reading the file failed; reported in `failed_files`.
    Error { path: PathBuf, err: io::Error },
    // File name matched no named target and no hash-only targets exist, so
    // hashing was skipped entirely.
    SkippedNameMismatch,
}
333
334fn split_targets(targets: &[Target]) -> (BTreeMap<String, Vec<usize>>, Vec<usize>) {
344 let mut name_map: BTreeMap<String, Vec<usize>> = BTreeMap::new();
345 let mut hash_only = Vec::new();
346 for (idx, target) in targets.iter().enumerate() {
347 if let Some(name) = &target.name {
348 name_map.entry(name.clone()).or_default().push(idx);
349 } else {
350 hash_only.push(idx);
351 }
352 }
353 (name_map, hash_only)
354}
355
356fn compute_hash(path: &Path, algo: Algorithm) -> io::Result<Vec<u8>> {
367 let file = File::open(path)?;
368 let mut reader = BufReader::new(file);
369 match algo {
370 Algorithm::Md5 => hash_with_digest::<md5::Md5>(&mut reader),
371 Algorithm::Sha1 => hash_with_digest::<sha1::Sha1>(&mut reader),
372 Algorithm::Sha256 => hash_with_digest::<sha2::Sha256>(&mut reader),
373 Algorithm::Sha512 => hash_with_digest::<sha2::Sha512>(&mut reader),
374 Algorithm::Sha3_256 => hash_with_digest::<sha3::Sha3_256>(&mut reader),
375 Algorithm::Sha3_512 => hash_with_digest::<sha3::Sha3_512>(&mut reader),
376 Algorithm::Blake2s => hash_with_digest::<Blake2s256>(&mut reader),
377 Algorithm::Blake2b => hash_with_digest::<Blake2b512>(&mut reader),
378 Algorithm::Blake3 => hash_blake3(&mut reader),
379 }
380}
381
382fn hash_with_digest<D: Digest>(reader: &mut BufReader<File>) -> io::Result<Vec<u8>> {
391 let mut hasher = D::new();
392 let mut buffer = vec![0u8; 128 * 1024];
393 loop {
394 let read = reader.read(&mut buffer)?;
395 if read == 0 {
396 break;
397 }
398 hasher.update(&buffer[..read]);
399 }
400 Ok(hasher.finalize().to_vec())
401}
402
403fn hash_blake3(reader: &mut BufReader<File>) -> io::Result<Vec<u8>> {
412 let mut hasher = blake3::Hasher::new();
413 let mut buffer = vec![0u8; 128 * 1024];
414 loop {
415 let read = reader.read(&mut buffer)?;
416 if read == 0 {
417 break;
418 }
419 hasher.update(&buffer[..read]);
420 }
421 Ok(hasher.finalize().as_bytes().to_vec())
422}
423
// Unit tests. File-system fixtures use `tempfile` temp dirs; digest
// expectations are cross-checked against the one-shot digest APIs.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::io::Write;

    // Test helper: creates `name` inside the temp dir with `contents` and
    // returns the full path to the new file.
    fn write_file(dir: &tempfile::TempDir, name: &str, contents: &[u8]) -> PathBuf {
        let path = dir.path().join(name);
        let mut file = File::create(&path).expect("create file");
        file.write_all(contents).expect("write file");
        path
    }

    #[test]
    fn parse_hex_trims_and_parses() {
        let bytes = parse_hex(" 0a0b0c ").expect("parse hex");
        assert_eq!(bytes, vec![0x0a, 0x0b, 0x0c]);
    }

    #[test]
    fn parse_hex_rejects_invalid() {
        let err = parse_hex("not-hex").expect_err("invalid hex should fail");
        assert_eq!(err.kind(), io::ErrorKind::InvalidInput);
    }

    // "HASH NAME" becomes a named target, bare "HASH" a hash-only target;
    // blank lines and `#` comments are skipped.
    #[test]
    fn load_batch_parses_names_and_hash_only() {
        let dir = tempfile::tempdir().expect("tempdir");
        let path = dir.path().join("batch.txt");
        fs::write(
            &path,
            "0a0b0c report.txt\n\n# comment line\n0d0e0f\n",
        )
        .expect("write batch");
        let targets = load_batch(&path).expect("load batch");
        assert_eq!(targets.len(), 2);
        assert_eq!(targets[0].hash, vec![0x0a, 0x0b, 0x0c]);
        assert_eq!(targets[0].name.as_deref(), Some("report.txt"));
        assert_eq!(targets[1].hash, vec![0x0d, 0x0e, 0x0f]);
        assert!(targets[1].name.is_none());
    }

    #[test]
    fn load_batch_reports_too_many_fields() {
        let dir = tempfile::tempdir().expect("tempdir");
        let path = dir.path().join("batch.txt");
        fs::write(&path, "0a0b0c one two\n").expect("write batch");
        let err = load_batch(&path).expect_err("expected too many fields error");
        assert_eq!(err.kind(), io::ErrorKind::InvalidData);
        // Error text carries the 1-based line number.
        assert!(err.to_string().contains("line 1"));
    }

    #[test]
    fn load_batch_reports_invalid_hex_with_line_number() {
        let dir = tempfile::tempdir().expect("tempdir");
        let path = dir.path().join("batch.txt");
        fs::write(&path, "0a0b0c\nzzzz\n").expect("write batch");
        let err = load_batch(&path).expect_err("expected invalid hex");
        assert_eq!(err.kind(), io::ErrorKind::InvalidData);
        assert!(err.to_string().contains("line 2"));
    }

    #[test]
    fn split_targets_separates_named_and_hash_only() {
        let targets = vec![
            Target {
                hash: vec![1],
                name: Some("a.txt".to_string()),
            },
            Target {
                hash: vec![2],
                name: None,
            },
            Target {
                hash: vec![3],
                name: Some("a.txt".to_string()),
            },
        ];
        let (name_map, hash_only) = split_targets(&targets);
        assert_eq!(hash_only, vec![1]);
        // Two targets share the same name; both indices must be grouped.
        let entries = name_map.get("a.txt").expect("name entry");
        assert_eq!(entries, &vec![0, 2]);
    }

    #[test]
    fn compute_hash_errors_for_missing_file() {
        let path = PathBuf::from("missing-file");
        let err = compute_hash(&path, Algorithm::Sha256).expect_err("missing file");
        assert_eq!(err.kind(), io::ErrorKind::NotFound);
    }

    // Cross-checks the streaming implementation against a one-shot digest.
    #[test]
    fn hash_with_digest_matches_md5() {
        let dir = tempfile::tempdir().expect("tempdir");
        let path = write_file(&dir, "file.txt", b"hash-hunter");
        let file = File::open(&path).expect("open file");
        let mut reader = BufReader::new(file);
        let hash = hash_with_digest::<md5::Md5>(&mut reader).expect("hash");
        let expected = md5::Md5::digest(b"hash-hunter").to_vec();
        assert_eq!(hash, expected);
    }

    #[test]
    fn hash_blake3_matches_expected() {
        let dir = tempfile::tempdir().expect("tempdir");
        let path = write_file(&dir, "file.txt", b"hash-hunter");
        let file = File::open(&path).expect("open file");
        let mut reader = BufReader::new(file);
        let hash = hash_blake3(&mut reader).expect("hash");
        let expected = blake3::hash(b"hash-hunter").as_bytes().to_vec();
        assert_eq!(hash, expected);
    }

    // One fixture file, every Algorithm variant, each compared against its
    // one-shot reference digest.
    #[test]
    fn compute_hash_supports_all_algorithms() {
        let dir = tempfile::tempdir().expect("tempdir");
        let path = write_file(&dir, "file.txt", b"hash-hunter");
        let cases = [
            (Algorithm::Md5, md5::Md5::digest(b"hash-hunter").to_vec()),
            (Algorithm::Sha1, sha1::Sha1::digest(b"hash-hunter").to_vec()),
            (Algorithm::Sha256, sha2::Sha256::digest(b"hash-hunter").to_vec()),
            (Algorithm::Sha512, sha2::Sha512::digest(b"hash-hunter").to_vec()),
            (
                Algorithm::Sha3_256,
                sha3::Sha3_256::digest(b"hash-hunter").to_vec(),
            ),
            (
                Algorithm::Sha3_512,
                sha3::Sha3_512::digest(b"hash-hunter").to_vec(),
            ),
            (Algorithm::Blake2s, Blake2s256::digest(b"hash-hunter").to_vec()),
            (Algorithm::Blake2b, Blake2b512::digest(b"hash-hunter").to_vec()),
            (
                Algorithm::Blake3,
                blake3::hash(b"hash-hunter").as_bytes().to_vec(),
            ),
        ];
        for (algo, expected) in cases {
            let digest = compute_hash(&path, algo).expect("compute hash");
            assert_eq!(digest, expected, "mismatch for {algo:?}");
        }
    }

    #[test]
    fn search_requires_targets() {
        let dir = tempfile::tempdir().expect("tempdir");
        let config = SearchConfig {
            dir: dir.path().to_path_buf(),
            algorithm: Algorithm::Sha256,
            targets: Vec::new(),
            threads: None,
        };
        let err = search(&config).expect_err("should require targets");
        assert_eq!(err.kind(), io::ErrorKind::InvalidInput);
    }

    // End-to-end: one named target that matches, one hash-only target that
    // matches, and one named target that matches nothing.
    #[test]
    fn search_matches_named_and_hash_only_targets() {
        let dir = tempfile::tempdir().expect("tempdir");
        let alpha_path = write_file(&dir, "alpha.txt", b"alpha");
        let beta_path = write_file(&dir, "beta.txt", b"beta");
        let alpha_hash = compute_hash(&alpha_path, Algorithm::Sha256).expect("hash");
        let beta_hash = compute_hash(&beta_path, Algorithm::Sha256).expect("hash");
        let targets = vec![
            Target {
                hash: alpha_hash.clone(),
                name: Some("alpha.txt".to_string()),
            },
            Target {
                hash: beta_hash.clone(),
                name: None,
            },
            Target {
                hash: vec![0xff],
                name: Some("missing.txt".to_string()),
            },
        ];
        let config = SearchConfig {
            dir: dir.path().to_path_buf(),
            algorithm: Algorithm::Sha256,
            targets: targets.clone(),
            threads: None,
        };
        let report = search(&config).expect("search");
        assert_eq!(report.matches.len(), 2);
        let mut matched_paths: Vec<_> = report
            .matches
            .iter()
            .map(|result| result.path.clone())
            .collect();
        matched_paths.sort();
        let mut expected = vec![alpha_path, beta_path];
        expected.sort();
        assert_eq!(matched_paths, expected);
        let matched_targets: Vec<_> = report
            .matches
            .into_iter()
            .map(|result| result.target)
            .collect();
        assert!(matched_targets.iter().any(|target| {
            target.hash == targets[0].hash && target.name == targets[0].name
        }));
        assert!(matched_targets.iter().any(|target| {
            target.hash == targets[1].hash && target.name == targets[1].name
        }));
    }

    // Files skipped by the name filter still count toward
    // `total_files_checked`.
    #[test]
    fn search_counts_name_mismatches_as_checked() {
        let dir = tempfile::tempdir().expect("tempdir");
        let alpha_path = write_file(&dir, "alpha.txt", b"alpha");
        write_file(&dir, "beta.txt", b"beta");
        let alpha_hash = compute_hash(&alpha_path, Algorithm::Sha256).expect("hash");
        let targets = vec![Target {
            hash: alpha_hash,
            name: Some("alpha.txt".to_string()),
        }];
        let config = SearchConfig {
            dir: dir.path().to_path_buf(),
            algorithm: Algorithm::Sha256,
            targets,
            threads: None,
        };
        let report = search(&config).expect("search");
        assert_eq!(report.total_files_checked, 2);
    }
}