1use std::fs::File;
2use std::io::{BufReader, Read, Seek, SeekFrom};
3use std::num::NonZero;
4use std::path::{Path, PathBuf};
5use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
6use std::thread::{self};
7
8use content_inspector::{ContentType, inspect};
9use fancy_regex::Regex as FancyRegex;
10use ignore::overrides::Override;
11use ignore::{WalkBuilder, WalkState};
12use regex::Regex;
13
14use crate::{
15 line_reader::{BufReadExt, LineEnding},
16 replace::{self, ReplaceResult},
17};
18
/// A single line of a file that matched the search.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SearchResult {
    /// Path of the file the line was read from.
    pub path: PathBuf,
    /// Line number of the match; `search_file` numbers lines starting at 1.
    pub line_number: usize,
    /// Text of the matching line, decoded as UTF-8.
    pub line: String,
    /// Line ending reported by the line reader for this line.
    pub line_ending: LineEnding,
    /// Inclusion flag; `search_file` initialises it to `true`.
    // NOTE(review): presumably toggled by callers to choose which results are replaced — confirm.
    pub included: bool,
}
28
/// A [`SearchResult`] paired with its replacement text and, optionally, the outcome of
/// applying that replacement.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SearchResultWithReplacement {
    /// The matched line this replacement belongs to.
    pub search_result: SearchResult,
    /// Replacement text for the matched line.
    // NOTE(review): presumably the whole line after substitution rather than just the
    // substituted fragment — confirm against the code that fills this in.
    pub replacement: String,
    /// Outcome of the replacement, if one is recorded. `display_error` panics unless this is
    /// `Some(ReplaceResult::Error(_))`.
    pub replace_result: Option<ReplaceResult>,
}
35
36impl SearchResultWithReplacement {
37 pub fn display_error(&self) -> (String, &str) {
38 let error = match &self.replace_result {
39 Some(ReplaceResult::Error(error)) => error,
40 None => panic!("Found error result with no error message"),
41 Some(ReplaceResult::Success) => {
42 panic!("Found successful result in errors: {self:?}")
43 }
44 };
45
46 let path_display = format!(
47 "{}:{}",
48 self.search_result.path.display(),
49 self.search_result.line_number
50 );
51
52 (path_display, error)
53 }
54}
55
/// The kind of search to perform against each line.
#[derive(Clone, Debug)]
pub enum SearchType {
    /// Regular expression compiled with the `regex` crate.
    Pattern(Regex),
    /// Regular expression compiled with the `fancy_regex` crate (the tests in this file use it
    /// for look-around patterns).
    PatternAdvanced(FancyRegex),
    /// Literal text, matched with a plain substring search.
    Fixed(String),
}
62
63impl SearchType {
64 pub fn is_empty(&self) -> bool {
65 let str = match &self {
66 SearchType::Pattern(r) => &r.to_string(),
67 SearchType::PatternAdvanced(r) => &r.to_string(),
68 SearchType::Fixed(s) => s,
69 };
70 str.is_empty()
71 }
72}
73
/// Boxed, sendable callback that receives the matches found in one file and returns the
/// [`WalkState`] that is handed back to the directory walker.
type FileVisitor = Box<dyn FnMut(Vec<SearchResult>) -> WalkState + Send>;
76
77impl FileSearcher {
78 pub fn search(&self) -> &SearchType {
79 &self.search_config.search
80 }
81
82 pub fn replace(&self) -> &String {
83 &self.search_config.replace
84 }
85}
86
/// Flags controlling how a search pattern is matched.
// NOTE(review): nothing in this part of the file reads these flags; the field descriptions
// below are inferred from the names — confirm against the code that builds the regex.
#[derive(Clone, Debug)]
pub struct RegexOptions {
    /// Presumably restricts matches to whole words — TODO confirm.
    pub whole_word: bool,
    /// Presumably makes matching case-sensitive — TODO confirm.
    pub match_case: bool,
}
95
/// Parsed search settings: what to look for and what to replace it with.
#[derive(Clone, Debug)]
pub struct ParsedSearchConfig {
    /// The search applied to each line.
    pub search: SearchType,
    /// The replacement text passed to the replace routines.
    pub replace: String,
}
103
/// Parsed directory-walk settings used to build the file walker.
#[derive(Clone, Debug)]
pub struct ParsedDirConfig {
    /// Override rules handed to the walker via `WalkBuilder::overrides`.
    pub overrides: Override,
    /// Directory the walk starts from.
    pub root_dir: PathBuf,
    /// When `true`, hidden entries are not filtered out by the walker.
    pub include_hidden: bool,
}
113
/// Searches (and optionally replaces in) the files under a directory tree, walking it in
/// parallel.
#[derive(Clone, Debug)]
pub struct FileSearcher {
    // What to search for and what to replace it with.
    search_config: ParsedSearchConfig,
    // Where to walk and which entries to consider.
    dir_config: ParsedDirConfig,
}
119
120impl FileSearcher {
121 pub fn new(search_config: ParsedSearchConfig, dir_config: ParsedDirConfig) -> Self {
122 Self {
123 search_config,
124 dir_config,
125 }
126 }
127
128 fn build_walker(&self) -> ignore::WalkParallel {
129 let num_threads = thread::available_parallelism()
130 .map(NonZero::get)
131 .unwrap_or(4)
132 .min(12);
133
134 WalkBuilder::new(&self.dir_config.root_dir)
135 .hidden(!self.dir_config.include_hidden)
136 .overrides(self.dir_config.overrides.clone())
137 .threads(num_threads)
138 .build_parallel()
139 }
140
141 pub fn walk_files<F>(&self, cancelled: Option<&AtomicBool>, mut file_handler: F)
194 where
195 F: FnMut() -> FileVisitor + Send,
196 {
197 if let Some(cancelled) = cancelled {
198 cancelled.store(false, Ordering::Relaxed);
199 }
200
201 let walker = self.build_walker();
202 walker.run(|| {
203 let mut on_file_found = file_handler();
204 Box::new(move |result| {
205 if let Some(cancelled) = cancelled {
206 if cancelled.load(Ordering::Relaxed) {
207 return WalkState::Quit;
208 }
209 }
210
211 let Ok(entry) = result else {
212 return WalkState::Continue;
213 };
214
215 if is_searchable(&entry) {
216 let results = match search_file(entry.path(), &self.search_config.search) {
217 Ok(r) => r,
218 Err(e) => {
219 log::warn!(
220 "Skipping {} due to error when searching: {e}",
221 entry.path().display()
222 );
223 return WalkState::Continue;
224 }
225 };
226
227 if !results.is_empty() {
228 return on_file_found(results);
229 }
230 }
231 WalkState::Continue
232 })
233 });
234 }
235
236 pub fn walk_files_and_replace(&self, cancelled: Option<&AtomicBool>) -> usize {
251 if let Some(cancelled) = cancelled {
252 cancelled.store(false, Ordering::Relaxed);
253 }
254
255 let num_files_replaced_in = std::sync::Arc::new(AtomicUsize::new(0));
256
257 let walker = self.build_walker();
258 walker.run(|| {
259 let counter = num_files_replaced_in.clone();
260
261 Box::new(move |result| {
262 if let Some(cancelled) = cancelled {
263 if cancelled.load(Ordering::Relaxed) {
264 return WalkState::Quit;
265 }
266 }
267
268 let Ok(entry) = result else {
269 return WalkState::Continue;
270 };
271
272 if is_searchable(&entry) {
273 match replace::replace_all_in_file(entry.path(), self.search(), self.replace())
274 {
275 Ok(replaced_in_file) => {
276 if replaced_in_file {
277 counter.fetch_add(1, Ordering::Relaxed);
278 }
279 }
280 Err(e) => {
281 log::error!(
282 "Found error when performing replacement in {path_display}: {e}",
283 path_display = entry.path().display()
284 );
285 }
286 }
287 }
288 WalkState::Continue
289 })
290 });
291
292 num_files_replaced_in.load(Ordering::Relaxed)
293 }
294}
295
/// File extensions (lower-case, without the dot) that are treated as binary without looking
/// at the file contents.
const BINARY_EXTENSIONS: &[&str] = &[
    "png", "gif", "jpg", "jpeg", "ico", "svg", "pdf", "exe", "dll", "so", "bin", "class", "jar",
    "zip", "gz", "bz2", "xz", "7z", "tar",
];

/// Returns `true` if `path` has an extension listed in [`BINARY_EXTENSIONS`].
///
/// The comparison ignores ASCII case. Paths without an extension, or whose extension is not
/// valid UTF-8, are not considered binary.
fn is_likely_binary(path: &Path) -> bool {
    let Some(extension) = path.extension().and_then(|raw| raw.to_str()) else {
        return false;
    };

    BINARY_EXTENSIONS
        .iter()
        .any(|known| known.eq_ignore_ascii_case(extension))
}
310
311fn is_searchable(entry: &ignore::DirEntry) -> bool {
312 entry.file_type().is_some_and(|ft| ft.is_file()) && !is_likely_binary(entry.path())
313}
314
315pub fn contains_search(line: &str, search: &SearchType) -> bool {
316 match search {
317 SearchType::Fixed(fixed_str) => line.contains(fixed_str),
318 SearchType::Pattern(pattern) => pattern.is_match(line),
319 SearchType::PatternAdvanced(pattern) => pattern.is_match(line).is_ok_and(|r| r),
320 }
321}
322
323pub fn search_file(path: &Path, search: &SearchType) -> anyhow::Result<Vec<SearchResult>> {
324 if search.is_empty() {
325 return Ok(vec![]);
326 }
327 let mut file = File::open(path)?;
328
329 let mut probe = [0u8; 8192];
331 let read = file.read(&mut probe).unwrap_or(0);
332 if matches!(inspect(&probe[..read]), ContentType::BINARY) {
333 return Ok(Vec::new());
334 }
335 file.seek(SeekFrom::Start(0))?;
336
337 let reader = BufReader::with_capacity(16384, file);
338 let mut results = Vec::new();
339
340 let mut read_errors = 0;
341
342 for (mut line_number, line_result) in reader.lines_with_endings().enumerate() {
343 line_number += 1; let (line_bytes, line_ending) = match line_result {
346 Ok(l) => l,
347 Err(err) => {
348 read_errors += 1;
349 log::warn!(
350 "Error retrieving line {line_number} of {}: {err}",
351 path.display()
352 );
353 if read_errors >= 10 {
354 anyhow::bail!(
355 "Aborting search of {path:?}: too many read errors ({read_errors}). Most recent error: {err}",
356 );
357 }
358 continue;
359 }
360 };
361
362 if let Ok(line) = String::from_utf8(line_bytes) {
363 if contains_search(&line, search) {
364 let result = SearchResult {
365 path: path.to_path_buf(),
366 line_number,
367 line,
368 line_ending,
369 included: true,
370 };
371 results.push(result);
372 }
373 }
374 }
375
376 Ok(results)
377}
378
379#[cfg(test)]
380mod tests {
381 use super::*;
382
383 mod test_helpers {
384 use super::*;
385
386 pub fn create_test_search_result_with_replacement(
387 path: &str,
388 line_number: usize,
389 replace_result: Option<ReplaceResult>,
390 ) -> SearchResultWithReplacement {
391 SearchResultWithReplacement {
392 search_result: SearchResult {
393 path: PathBuf::from(path),
394 line_number,
395 line: "test line".to_string(),
396 line_ending: LineEnding::Lf,
397 included: true,
398 },
399 replacement: "replacement".to_string(),
400 replace_result,
401 }
402 }
403
404 pub fn create_fixed_search(term: &str) -> SearchType {
405 SearchType::Fixed(term.to_string())
406 }
407
408 pub fn create_pattern_search(pattern: &str) -> SearchType {
409 SearchType::Pattern(Regex::new(pattern).unwrap())
410 }
411
412 pub fn create_advanced_pattern_search(pattern: &str) -> SearchType {
413 SearchType::PatternAdvanced(FancyRegex::new(pattern).unwrap())
414 }
415 }
416
417 mod unicode_handling {
418 use super::*;
419
420 #[test]
421 fn test_complex_unicode_replacement() {
422 let text = "ASCII text with 世界 (CJK), Здравствуйте (Cyrillic), 안녕하세요 (Hangul), αβγδ (Greek), עִבְרִית (Hebrew)";
423 let search = SearchType::Fixed("世界".to_string());
424
425 let result = replace::replacement_if_match(text, &search, "World");
426
427 assert_eq!(
428 result,
429 Some("ASCII text with World (CJK), Здравствуйте (Cyrillic), 안녕하세요 (Hangul), αβγδ (Greek), עִבְרִית (Hebrew)".to_string())
430 );
431 }
432
433 #[test]
434 fn test_unicode_normalization() {
435 let text = "café";
436 let search = SearchType::Fixed("é".to_string());
437 assert_eq!(
438 replace::replacement_if_match(text, &search, "e"),
439 Some("cafe".to_string())
440 );
441 }
442
443 #[test]
444 fn test_unicode_regex_classes() {
445 let text = "Latin A, Cyrillic Б, Greek Γ, Hebrew א";
446
447 let search = SearchType::Pattern(Regex::new(r"\p{Cyrillic}").unwrap());
448 assert_eq!(
449 replace::replacement_if_match(text, &search, "X"),
450 Some("Latin A, Cyrillic X, Greek Γ, Hebrew א".to_string())
451 );
452
453 let search = SearchType::Pattern(Regex::new(r"\p{Greek}").unwrap());
454 assert_eq!(
455 replace::replacement_if_match(text, &search, "X"),
456 Some("Latin A, Cyrillic Б, Greek X, Hebrew א".to_string())
457 );
458 }
459
460 #[test]
461 fn test_unicode_capture_groups() {
462 let text = "Name: 李明 (ID: A12345)";
463
464 let search =
465 SearchType::Pattern(Regex::new(r"Name: (\p{Han}+) \(ID: ([A-Z0-9]+)\)").unwrap());
466 assert_eq!(
467 replace::replacement_if_match(text, &search, "ID $2 belongs to $1"),
468 Some("ID A12345 belongs to 李明".to_string())
469 );
470 }
471 }
472
473 mod replace_any {
474 use super::*;
475
476 #[test]
477 fn test_simple_match_subword() {
478 assert_eq!(
479 replace::replacement_if_match(
480 "foobarbaz",
481 &SearchType::Fixed("bar".to_string()),
482 "REPL"
483 ),
484 Some("fooREPLbaz".to_string())
485 );
486 assert_eq!(
487 replace::replacement_if_match(
488 "foobarbaz",
489 &SearchType::Pattern(Regex::new(r"bar").unwrap()),
490 "REPL"
491 ),
492 Some("fooREPLbaz".to_string())
493 );
494 assert_eq!(
495 replace::replacement_if_match(
496 "foobarbaz",
497 &SearchType::PatternAdvanced(FancyRegex::new(r"bar").unwrap()),
498 "REPL"
499 ),
500 Some("fooREPLbaz".to_string())
501 );
502 }
503
504 #[test]
505 fn test_no_match() {
506 assert_eq!(
507 replace::replacement_if_match(
508 "foobarbaz",
509 &SearchType::Fixed("xyz".to_string()),
510 "REPL"
511 ),
512 None
513 );
514 assert_eq!(
515 replace::replacement_if_match(
516 "foobarbaz",
517 &SearchType::Pattern(Regex::new(r"xyz").unwrap()),
518 "REPL"
519 ),
520 None
521 );
522 assert_eq!(
523 replace::replacement_if_match(
524 "foobarbaz",
525 &SearchType::PatternAdvanced(FancyRegex::new(r"xyz").unwrap()),
526 "REPL"
527 ),
528 None
529 );
530 }
531
532 #[test]
533 fn test_word_boundaries() {
534 assert_eq!(
535 replace::replacement_if_match(
536 "foo bar baz",
537 &SearchType::Pattern(Regex::new(r"\bbar\b").unwrap()),
538 "REPL"
539 ),
540 Some("foo REPL baz".to_string())
541 );
542 assert_eq!(
543 replace::replacement_if_match(
544 "embargo",
545 &SearchType::Pattern(Regex::new(r"\bbar\b").unwrap()),
546 "REPL"
547 ),
548 None
549 );
550 assert_eq!(
551 replace::replacement_if_match(
552 "foo bar baz",
553 &SearchType::PatternAdvanced(FancyRegex::new(r"\bbar\b").unwrap()),
554 "REPL"
555 ),
556 Some("foo REPL baz".to_string())
557 );
558 assert_eq!(
559 replace::replacement_if_match(
560 "embargo",
561 &SearchType::PatternAdvanced(FancyRegex::new(r"\bbar\b").unwrap()),
562 "REPL"
563 ),
564 None
565 );
566 }
567
568 #[test]
569 fn test_capture_groups() {
570 assert_eq!(
571 replace::replacement_if_match(
572 "John Doe",
573 &SearchType::Pattern(Regex::new(r"(\w+)\s+(\w+)").unwrap()),
574 "$2, $1"
575 ),
576 Some("Doe, John".to_string())
577 );
578 assert_eq!(
579 replace::replacement_if_match(
580 "John Doe",
581 &SearchType::PatternAdvanced(FancyRegex::new(r"(\w+)\s+(\w+)").unwrap()),
582 "$2, $1"
583 ),
584 Some("Doe, John".to_string())
585 );
586 }
587
588 #[test]
589 fn test_lookaround() {
590 assert_eq!(
591 replace::replacement_if_match(
592 "123abc456",
593 &SearchType::PatternAdvanced(
594 FancyRegex::new(r"(?<=\d{3})abc(?=\d{3})").unwrap()
595 ),
596 "REPL"
597 ),
598 Some("123REPL456".to_string())
599 );
600 }
601
602 #[test]
603 fn test_quantifiers() {
604 assert_eq!(
605 replace::replacement_if_match(
606 "aaa123456bbb",
607 &SearchType::Pattern(Regex::new(r"\d+").unwrap()),
608 "REPL"
609 ),
610 Some("aaaREPLbbb".to_string())
611 );
612 assert_eq!(
613 replace::replacement_if_match(
614 "abc123def456",
615 &SearchType::Pattern(Regex::new(r"\d{3}").unwrap()),
616 "REPL"
617 ),
618 Some("abcREPLdefREPL".to_string())
619 );
620 assert_eq!(
621 replace::replacement_if_match(
622 "aaa123456bbb",
623 &SearchType::PatternAdvanced(FancyRegex::new(r"\d+").unwrap()),
624 "REPL"
625 ),
626 Some("aaaREPLbbb".to_string())
627 );
628 assert_eq!(
629 replace::replacement_if_match(
630 "abc123def456",
631 &SearchType::PatternAdvanced(FancyRegex::new(r"\d{3}").unwrap()),
632 "REPL"
633 ),
634 Some("abcREPLdefREPL".to_string())
635 );
636 }
637
638 #[test]
639 fn test_special_characters() {
640 assert_eq!(
641 replace::replacement_if_match(
642 "foo.bar*baz",
643 &SearchType::Fixed(".bar*".to_string()),
644 "REPL"
645 ),
646 Some("fooREPLbaz".to_string())
647 );
648 assert_eq!(
649 replace::replacement_if_match(
650 "foo.bar*baz",
651 &SearchType::Pattern(Regex::new(r"\.bar\*").unwrap()),
652 "REPL"
653 ),
654 Some("fooREPLbaz".to_string())
655 );
656 assert_eq!(
657 replace::replacement_if_match(
658 "foo.bar*baz",
659 &SearchType::PatternAdvanced(FancyRegex::new(r"\.bar\*").unwrap()),
660 "REPL"
661 ),
662 Some("fooREPLbaz".to_string())
663 );
664 }
665
666 #[test]
667 fn test_unicode() {
668 assert_eq!(
669 replace::replacement_if_match(
670 "Hello 世界!",
671 &SearchType::Fixed("世界".to_string()),
672 "REPL"
673 ),
674 Some("Hello REPL!".to_string())
675 );
676 assert_eq!(
677 replace::replacement_if_match(
678 "Hello 世界!",
679 &SearchType::Pattern(Regex::new(r"世界").unwrap()),
680 "REPL"
681 ),
682 Some("Hello REPL!".to_string())
683 );
684 assert_eq!(
685 replace::replacement_if_match(
686 "Hello 世界!",
687 &SearchType::PatternAdvanced(FancyRegex::new(r"世界").unwrap()),
688 "REPL"
689 ),
690 Some("Hello REPL!".to_string())
691 );
692 }
693
694 #[test]
695 fn test_case_insensitive() {
696 assert_eq!(
697 replace::replacement_if_match(
698 "HELLO world",
699 &SearchType::Pattern(Regex::new(r"(?i)hello").unwrap()),
700 "REPL"
701 ),
702 Some("REPL world".to_string())
703 );
704 assert_eq!(
705 replace::replacement_if_match(
706 "HELLO world",
707 &SearchType::PatternAdvanced(FancyRegex::new(r"(?i)hello").unwrap()),
708 "REPL"
709 ),
710 Some("REPL world".to_string())
711 );
712 }
713 }
714
715 mod search_result_tests {
716 use super::*;
717
718 #[test]
719 fn test_display_error_with_error_result() {
720 let result = test_helpers::create_test_search_result_with_replacement(
721 "/path/to/file.txt",
722 42,
723 Some(ReplaceResult::Error("Test error message".to_string())),
724 );
725
726 let (path_display, error) = result.display_error();
727
728 assert_eq!(path_display, "/path/to/file.txt:42");
729 assert_eq!(error, "Test error message");
730 }
731
732 #[test]
733 fn test_display_error_with_unicode_path() {
734 let result = test_helpers::create_test_search_result_with_replacement(
735 "/path/to/файл.txt",
736 123,
737 Some(ReplaceResult::Error("Unicode test".to_string())),
738 );
739
740 let (path_display, error) = result.display_error();
741
742 assert_eq!(path_display, "/path/to/файл.txt:123");
743 assert_eq!(error, "Unicode test");
744 }
745
746 #[test]
747 fn test_display_error_with_complex_error_message() {
748 let complex_error = "Failed to write: Permission denied (os error 13)";
749 let result = test_helpers::create_test_search_result_with_replacement(
750 "/readonly/file.txt",
751 1,
752 Some(ReplaceResult::Error(complex_error.to_string())),
753 );
754
755 let (path_display, error) = result.display_error();
756
757 assert_eq!(path_display, "/readonly/file.txt:1");
758 assert_eq!(error, complex_error);
759 }
760
761 #[test]
762 #[should_panic(expected = "Found error result with no error message")]
763 fn test_display_error_panics_with_none_result() {
764 let result = test_helpers::create_test_search_result_with_replacement(
765 "/path/to/file.txt",
766 1,
767 None,
768 );
769 result.display_error();
770 }
771
772 #[test]
773 #[should_panic(expected = "Found successful result in errors")]
774 fn test_display_error_panics_with_success_result() {
775 let result = test_helpers::create_test_search_result_with_replacement(
776 "/path/to/file.txt",
777 1,
778 Some(ReplaceResult::Success),
779 );
780 result.display_error();
781 }
782 }
783
784 mod search_type_tests {
785 use super::*;
786
787 #[test]
788 fn test_search_type_emptiness() {
789 let test_cases = [
790 (test_helpers::create_fixed_search(""), true),
791 (test_helpers::create_fixed_search("hello"), false),
792 (test_helpers::create_fixed_search(" "), false), (test_helpers::create_pattern_search(""), true),
794 (test_helpers::create_pattern_search("test"), false),
795 (test_helpers::create_pattern_search(r"\s+"), false),
796 (test_helpers::create_advanced_pattern_search(""), true),
797 (test_helpers::create_advanced_pattern_search("test"), false),
798 ];
799
800 for (search_type, expected_empty) in test_cases {
801 assert_eq!(
802 search_type.is_empty(),
803 expected_empty,
804 "Emptiness test failed for: {search_type:?}"
805 );
806 }
807 }
808 }
809
810 mod file_searcher_tests {
811 use super::*;
812
813 #[test]
814 fn test_is_likely_binary_extensions() {
815 const BINARY_EXTENSIONS: &[&str] = &[
816 "image.png",
817 "document.pdf",
818 "archive.zip",
819 "program.exe",
820 "library.dll",
821 "photo.jpg",
822 "icon.ico",
823 "vector.svg",
824 "compressed.gz",
825 "backup.7z",
826 "java.class",
827 "application.jar",
828 ];
829
830 const TEXT_EXTENSIONS: &[&str] = &[
831 "code.rs",
832 "script.py",
833 "document.txt",
834 "config.json",
835 "readme.md",
836 "style.css",
837 "page.html",
838 "source.c",
839 "header.h",
840 "makefile",
841 "no_extension",
842 ];
843
844 const MIXED_CASE_BINARY: &[&str] =
845 &["IMAGE.PNG", "Document.PDF", "ARCHIVE.ZIP", "Photo.JPG"];
846
847 let test_cases = [
848 (BINARY_EXTENSIONS, true),
849 (TEXT_EXTENSIONS, false),
850 (MIXED_CASE_BINARY, true),
851 ];
852
853 for (files, expected_binary) in test_cases {
854 for file in files {
855 assert_eq!(
856 is_likely_binary(Path::new(file)),
857 expected_binary,
858 "Binary detection failed for {file}"
859 );
860 }
861 }
862 }
863
864 #[test]
865 fn test_is_likely_binary_no_extension() {
866 assert!(!is_likely_binary(Path::new("filename")));
867 assert!(!is_likely_binary(Path::new("/path/to/file")));
868 }
869
870 #[test]
871 fn test_is_likely_binary_empty_extension() {
872 assert!(!is_likely_binary(Path::new("file.")));
873 }
874
875 #[test]
876 fn test_is_likely_binary_complex_paths() {
877 assert!(is_likely_binary(Path::new("/complex/path/to/image.png")));
878 assert!(!is_likely_binary(Path::new("/complex/path/to/source.rs")));
879 }
880
881 #[test]
882 fn test_is_likely_binary_hidden_files() {
883 assert!(is_likely_binary(Path::new(".hidden.png")));
884 assert!(!is_likely_binary(Path::new(".hidden.txt")));
885 }
886 }
887}