1use std::fs::File;
2use std::io::{BufReader, Read, Seek, SeekFrom};
3use std::num::NonZero;
4use std::path::{Path, PathBuf};
5use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
6use std::thread::{self};
7
8use content_inspector::{ContentType, inspect};
9use fancy_regex::Regex as FancyRegex;
10use ignore::overrides::Override;
11use ignore::{WalkBuilder, WalkState};
12use regex::Regex;
13
14use crate::{
15 line_reader::{BufReadExt, LineEnding},
16 replace::{self, ReplaceResult},
17};
18
19#[derive(Clone, Debug, PartialEq, Eq)]
20pub struct SearchResult {
21 pub path: Option<PathBuf>,
22 pub line_number: usize,
24 pub line: String,
25 pub line_ending: LineEnding,
26 pub included: bool,
27}
28
29#[derive(Clone, Debug, PartialEq, Eq)]
30pub struct SearchResultWithReplacement {
31 pub search_result: SearchResult,
32 pub replacement: String,
33 pub replace_result: Option<ReplaceResult>,
34}
35
36impl SearchResultWithReplacement {
37 pub fn display_error(&self) -> (String, &str) {
38 let error = match &self.replace_result {
39 Some(ReplaceResult::Error(error)) => error,
40 None => panic!("Found error result with no error message"),
41 Some(ReplaceResult::Success) => {
42 panic!("Found successful result in errors: {self:?}")
43 }
44 };
45
46 let path_display = format!(
47 "{}:{}",
48 self.search_result
49 .path
50 .clone()
51 .unwrap_or_default()
52 .display(),
53 self.search_result.line_number
54 );
55
56 (path_display, error)
57 }
58}
59
60#[derive(Clone, Debug)]
61pub enum SearchType {
62 Pattern(Regex),
63 PatternAdvanced(FancyRegex),
64 Fixed(String),
65}
66
67impl SearchType {
68 pub fn is_empty(&self) -> bool {
69 let str = match &self {
70 SearchType::Pattern(r) => &r.to_string(),
71 SearchType::PatternAdvanced(r) => &r.to_string(),
72 SearchType::Fixed(s) => s,
73 };
74 str.is_empty()
75 }
76}
77
78type FileVisitor = Box<dyn FnMut(Vec<SearchResult>) -> WalkState + Send>;
80
81impl FileSearcher {
82 pub fn search(&self) -> &SearchType {
83 &self.search_config.search
84 }
85
86 pub fn replace(&self) -> &String {
87 &self.search_config.replace
88 }
89}
90
/// Flags that adjust how a search pattern is built.
///
/// NOTE(review): nothing in this file reads these flags; the per-field notes
/// below are inferred from the names only — confirm against the code that
/// consumes them.
#[derive(Clone, Debug)]
pub struct RegexOptions {
    /// Presumably restricts matches to whole words.
    pub whole_word: bool,
    /// Presumably makes matching case-sensitive.
    pub match_case: bool,
}
99
100#[derive(Clone, Debug)]
101pub struct ParsedSearchConfig {
102 pub search: SearchType,
104 pub replace: String,
106}
107
108#[derive(Clone, Debug)]
109pub struct ParsedDirConfig {
110 pub overrides: Override,
112 pub root_dir: PathBuf,
114 pub include_hidden: bool,
116}
117
118#[derive(Clone, Debug)]
119pub struct FileSearcher {
120 search_config: ParsedSearchConfig,
121 dir_config: ParsedDirConfig,
122}
123
124impl FileSearcher {
125 pub fn new(search_config: ParsedSearchConfig, dir_config: ParsedDirConfig) -> Self {
126 Self {
127 search_config,
128 dir_config,
129 }
130 }
131
132 fn build_walker(&self) -> ignore::WalkParallel {
133 let num_threads = thread::available_parallelism()
134 .map(NonZero::get)
135 .unwrap_or(4)
136 .min(12);
137
138 WalkBuilder::new(&self.dir_config.root_dir)
139 .hidden(!self.dir_config.include_hidden)
140 .overrides(self.dir_config.overrides.clone())
141 .threads(num_threads)
142 .build_parallel()
143 }
144
145 pub fn walk_files<F>(&self, cancelled: Option<&AtomicBool>, mut file_handler: F)
198 where
199 F: FnMut() -> FileVisitor + Send,
200 {
201 if let Some(cancelled) = cancelled {
202 cancelled.store(false, Ordering::Relaxed);
203 }
204
205 let walker = self.build_walker();
206 walker.run(|| {
207 let mut on_file_found = file_handler();
208 Box::new(move |result| {
209 if let Some(cancelled) = cancelled {
210 if cancelled.load(Ordering::Relaxed) {
211 return WalkState::Quit;
212 }
213 }
214
215 let Ok(entry) = result else {
216 return WalkState::Continue;
217 };
218
219 if is_searchable(&entry) {
220 let results = match search_file(entry.path(), &self.search_config.search) {
221 Ok(r) => r,
222 Err(e) => {
223 log::warn!(
224 "Skipping {} due to error when searching: {e}",
225 entry.path().display()
226 );
227 return WalkState::Continue;
228 }
229 };
230
231 if !results.is_empty() {
232 return on_file_found(results);
233 }
234 }
235 WalkState::Continue
236 })
237 });
238 }
239
240 pub fn walk_files_and_replace(&self, cancelled: Option<&AtomicBool>) -> usize {
255 if let Some(cancelled) = cancelled {
256 cancelled.store(false, Ordering::Relaxed);
257 }
258
259 let num_files_replaced_in = std::sync::Arc::new(AtomicUsize::new(0));
260
261 let walker = self.build_walker();
262 walker.run(|| {
263 let counter = num_files_replaced_in.clone();
264
265 Box::new(move |result| {
266 if let Some(cancelled) = cancelled {
267 if cancelled.load(Ordering::Relaxed) {
268 return WalkState::Quit;
269 }
270 }
271
272 let Ok(entry) = result else {
273 return WalkState::Continue;
274 };
275
276 if is_searchable(&entry) {
277 match replace::replace_all_in_file(entry.path(), self.search(), self.replace())
278 {
279 Ok(replaced_in_file) => {
280 if replaced_in_file {
281 counter.fetch_add(1, Ordering::Relaxed);
282 }
283 }
284 Err(e) => {
285 log::error!(
286 "Found error when performing replacement in {path_display}: {e}",
287 path_display = entry.path().display()
288 );
289 }
290 }
291 }
292 WalkState::Continue
293 })
294 });
295
296 num_files_replaced_in.load(Ordering::Relaxed)
297 }
298}
299
/// File extensions (lower-case, without the dot) that are skipped without
/// looking at the file contents.
const BINARY_EXTENSIONS: &[&str] = &[
    // Image and document formats.
    "png", "gif", "jpg", "jpeg", "ico", "svg", "pdf",
    // Executables, libraries and compiled artifacts.
    "exe", "dll", "so", "bin", "class", "jar",
    // Archives and compressed files.
    "zip", "gz", "bz2", "xz", "7z", "tar",
];
304
305fn is_likely_binary(path: &Path) -> bool {
306 path.extension()
307 .and_then(|ext| ext.to_str())
308 .is_some_and(|ext_str| {
309 BINARY_EXTENSIONS
310 .iter()
311 .any(|&bin_ext| ext_str.eq_ignore_ascii_case(bin_ext))
312 })
313}
314
315fn is_searchable(entry: &ignore::DirEntry) -> bool {
316 entry.file_type().is_some_and(|ft| ft.is_file()) && !is_likely_binary(entry.path())
317}
318
319pub fn contains_search(line: &str, search: &SearchType) -> bool {
320 match search {
321 SearchType::Fixed(fixed_str) => line.contains(fixed_str),
322 SearchType::Pattern(pattern) => pattern.is_match(line),
323 SearchType::PatternAdvanced(pattern) => pattern.is_match(line).is_ok_and(|r| r),
324 }
325}
326
327pub fn search_file(path: &Path, search: &SearchType) -> anyhow::Result<Vec<SearchResult>> {
328 if search.is_empty() {
329 return Ok(vec![]);
330 }
331 let mut file = File::open(path)?;
332
333 let mut probe = [0u8; 8192];
335 let read = file.read(&mut probe).unwrap_or(0);
336 if matches!(inspect(&probe[..read]), ContentType::BINARY) {
337 return Ok(Vec::new());
338 }
339 file.seek(SeekFrom::Start(0))?;
340
341 let reader = BufReader::with_capacity(16384, file);
342 let mut results = Vec::new();
343
344 let mut read_errors = 0;
345
346 for (mut line_number, line_result) in reader.lines_with_endings().enumerate() {
347 line_number += 1; let (line_bytes, line_ending) = match line_result {
350 Ok(l) => l,
351 Err(err) => {
352 read_errors += 1;
353 log::warn!(
354 "Error retrieving line {line_number} of {}: {err}",
355 path.display()
356 );
357 if read_errors >= 10 {
358 anyhow::bail!(
359 "Aborting search of {path:?}: too many read errors ({read_errors}). Most recent error: {err}",
360 );
361 }
362 continue;
363 }
364 };
365
366 if let Ok(line) = String::from_utf8(line_bytes) {
367 if contains_search(&line, search) {
368 let result = SearchResult {
369 path: Some(path.to_path_buf()),
370 line_number,
371 line,
372 line_ending,
373 included: true,
374 };
375 results.push(result);
376 }
377 }
378 }
379
380 Ok(results)
381}
382
383#[cfg(test)]
384mod tests {
385 use super::*;
386
387 mod test_helpers {
388 use super::*;
389
390 pub fn create_test_search_result_with_replacement(
391 path: &str,
392 line_number: usize,
393 replace_result: Option<ReplaceResult>,
394 ) -> SearchResultWithReplacement {
395 SearchResultWithReplacement {
396 search_result: SearchResult {
397 path: Some(PathBuf::from(path)),
398 line_number,
399 line: "test line".to_string(),
400 line_ending: LineEnding::Lf,
401 included: true,
402 },
403 replacement: "replacement".to_string(),
404 replace_result,
405 }
406 }
407
408 pub fn create_fixed_search(term: &str) -> SearchType {
409 SearchType::Fixed(term.to_string())
410 }
411
412 pub fn create_pattern_search(pattern: &str) -> SearchType {
413 SearchType::Pattern(Regex::new(pattern).unwrap())
414 }
415
416 pub fn create_advanced_pattern_search(pattern: &str) -> SearchType {
417 SearchType::PatternAdvanced(FancyRegex::new(pattern).unwrap())
418 }
419 }
420
/// Replacement behaviour on non-ASCII text.
mod unicode_handling {
    use super::*;

    #[test]
    fn test_complex_unicode_replacement() {
        let search = SearchType::Fixed("世界".to_string());
        let input = "ASCII text with 世界 (CJK), Здравствуйте (Cyrillic), 안녕하세요 (Hangul), αβγδ (Greek), עִבְרִית (Hebrew)";
        let expected = "ASCII text with World (CJK), Здравствуйте (Cyrillic), 안녕하세요 (Hangul), αβγδ (Greek), עִבְרִית (Hebrew)";

        let actual = replace::replacement_if_match(input, &search, "World");

        assert_eq!(actual, Some(expected.to_string()));
    }

    #[test]
    fn test_unicode_normalization() {
        let search = SearchType::Fixed("é".to_string());

        let actual = replace::replacement_if_match("café", &search, "e");

        assert_eq!(actual, Some("cafe".to_string()));
    }

    #[test]
    fn test_unicode_regex_classes() {
        let text = "Latin A, Cyrillic Б, Greek Γ, Hebrew א";

        let cyrillic = SearchType::Pattern(Regex::new(r"\p{Cyrillic}").unwrap());
        let replaced_cyrillic = replace::replacement_if_match(text, &cyrillic, "X");
        assert_eq!(
            replaced_cyrillic,
            Some("Latin A, Cyrillic X, Greek Γ, Hebrew א".to_string())
        );

        let greek = SearchType::Pattern(Regex::new(r"\p{Greek}").unwrap());
        let replaced_greek = replace::replacement_if_match(text, &greek, "X");
        assert_eq!(
            replaced_greek,
            Some("Latin A, Cyrillic Б, Greek X, Hebrew א".to_string())
        );
    }

    #[test]
    fn test_unicode_capture_groups() {
        let pattern = Regex::new(r"Name: (\p{Han}+) \(ID: ([A-Z0-9]+)\)").unwrap();
        let search = SearchType::Pattern(pattern);

        let actual =
            replace::replacement_if_match("Name: 李明 (ID: A12345)", &search, "ID $2 belongs to $1");

        assert_eq!(actual, Some("ID A12345 belongs to 李明".to_string()));
    }
}
476
477 mod replace_any {
478 use super::*;
479
480 #[test]
481 fn test_simple_match_subword() {
482 assert_eq!(
483 replace::replacement_if_match(
484 "foobarbaz",
485 &SearchType::Fixed("bar".to_string()),
486 "REPL"
487 ),
488 Some("fooREPLbaz".to_string())
489 );
490 assert_eq!(
491 replace::replacement_if_match(
492 "foobarbaz",
493 &SearchType::Pattern(Regex::new(r"bar").unwrap()),
494 "REPL"
495 ),
496 Some("fooREPLbaz".to_string())
497 );
498 assert_eq!(
499 replace::replacement_if_match(
500 "foobarbaz",
501 &SearchType::PatternAdvanced(FancyRegex::new(r"bar").unwrap()),
502 "REPL"
503 ),
504 Some("fooREPLbaz".to_string())
505 );
506 }
507
508 #[test]
509 fn test_no_match() {
510 assert_eq!(
511 replace::replacement_if_match(
512 "foobarbaz",
513 &SearchType::Fixed("xyz".to_string()),
514 "REPL"
515 ),
516 None
517 );
518 assert_eq!(
519 replace::replacement_if_match(
520 "foobarbaz",
521 &SearchType::Pattern(Regex::new(r"xyz").unwrap()),
522 "REPL"
523 ),
524 None
525 );
526 assert_eq!(
527 replace::replacement_if_match(
528 "foobarbaz",
529 &SearchType::PatternAdvanced(FancyRegex::new(r"xyz").unwrap()),
530 "REPL"
531 ),
532 None
533 );
534 }
535
536 #[test]
537 fn test_word_boundaries() {
538 assert_eq!(
539 replace::replacement_if_match(
540 "foo bar baz",
541 &SearchType::Pattern(Regex::new(r"\bbar\b").unwrap()),
542 "REPL"
543 ),
544 Some("foo REPL baz".to_string())
545 );
546 assert_eq!(
547 replace::replacement_if_match(
548 "embargo",
549 &SearchType::Pattern(Regex::new(r"\bbar\b").unwrap()),
550 "REPL"
551 ),
552 None
553 );
554 assert_eq!(
555 replace::replacement_if_match(
556 "foo bar baz",
557 &SearchType::PatternAdvanced(FancyRegex::new(r"\bbar\b").unwrap()),
558 "REPL"
559 ),
560 Some("foo REPL baz".to_string())
561 );
562 assert_eq!(
563 replace::replacement_if_match(
564 "embargo",
565 &SearchType::PatternAdvanced(FancyRegex::new(r"\bbar\b").unwrap()),
566 "REPL"
567 ),
568 None
569 );
570 }
571
572 #[test]
573 fn test_capture_groups() {
574 assert_eq!(
575 replace::replacement_if_match(
576 "John Doe",
577 &SearchType::Pattern(Regex::new(r"(\w+)\s+(\w+)").unwrap()),
578 "$2, $1"
579 ),
580 Some("Doe, John".to_string())
581 );
582 assert_eq!(
583 replace::replacement_if_match(
584 "John Doe",
585 &SearchType::PatternAdvanced(FancyRegex::new(r"(\w+)\s+(\w+)").unwrap()),
586 "$2, $1"
587 ),
588 Some("Doe, John".to_string())
589 );
590 }
591
592 #[test]
593 fn test_lookaround() {
594 assert_eq!(
595 replace::replacement_if_match(
596 "123abc456",
597 &SearchType::PatternAdvanced(
598 FancyRegex::new(r"(?<=\d{3})abc(?=\d{3})").unwrap()
599 ),
600 "REPL"
601 ),
602 Some("123REPL456".to_string())
603 );
604 }
605
606 #[test]
607 fn test_quantifiers() {
608 assert_eq!(
609 replace::replacement_if_match(
610 "aaa123456bbb",
611 &SearchType::Pattern(Regex::new(r"\d+").unwrap()),
612 "REPL"
613 ),
614 Some("aaaREPLbbb".to_string())
615 );
616 assert_eq!(
617 replace::replacement_if_match(
618 "abc123def456",
619 &SearchType::Pattern(Regex::new(r"\d{3}").unwrap()),
620 "REPL"
621 ),
622 Some("abcREPLdefREPL".to_string())
623 );
624 assert_eq!(
625 replace::replacement_if_match(
626 "aaa123456bbb",
627 &SearchType::PatternAdvanced(FancyRegex::new(r"\d+").unwrap()),
628 "REPL"
629 ),
630 Some("aaaREPLbbb".to_string())
631 );
632 assert_eq!(
633 replace::replacement_if_match(
634 "abc123def456",
635 &SearchType::PatternAdvanced(FancyRegex::new(r"\d{3}").unwrap()),
636 "REPL"
637 ),
638 Some("abcREPLdefREPL".to_string())
639 );
640 }
641
642 #[test]
643 fn test_special_characters() {
644 assert_eq!(
645 replace::replacement_if_match(
646 "foo.bar*baz",
647 &SearchType::Fixed(".bar*".to_string()),
648 "REPL"
649 ),
650 Some("fooREPLbaz".to_string())
651 );
652 assert_eq!(
653 replace::replacement_if_match(
654 "foo.bar*baz",
655 &SearchType::Pattern(Regex::new(r"\.bar\*").unwrap()),
656 "REPL"
657 ),
658 Some("fooREPLbaz".to_string())
659 );
660 assert_eq!(
661 replace::replacement_if_match(
662 "foo.bar*baz",
663 &SearchType::PatternAdvanced(FancyRegex::new(r"\.bar\*").unwrap()),
664 "REPL"
665 ),
666 Some("fooREPLbaz".to_string())
667 );
668 }
669
670 #[test]
671 fn test_unicode() {
672 assert_eq!(
673 replace::replacement_if_match(
674 "Hello 世界!",
675 &SearchType::Fixed("世界".to_string()),
676 "REPL"
677 ),
678 Some("Hello REPL!".to_string())
679 );
680 assert_eq!(
681 replace::replacement_if_match(
682 "Hello 世界!",
683 &SearchType::Pattern(Regex::new(r"世界").unwrap()),
684 "REPL"
685 ),
686 Some("Hello REPL!".to_string())
687 );
688 assert_eq!(
689 replace::replacement_if_match(
690 "Hello 世界!",
691 &SearchType::PatternAdvanced(FancyRegex::new(r"世界").unwrap()),
692 "REPL"
693 ),
694 Some("Hello REPL!".to_string())
695 );
696 }
697
698 #[test]
699 fn test_case_insensitive() {
700 assert_eq!(
701 replace::replacement_if_match(
702 "HELLO world",
703 &SearchType::Pattern(Regex::new(r"(?i)hello").unwrap()),
704 "REPL"
705 ),
706 Some("REPL world".to_string())
707 );
708 assert_eq!(
709 replace::replacement_if_match(
710 "HELLO world",
711 &SearchType::PatternAdvanced(FancyRegex::new(r"(?i)hello").unwrap()),
712 "REPL"
713 ),
714 Some("REPL world".to_string())
715 );
716 }
717 }
718
719 mod search_result_tests {
720 use super::*;
721
722 #[test]
723 fn test_display_error_with_error_result() {
724 let result = test_helpers::create_test_search_result_with_replacement(
725 "/path/to/file.txt",
726 42,
727 Some(ReplaceResult::Error("Test error message".to_string())),
728 );
729
730 let (path_display, error) = result.display_error();
731
732 assert_eq!(path_display, "/path/to/file.txt:42");
733 assert_eq!(error, "Test error message");
734 }
735
736 #[test]
737 fn test_display_error_with_unicode_path() {
738 let result = test_helpers::create_test_search_result_with_replacement(
739 "/path/to/файл.txt",
740 123,
741 Some(ReplaceResult::Error("Unicode test".to_string())),
742 );
743
744 let (path_display, error) = result.display_error();
745
746 assert_eq!(path_display, "/path/to/файл.txt:123");
747 assert_eq!(error, "Unicode test");
748 }
749
750 #[test]
751 fn test_display_error_with_complex_error_message() {
752 let complex_error = "Failed to write: Permission denied (os error 13)";
753 let result = test_helpers::create_test_search_result_with_replacement(
754 "/readonly/file.txt",
755 1,
756 Some(ReplaceResult::Error(complex_error.to_string())),
757 );
758
759 let (path_display, error) = result.display_error();
760
761 assert_eq!(path_display, "/readonly/file.txt:1");
762 assert_eq!(error, complex_error);
763 }
764
765 #[test]
766 #[should_panic(expected = "Found error result with no error message")]
767 fn test_display_error_panics_with_none_result() {
768 let result = test_helpers::create_test_search_result_with_replacement(
769 "/path/to/file.txt",
770 1,
771 None,
772 );
773 result.display_error();
774 }
775
776 #[test]
777 #[should_panic(expected = "Found successful result in errors")]
778 fn test_display_error_panics_with_success_result() {
779 let result = test_helpers::create_test_search_result_with_replacement(
780 "/path/to/file.txt",
781 1,
782 Some(ReplaceResult::Success),
783 );
784 result.display_error();
785 }
786 }
787
788 mod search_type_tests {
789 use super::*;
790
791 #[test]
792 fn test_search_type_emptiness() {
793 let test_cases = [
794 (test_helpers::create_fixed_search(""), true),
795 (test_helpers::create_fixed_search("hello"), false),
796 (test_helpers::create_fixed_search(" "), false), (test_helpers::create_pattern_search(""), true),
798 (test_helpers::create_pattern_search("test"), false),
799 (test_helpers::create_pattern_search(r"\s+"), false),
800 (test_helpers::create_advanced_pattern_search(""), true),
801 (test_helpers::create_advanced_pattern_search("test"), false),
802 ];
803
804 for (search_type, expected_empty) in test_cases {
805 assert_eq!(
806 search_type.is_empty(),
807 expected_empty,
808 "Emptiness test failed for: {search_type:?}"
809 );
810 }
811 }
812 }
813
814 mod file_searcher_tests {
815 use super::*;
816
817 #[test]
818 fn test_is_likely_binary_extensions() {
819 const BINARY_EXTENSIONS: &[&str] = &[
820 "image.png",
821 "document.pdf",
822 "archive.zip",
823 "program.exe",
824 "library.dll",
825 "photo.jpg",
826 "icon.ico",
827 "vector.svg",
828 "compressed.gz",
829 "backup.7z",
830 "java.class",
831 "application.jar",
832 ];
833
834 const TEXT_EXTENSIONS: &[&str] = &[
835 "code.rs",
836 "script.py",
837 "document.txt",
838 "config.json",
839 "readme.md",
840 "style.css",
841 "page.html",
842 "source.c",
843 "header.h",
844 "makefile",
845 "no_extension",
846 ];
847
848 const MIXED_CASE_BINARY: &[&str] =
849 &["IMAGE.PNG", "Document.PDF", "ARCHIVE.ZIP", "Photo.JPG"];
850
851 let test_cases = [
852 (BINARY_EXTENSIONS, true),
853 (TEXT_EXTENSIONS, false),
854 (MIXED_CASE_BINARY, true),
855 ];
856
857 for (files, expected_binary) in test_cases {
858 for file in files {
859 assert_eq!(
860 is_likely_binary(Path::new(file)),
861 expected_binary,
862 "Binary detection failed for {file}"
863 );
864 }
865 }
866 }
867
868 #[test]
869 fn test_is_likely_binary_no_extension() {
870 assert!(!is_likely_binary(Path::new("filename")));
871 assert!(!is_likely_binary(Path::new("/path/to/file")));
872 }
873
874 #[test]
875 fn test_is_likely_binary_empty_extension() {
876 assert!(!is_likely_binary(Path::new("file.")));
877 }
878
879 #[test]
880 fn test_is_likely_binary_complex_paths() {
881 assert!(is_likely_binary(Path::new("/complex/path/to/image.png")));
882 assert!(!is_likely_binary(Path::new("/complex/path/to/source.rs")));
883 }
884
885 #[test]
886 fn test_is_likely_binary_hidden_files() {
887 assert!(is_likely_binary(Path::new(".hidden.png")));
888 assert!(!is_likely_binary(Path::new(".hidden.txt")));
889 }
890 }
891}