Skip to main content

scooter_core/
search.rs

1use std::fs::File;
2use std::io::{BufReader, Read, Seek, SeekFrom};
3use std::num::NonZero;
4use std::path::{Path, PathBuf};
5use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
6use std::thread::{self};
7
8use anyhow::Context;
9use content_inspector::{ContentType, inspect};
10use fancy_regex::Regex as FancyRegex;
11use ignore::overrides::Override;
12use ignore::{WalkBuilder, WalkState};
13use regex::Regex;
14
15use crate::{
16    line_reader::{BufReadExt, LineEnding},
17    replace::{self, ReplaceResult},
18};
19
/// A single line of text together with the terminator that followed it.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Line {
    /// Text of the line; the terminator is stored separately in `line_ending`.
    pub content: String,
    /// Terminator recorded for this line.
    pub line_ending: LineEnding,
}
25
/// A configured search, either backed by a [`FileSearcher`] or carrying only a
/// search configuration.
#[derive(Clone, Debug)]
pub enum Searcher {
    /// Search driven by a [`FileSearcher`] (search config plus directory config).
    FileSearcher(FileSearcher),
    /// Search that carries only the parsed search configuration, with no directory config.
    TextSearcher { search_config: ParsedSearchConfig },
}
31
32impl Searcher {
33    pub fn search(&self) -> &SearchType {
34        match self {
35            Self::FileSearcher(file_searcher) => file_searcher.search(),
36            Self::TextSearcher { search_config } => &search_config.search,
37        }
38    }
39
40    pub fn replace(&self) -> &str {
41        match self {
42            Self::FileSearcher(file_searcher) => file_searcher.replace(),
43            Self::TextSearcher { search_config } => &search_config.replace,
44        }
45    }
46}
47
/// A position expressed as a line number plus a byte position.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct LinePos {
    /// Line number.
    pub line: usize, // 1-indexed
    /// Byte position.
    // NOTE(review): not visible here whether this is relative to the line or to the
    // whole file — confirm against callers.
    pub byte_pos: usize,
}
53
/// The content of a match, in one of two shapes depending on the search mode.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub enum MatchContent {
    /// Line-mode: Replace all occurrences of pattern on the line
    /// Used for non-multiline search where we replace ALL matches on a single line
    Line {
        /// Line number of the matching line.
        line_number: usize,
        /// Content of the matching line (the terminator is kept in `line_ending`).
        content: String,
        /// Terminator of the matching line.
        line_ending: LineEnding,
    },
    /// Byte-mode: Replace only the specific byte range
    /// Used for multiline search where we track individual matches precisely
    ByteRange {
        lines: Vec<(usize, Line)>, // Line numbers (1-indexed) and line contents
        match_start_in_first_line: usize, // Byte offset where match starts in first line
        match_end_in_last_line: usize, // Byte offset where match ends in last line (exclusive)
        byte_start: usize,         // Absolute byte position in file
        byte_end: usize,           // Absolute byte position in file (exclusive)
        content: String,           // The matched bytes
    },
}
74
/// Data-free tag naming which [`MatchContent`] variant a result uses.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum MatchMode {
    /// Corresponds to [`MatchContent::Line`].
    Line,
    /// Corresponds to [`MatchContent::ByteRange`].
    ByteRange,
}
80
81impl MatchContent {
82    /// Returns the matched text without line ending
83    pub fn matched_text(&self) -> &str {
84        match self {
85            MatchContent::Line { content, .. } | MatchContent::ByteRange { content, .. } => content,
86        }
87    }
88
89    pub fn mode(&self) -> MatchMode {
90        match self {
91            MatchContent::Line { .. } => MatchMode::Line,
92            MatchContent::ByteRange { .. } => MatchMode::ByteRange,
93        }
94    }
95}
96
97/// Asserts all results use the same `MatchContent` variant and returns the mode.
98/// Returns `None` if results is empty.
99pub fn match_mode_of_results(results: &[SearchResultWithReplacement]) -> Option<MatchMode> {
100    let first = results.first()?;
101    let mode = first.search_result.content.mode();
102    assert!(
103        results
104            .iter()
105            .all(|r| r.search_result.content.mode() == mode),
106        "Inconsistent MatchContent variants detected in results"
107    );
108    Some(mode)
109}
110
/// Parameters for constructing a `SearchResult` with byte-range content.
///
/// The fields mirror those of `MatchContent::ByteRange`, plus the `path` and
/// `included` fields of `SearchResult`.
#[derive(Clone, Debug)]
pub struct ByteRangeParams {
    /// Path stored on the resulting `SearchResult`, if any.
    pub path: Option<PathBuf>,
    /// Line numbers and contents of the lines the match touches; must be non-empty
    /// with sequential line numbers (checked by `SearchResult::new_byte_range`).
    pub lines: Vec<(usize, Line)>,
    /// Byte offset of the match start within the first line's content.
    pub match_start_in_first_line: usize,
    /// Byte offset of the match end within the last line's content.
    pub match_end_in_last_line: usize,
    /// Start of the matched byte range; must be `<= byte_end`.
    pub byte_start: usize,
    /// End of the matched byte range.
    pub byte_end: usize,
    /// The matched text.
    pub content: String,
    /// Initial value of the result's `included` flag.
    pub included: bool,
}
123
/// A single match: an optional path, the matched content, and an inclusion flag.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SearchResult {
    /// Path associated with the match, if any.
    pub path: Option<PathBuf>,
    /// The matched content, in line mode or byte-range mode.
    pub content: MatchContent,
    /// Whether to replace the given match
    pub included: bool,
}
131
132impl SearchResult {
133    /// Creates a `SearchResult` with line-mode content (single line)
134    pub fn new_line(
135        path: Option<PathBuf>,
136        line_number: usize,
137        content: String,
138        line_ending: LineEnding,
139        included: bool,
140    ) -> Self {
141        Self {
142            path,
143            content: MatchContent::Line {
144                line_number,
145                content,
146                line_ending,
147            },
148            included,
149        }
150    }
151
152    /// Creates a `SearchResult` with byte-range content
153    pub fn new_byte_range(params: ByteRangeParams) -> Self {
154        let ByteRangeParams {
155            path,
156            lines,
157            match_start_in_first_line,
158            match_end_in_last_line,
159            byte_start,
160            byte_end,
161            content,
162            included,
163        } = params;
164        assert!(!lines.is_empty(), "ByteRange must have at least one line");
165        assert!(
166            match_start_in_first_line <= lines[0].1.content.len(),
167            "match_start_in_first_line ({}) exceeds first line length ({})",
168            match_start_in_first_line,
169            lines[0].1.content.len()
170        );
171        assert!(
172            match_end_in_last_line <= lines.last().unwrap().1.content.len(),
173            "match_end_in_last_line ({}) exceeds last line length ({})",
174            match_end_in_last_line,
175            lines.last().unwrap().1.content.len()
176        );
177        assert!(
178            byte_start <= byte_end,
179            "byte_start ({byte_start}) must be <= byte_end ({byte_end})",
180        );
181
182        for i in 1..lines.len() {
183            assert!(
184                lines[i].0 == lines[i - 1].0 + 1,
185                "Line numbers must be sequential: {} followed by {}",
186                lines[i - 1].0,
187                lines[i].0
188            );
189        }
190
191        Self {
192            path,
193            content: MatchContent::ByteRange {
194                lines,
195                match_start_in_first_line,
196                match_end_in_last_line,
197                byte_start,
198                byte_end,
199                content,
200            },
201            included,
202        }
203    }
204
205    /// Returns the full content string for this match (including line ending for Lines mode)
206    pub fn content_string(&self) -> String {
207        match &self.content {
208            MatchContent::Line {
209                content,
210                line_ending,
211                ..
212            } => format!("{}{}", content, line_ending.as_str()),
213            MatchContent::ByteRange { content, .. } => content.clone(),
214        }
215    }
216
217    /// Returns start line number
218    pub fn start_line_number(&self) -> usize {
219        match &self.content {
220            MatchContent::Line { line_number, .. } => *line_number,
221            MatchContent::ByteRange { lines, .. } => {
222                lines
223                    .first()
224                    .expect("ByteRange must have at least one line")
225                    .0
226            }
227        }
228    }
229
230    /// Returns end line number
231    pub fn end_line_number(&self) -> usize {
232        match &self.content {
233            MatchContent::Line { line_number, .. } => *line_number,
234            MatchContent::ByteRange { lines, .. } => {
235                lines
236                    .last()
237                    .expect("ByteRange must have at least one line")
238                    .0
239            }
240        }
241    }
242}
243
/// A [`SearchResult`] paired with its replacement text and, optionally, the
/// outcome of a replacement and a preview error.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SearchResultWithReplacement {
    /// The match this replacement applies to.
    pub search_result: SearchResult,
    /// Replacement text for the match.
    pub replacement: String,
    /// Outcome of the replacement; `None` when no outcome has been recorded.
    pub replace_result: Option<ReplaceResult>,
    /// Error message associated with previewing this result, if any.
    // NOTE(review): where this is populated is not visible in this file — confirm.
    pub preview_error: Option<String>,
}
251
252impl SearchResultWithReplacement {
253    pub fn display_error(&self) -> (String, &str) {
254        let error = match &self.replace_result {
255            Some(ReplaceResult::Error(error)) => error,
256            None => panic!("Found error result with no error message"),
257            Some(ReplaceResult::Success) => {
258                panic!("Found successful result in errors: {self:?}")
259            }
260        };
261
262        let path_display = format!(
263            "{}:{}",
264            self.search_result
265                .path
266                .clone()
267                .unwrap_or_default()
268                .display(),
269            self.search_result.start_line_number()
270        );
271
272        (path_display, error)
273    }
274}
275
/// The kind of pattern to search for.
#[derive(Clone, Debug)]
pub enum SearchType {
    /// A regular expression compiled with the `regex` crate.
    Pattern(Regex),
    /// A regular expression compiled with the `fancy_regex` crate.
    PatternAdvanced(FancyRegex),
    /// A literal string matched verbatim.
    Fixed(String),
}
282
283impl SearchType {
284    pub fn is_empty(&self) -> bool {
285        let str = match &self {
286            SearchType::Pattern(r) => &r.to_string(),
287            SearchType::PatternAdvanced(r) => &r.to_string(),
288            SearchType::Fixed(s) => s,
289        };
290        str.is_empty()
291    }
292
293    /// Returns true when full-haystack context is required to validate matches/replacements
294    /// (e.g. advanced regex lookarounds that depend on surrounding text).
295    pub fn needs_haystack_context(&self) -> bool {
296        matches!(self, SearchType::PatternAdvanced(_))
297    }
298}
299
/// A boxed, sendable callback that receives the search results for one file and
/// returns a `WalkState` telling the walker whether to continue or stop.
type FileVisitor = Box<dyn FnMut(Vec<SearchResult>) -> WalkState + Send>;
302
303impl FileSearcher {
304    pub fn search(&self) -> &SearchType {
305        &self.search_config.search
306    }
307
308    pub fn replace(&self) -> &String {
309        &self.search_config.replace
310    }
311
312    pub fn multiline(&self) -> bool {
313        self.search_config.multiline
314    }
315}
316
/// Options for regex pattern conversion
// NOTE(review): not referenced elsewhere in the visible part of this file;
// presumably consumed where search text is parsed — confirm.
#[derive(Clone, Debug)]
pub struct RegexOptions {
    /// Whether to match only whole words (bounded by non-word characters)
    pub whole_word: bool,
    /// Whether to perform case-sensitive matching
    pub match_case: bool,
}
325
/// Parsed search settings: what to look for, what to replace it with, and
/// whether matching may span multiple lines.
#[derive(Clone, Debug)]
pub struct ParsedSearchConfig {
    /// The pattern to search for (fixed string or regex). Should be produced by `validation::parse_search_text`
    pub search: SearchType,
    /// The text to replace matches with
    pub replace: String,
    /// Whether to search and replace across multiple lines
    pub multiline: bool,
}
335
/// Parsed directory settings that control which files a `FileSearcher` walks.
#[derive(Clone, Debug)]
pub struct ParsedDirConfig {
    /// Configuration for file inclusion/exclusion patterns
    pub overrides: Override,
    /// The root directory to start searching from
    pub root_dir: PathBuf,
    /// Whether to include hidden files/directories in the search
    pub include_hidden: bool,
}
345
/// Searches (and optionally replaces in) the files under a directory tree,
/// combining a search configuration with a directory configuration.
#[derive(Clone, Debug)]
pub struct FileSearcher {
    /// What to search for and what to replace it with.
    search_config: ParsedSearchConfig,
    /// Where to search and which files to consider.
    dir_config: ParsedDirConfig,
}
351
impl FileSearcher {
    /// Creates a searcher from a parsed search configuration and a parsed
    /// directory configuration.
    pub fn new(search_config: ParsedSearchConfig, dir_config: ParsedDirConfig) -> Self {
        Self {
            search_config,
            dir_config,
        }
    }

    /// Builds the parallel directory walker used by both walk methods.
    ///
    /// The walker starts at `root_dir`, skips hidden entries unless `include_hidden`
    /// is set, and applies the configured overrides.
    fn build_walker(&self) -> ignore::WalkParallel {
        // Use the reported parallelism, falling back to 4 when it cannot be
        // determined, and never use more than 12 threads.
        let num_threads = thread::available_parallelism()
            .map(NonZero::get)
            .unwrap_or(4)
            .min(12);

        WalkBuilder::new(&self.dir_config.root_dir)
            .hidden(!self.dir_config.include_hidden)
            .overrides(self.dir_config.overrides.clone())
            .threads(num_threads)
            .build_parallel()
    }

    /// Walks through files in the configured directory and processes matches.
    ///
    /// This method traverses the filesystem starting from the `root_dir` specified in the `FileSearcher`,
    /// respecting the configured overrides (include/exclude patterns) and hidden file settings.
    /// It uses parallel processing when possible for better performance.
    ///
    /// # Parameters
    ///
    /// * `cancelled` - An optional atomic boolean that can be used to signal cancellation from another thread.
    ///   If this is set to `true` during execution, the search will stop as soon as possible.
    ///
    /// * `file_handler` - A closure that returns a `FileVisitor`.
    ///   The returned `FileVisitor` is a function that processes search results for each file with matches.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use std::{
    ///     sync::{atomic::AtomicBool, mpsc},
    ///     path::PathBuf,
    /// };
    /// use regex::Regex;
    /// use ignore::{WalkState, overrides::Override};
    /// use scooter_core::search::{FileSearcher, ParsedSearchConfig, ParsedDirConfig, SearchResult, SearchType};
    ///
    /// let search_config = ParsedSearchConfig {
    ///     search: SearchType::Pattern(Regex::new("pattern").unwrap()),
    ///     replace: "replacement".to_string(),
    ///     multiline: false,
    /// };
    /// let dir_config = ParsedDirConfig {
    ///     overrides: Override::empty(),
    ///     root_dir: PathBuf::from("."),
    ///     include_hidden: false,
    /// };
    /// let searcher = FileSearcher::new(search_config, dir_config);
    /// let cancelled = AtomicBool::new(false);
    ///
    /// searcher.walk_files(Some(&cancelled), move || {
    ///     Box::new(move |results| {
    ///         if process(results).is_err() {
    ///             WalkState::Quit
    ///         } else {
    ///             WalkState::Continue
    ///         }
    ///     })
    /// });
    ///
    /// fn process(results: Vec<SearchResult>) -> anyhow::Result<()> {
    ///     println!("{results:?}");
    ///     Ok(())
    /// }
    /// ```
    pub fn walk_files<F>(&self, cancelled: Option<&AtomicBool>, mut file_handler: F)
    where
        F: FnMut() -> FileVisitor + Send,
    {
        let walker = self.build_walker();
        walker.run(|| {
            // `file_handler` is invoked once per call of this outer closure, producing
            // a fresh visitor for the entry callback returned below.
            let mut on_file_found = file_handler();
            Box::new(move |result| {
                // Check for cancellation before doing any work on this entry.
                if let Some(cancelled) = cancelled
                    && cancelled.load(Ordering::Relaxed)
                {
                    return WalkState::Quit;
                }

                // Entries the walker failed to produce are skipped.
                let Ok(entry) = result else {
                    return WalkState::Continue;
                };

                if is_searchable(&entry) {
                    let results = match search_file(
                        entry.path(),
                        &self.search_config.search,
                        self.search_config.multiline,
                    ) {
                        Ok(r) => r,
                        Err(e) => {
                            // A file that cannot be searched is logged and skipped;
                            // the walk itself continues.
                            log::warn!(
                                "Skipping {} due to error when searching: {e}",
                                entry.path().display()
                            );
                            return WalkState::Continue;
                        }
                    };

                    // Only files with at least one match reach the visitor, which
                    // decides whether the walk continues.
                    if !results.is_empty() {
                        return on_file_found(results);
                    }
                }
                WalkState::Continue
            })
        });
    }

    /// Walks through files in the configured directory and replaces matches.
    ///
    /// This method traverses the filesystem starting from the `root_dir` specified in the `FileSearcher`,
    /// respecting the configured overrides (include/exclude patterns) and hidden file settings.
    /// It replaces all matches of the search pattern with the replacement text in each file.
    ///
    /// # Parameters
    ///
    /// * `cancelled` - An optional atomic boolean that can be used to signal cancellation from another thread.
    ///   If this is set to `true` during execution, the search will stop as soon as possible.
    ///
    /// # Returns
    ///
    /// The number of files that had replacements performed in them.
    pub fn walk_files_and_replace(&self, cancelled: Option<&AtomicBool>) -> usize {
        // Shared counter, incremented by every entry callback that replaced something.
        let num_files_replaced_in = std::sync::Arc::new(AtomicUsize::new(0));

        let walker = self.build_walker();
        walker.run(|| {
            // Each entry callback gets its own handle to the shared counter.
            let counter = num_files_replaced_in.clone();

            Box::new(move |result| {
                // Check for cancellation before doing any work on this entry.
                if let Some(cancelled) = cancelled
                    && cancelled.load(Ordering::Relaxed)
                {
                    return WalkState::Quit;
                }

                // Entries the walker failed to produce are skipped.
                let Ok(entry) = result else {
                    return WalkState::Continue;
                };

                if is_searchable(&entry) {
                    match replace::replace_all_in_file(
                        entry.path(),
                        self.search(),
                        self.replace(),
                        self.multiline(),
                    ) {
                        Ok(replaced_in_file) => {
                            // Count only files in which a replacement was reported.
                            if replaced_in_file {
                                counter.fetch_add(1, Ordering::Relaxed);
                            }
                        }
                        Err(e) => {
                            // Failures are logged per file; the walk continues.
                            log::error!(
                                "Found error when performing replacement in {path_display}: {e}",
                                path_display = entry.path().display()
                            );
                        }
                    }
                }
                WalkState::Continue
            })
        });

        num_files_replaced_in.load(Ordering::Relaxed)
    }
}
528
/// File extensions (lowercase, without the leading dot) that are treated as binary.
const BINARY_EXTENSIONS: &[&str] = &[
    "png", "gif", "jpg", "jpeg", "ico", "svg", "pdf", "exe", "dll", "so", "bin", "class", "jar",
    "zip", "gz", "bz2", "xz", "7z", "tar",
];

/// Returns `true` when the path's extension, compared ASCII-case-insensitively,
/// is one of [`BINARY_EXTENSIONS`].
///
/// Paths without an extension, or whose extension is not valid UTF-8, yield `false`.
fn is_likely_binary(path: &Path) -> bool {
    let Some(extension) = path.extension().and_then(|raw| raw.to_str()) else {
        return false;
    };
    BINARY_EXTENSIONS
        .iter()
        .any(|known| known.eq_ignore_ascii_case(extension))
}
543
544fn is_searchable(entry: &ignore::DirEntry) -> bool {
545    entry.file_type().is_some_and(|ft| ft.is_file()) && !is_likely_binary(entry.path())
546}
547
548pub fn contains_search(haystack: &str, needle: &SearchType) -> bool {
549    match needle {
550        SearchType::Fixed(fixed_str) => haystack.contains(fixed_str),
551        SearchType::Pattern(pattern) => pattern.is_match(haystack),
552        SearchType::PatternAdvanced(pattern) => pattern.is_match(haystack).is_ok_and(|r| r),
553    }
554}
555
556pub fn search_file(
557    path: &Path,
558    search: &SearchType,
559    multiline: bool,
560) -> anyhow::Result<Vec<SearchResult>> {
561    if search.is_empty() {
562        return Ok(vec![]);
563    }
564    let mut file = File::open(path)?;
565
566    // Fast upfront binary sniff (8 KiB)
567    let mut probe = [0u8; 8192];
568    let read = file.read(&mut probe).unwrap_or(0);
569    if matches!(inspect(&probe[..read]), ContentType::BINARY) {
570        return Ok(Vec::new());
571    }
572    file.seek(SeekFrom::Start(0))?;
573
574    if multiline {
575        let content = std::fs::read_to_string(path).with_context(|| {
576            format!(
577                "Failed to read file as UTF-8 for multiline search: {}",
578                path.display()
579            )
580        })?;
581        return Ok(search_multiline(&content, search, Some(path)));
582    }
583
584    // Line-by-line search for non-multiline mode
585    let reader = BufReader::with_capacity(16384, file);
586    let mut results = Vec::new();
587
588    let mut read_errors = 0;
589
590    for (mut line_number, line_result) in reader.lines_with_endings().enumerate() {
591        line_number += 1; // Ensure line-number is 1-indexed
592
593        let (line_bytes, line_ending) = match line_result {
594            Ok(l) => l,
595            Err(err) => {
596                read_errors += 1;
597                log::warn!(
598                    "Error retrieving line {line_number} of {}: {err}",
599                    path.display()
600                );
601                #[allow(clippy::unnecessary_debug_formatting)]
602                if read_errors >= 10 {
603                    anyhow::bail!(
604                        "Aborting search of {path:?}: too many read errors ({read_errors}). Most recent error: {err}",
605                    );
606                }
607                continue;
608            }
609        };
610
611        if let Ok(line_content) = String::from_utf8(line_bytes)
612            && contains_search(&line_content, search)
613        {
614            let result = SearchResult::new_line(
615                Some(path.to_path_buf()),
616                line_number,
617                line_content,
618                line_ending,
619                true,
620            );
621            results.push(result);
622        }
623    }
624
625    Ok(results)
626}
627
628/// Search content for multiline patterns and return `SearchResults`
629pub(crate) fn search_multiline(
630    content: &str,
631    search: &SearchType,
632    path: Option<&Path>,
633) -> Vec<SearchResult> {
634    // Pre-compute newline positions for efficient line number lookups
635    let line_index = LineIndex::new(content);
636
637    let matches: Box<dyn Iterator<Item = (usize, usize)>> = match search {
638        SearchType::Fixed(pattern) => Box::new(
639            content
640                .match_indices(pattern.as_str())
641                .map(|(byte_offset, _)| (byte_offset, byte_offset + pattern.len())),
642        ),
643        SearchType::Pattern(regex) => {
644            Box::new(regex.find_iter(content).map(|mat| (mat.start(), mat.end())))
645        }
646        SearchType::PatternAdvanced(regex) => Box::new(
647            regex
648                .find_iter(content)
649                .flatten()
650                .map(|mat| (mat.start(), mat.end())),
651        ),
652    };
653
654    matches
655        .map(|(start, end)| create_search_result_from_bytes(start, end, path, &line_index))
656        .collect()
657}
658
/// Helper struct to efficiently convert byte offsets to line numbers and extract lines
pub(crate) struct LineIndex<'a> {
    /// The text being indexed.
    content: &'a str,
    /// Byte positions of newline characters
    newline_positions: Vec<usize>,
}
665
666impl<'a> LineIndex<'a> {
667    pub(crate) fn new(content: &'a str) -> Self {
668        let newline_positions: Vec<usize> = content
669            .char_indices()
670            .filter_map(|(i, c)| if c == '\n' { Some(i) } else { None })
671            .collect();
672        Self {
673            content,
674            newline_positions,
675        }
676    }
677
678    /// Get line number (1-indexed) for a byte offset
679    pub(crate) fn line_number_at(&self, byte_offset: usize) -> usize {
680        // Binary search to find how many newlines come before this offset
681        // Both Ok and Err return the same value: the number of newlines before/at this position + 1.
682        // If `byte_offset` lands on a '\n', we treat it as part of the line it terminates.
683        match self.newline_positions.binary_search(&byte_offset) {
684            Ok(idx) | Err(idx) => idx + 1,
685        }
686    }
687
688    /// Get the byte offset where a line starts (`line_num` is 1-indexed)
689    pub(crate) fn line_start_byte(&self, line_num: usize) -> usize {
690        assert!(line_num >= 1, "Line numbers are 1-indexed");
691        if line_num == 1 {
692            0
693        } else {
694            // Line N starts after the (N-1)th newline
695            self.newline_positions[line_num - 2] + 1
696        }
697    }
698
699    /// Get the byte offset where a line ends (exclusive of line ending).
700    /// For `CrLf` lines this excludes the `\r`, matching `BufReadExt::lines_with_endings` behaviour.
701    fn line_end_byte(&self, line_num: usize) -> usize {
702        assert!(line_num >= 1, "Line numbers are 1-indexed");
703        // The end of line N is at the N-1 index in newline_positions (0-indexed)
704        if line_num <= self.newline_positions.len() {
705            let newline_pos = self.newline_positions[line_num - 1];
706            if newline_pos > 0 && self.content.as_bytes()[newline_pos - 1] == b'\r' {
707                newline_pos - 1
708            } else {
709                newline_pos
710            }
711        } else {
712            // Last line without trailing newline
713            self.content.len()
714        }
715    }
716
717    /// Returns the total number of lines in the content
718    fn total_lines(&self) -> usize {
719        // Number of newlines + 1, unless the file is empty
720        if self.content.is_empty() {
721            0
722        } else {
723            self.newline_positions.len() + 1
724        }
725    }
726
727    /// Extract full lines from `start_line` to `end_line` (both 1-indexed, inclusive)
728    pub(crate) fn extract_lines(&self, start_line: usize, end_line: usize) -> Vec<(usize, Line)> {
729        assert!(start_line >= 1, "Line numbers are 1-indexed");
730        assert!(start_line <= end_line, "start_line must be <= end_line");
731
732        (start_line..=end_line)
733            .map(|line_num| {
734                let start = self.line_start_byte(line_num);
735                let end = self.line_end_byte(line_num);
736                let content = self.content[start..end].to_string();
737
738                // Determine line ending
739                let line_ending = if line_num <= self.newline_positions.len() {
740                    let newline_pos = self.newline_positions[line_num - 1];
741                    if newline_pos > 0 && self.content.as_bytes()[newline_pos - 1] == b'\r' {
742                        LineEnding::CrLf
743                    } else {
744                        LineEnding::Lf
745                    }
746                } else {
747                    LineEnding::None
748                };
749
750                (
751                    line_num,
752                    Line {
753                        content,
754                        line_ending,
755                    },
756                )
757            })
758            .collect()
759    }
760}
761
762/// Create a `SearchResult` from byte offsets in the content.
763/// `end_byte` is exclusive (standard Rust range semantics).
764fn create_search_result_from_bytes(
765    start_byte: usize,
766    end_byte: usize,
767    path: Option<&Path>,
768    line_index: &LineIndex<'_>,
769) -> SearchResult {
770    debug_assert!(
771        start_byte <= end_byte,
772        "Invalid byte range: start_byte={start_byte}, end_byte={end_byte}"
773    );
774
775    let start_line_num = line_index.line_number_at(start_byte);
776    // end_byte is exclusive; for zero-length matches, keep start_line_num
777    let mut end_line_num = if start_byte == end_byte {
778        start_line_num
779    } else {
780        line_index.line_number_at(end_byte.saturating_sub(1))
781    };
782
783    // Compute byte offsets within each line (for preview highlighting)
784    let match_start_in_first_line = start_byte - line_index.line_start_byte(start_line_num);
785
786    let last_line_start = line_index.line_start_byte(end_line_num);
787    let last_line_end = line_index.line_end_byte(end_line_num);
788    let last_line_content_len = last_line_end - last_line_start;
789
790    // Check if match extends into the line ending (newline)
791    // If so, and there's a next line, include it so the preview shows the merge
792    let match_end_in_last_line = if end_byte > last_line_start + last_line_content_len {
793        // Match extends past line content into line ending
794        let has_next_line = end_line_num < line_index.total_lines();
795        if has_next_line {
796            // Include next line with match_end = 0 (match doesn't extend into its content)
797            end_line_num += 1;
798            0
799        } else {
800            last_line_content_len
801        }
802    } else {
803        end_byte - last_line_start
804    };
805
806    // Extract full lines containing the match
807    let lines = line_index.extract_lines(start_line_num, end_line_num);
808
809    // Extract the matched content
810    let expected_content = line_index.content[start_byte..end_byte].to_string();
811
812    SearchResult::new_byte_range(ByteRangeParams {
813        path: path.map(Path::to_path_buf),
814        lines,
815        match_start_in_first_line,
816        match_end_in_last_line,
817        byte_start: start_byte,
818        byte_end: end_byte,
819        content: expected_content,
820        included: true,
821    })
822}
823
824#[cfg(test)]
825mod tests {
826    use super::*;
827
828    fn byte_range_content(result: &SearchResult) -> &str {
829        match &result.content {
830            MatchContent::ByteRange { content, .. } => content,
831            MatchContent::Line { .. } => panic!("Expected ByteRange"),
832        }
833    }
834
835    fn byte_range_bytes(result: &SearchResult) -> (usize, usize) {
836        match &result.content {
837            MatchContent::ByteRange {
838                byte_start,
839                byte_end,
840                ..
841            } => (*byte_start, *byte_end),
842            MatchContent::Line { .. } => panic!("Expected ByteRange"),
843        }
844    }
845
846    mod test_helpers {
847        use super::*;
848
849        pub fn create_test_search_result_with_replacement(
850            path: &str,
851            line_number: usize,
852            replace_result: Option<ReplaceResult>,
853        ) -> SearchResultWithReplacement {
854            SearchResultWithReplacement {
855                search_result: SearchResult::new_line(
856                    Some(PathBuf::from(path)),
857                    line_number,
858                    "test line".to_string(),
859                    LineEnding::Lf,
860                    true,
861                ),
862                replacement: "replacement".to_string(),
863                replace_result,
864                preview_error: None,
865            }
866        }
867
868        pub fn create_fixed_search(term: &str) -> SearchType {
869            SearchType::Fixed(term.to_string())
870        }
871
872        pub fn create_pattern_search(pattern: &str) -> SearchType {
873            SearchType::Pattern(Regex::new(pattern).unwrap())
874        }
875
876        pub fn create_advanced_pattern_search(pattern: &str) -> SearchType {
877            SearchType::PatternAdvanced(FancyRegex::new(pattern).unwrap())
878        }
879    }
880
881    mod unicode_handling {
882        use super::*;
883
884        #[test]
885        fn test_complex_unicode_replacement() {
886            let text = "ASCII text with 世界 (CJK), Здравствуйте (Cyrillic), 안녕하세요 (Hangul), αβγδ (Greek), עִבְרִית (Hebrew)";
887            let search = SearchType::Fixed("世界".to_string());
888
889            let result = replace::replace_all_if_match(text, &search, "World");
890
891            assert_eq!(
892                result,
893                Some("ASCII text with World (CJK), Здравствуйте (Cyrillic), 안녕하세요 (Hangul), αβγδ (Greek), עִבְרִית (Hebrew)".to_string())
894            );
895        }
896
897        #[test]
898        fn test_unicode_normalization() {
899            let text = "café";
900            let search = SearchType::Fixed("é".to_string());
901            assert_eq!(
902                replace::replace_all_if_match(text, &search, "e"),
903                Some("cafe".to_string())
904            );
905        }
906
907        #[test]
908        fn test_unicode_regex_classes() {
909            let text = "Latin A, Cyrillic Б, Greek Γ, Hebrew א";
910
911            let search = SearchType::Pattern(Regex::new(r"\p{Cyrillic}").unwrap());
912            assert_eq!(
913                replace::replace_all_if_match(text, &search, "X"),
914                Some("Latin A, Cyrillic X, Greek Γ, Hebrew א".to_string())
915            );
916
917            let search = SearchType::Pattern(Regex::new(r"\p{Greek}").unwrap());
918            assert_eq!(
919                replace::replace_all_if_match(text, &search, "X"),
920                Some("Latin A, Cyrillic Б, Greek X, Hebrew א".to_string())
921            );
922        }
923
924        #[test]
925        fn test_unicode_capture_groups() {
926            let text = "Name: 李明 (ID: A12345)";
927
928            let search =
929                SearchType::Pattern(Regex::new(r"Name: (\p{Han}+) \(ID: ([A-Z0-9]+)\)").unwrap());
930            assert_eq!(
931                replace::replace_all_if_match(text, &search, "ID $2 belongs to $1"),
932                Some("ID A12345 belongs to 李明".to_string())
933            );
934        }
935    }
936
937    mod replace_any {
938        use super::*;
939
940        #[test]
941        fn test_simple_match_subword() {
942            assert_eq!(
943                replace::replace_all_if_match(
944                    "foobarbaz",
945                    &SearchType::Fixed("bar".to_string()),
946                    "REPL"
947                ),
948                Some("fooREPLbaz".to_string())
949            );
950            assert_eq!(
951                replace::replace_all_if_match(
952                    "foobarbaz",
953                    &SearchType::Pattern(Regex::new(r"bar").unwrap()),
954                    "REPL"
955                ),
956                Some("fooREPLbaz".to_string())
957            );
958            assert_eq!(
959                replace::replace_all_if_match(
960                    "foobarbaz",
961                    &SearchType::PatternAdvanced(FancyRegex::new(r"bar").unwrap()),
962                    "REPL"
963                ),
964                Some("fooREPLbaz".to_string())
965            );
966        }
967
968        #[test]
969        fn test_no_match() {
970            assert_eq!(
971                replace::replace_all_if_match(
972                    "foobarbaz",
973                    &SearchType::Fixed("xyz".to_string()),
974                    "REPL"
975                ),
976                None
977            );
978            assert_eq!(
979                replace::replace_all_if_match(
980                    "foobarbaz",
981                    &SearchType::Pattern(Regex::new(r"xyz").unwrap()),
982                    "REPL"
983                ),
984                None
985            );
986            assert_eq!(
987                replace::replace_all_if_match(
988                    "foobarbaz",
989                    &SearchType::PatternAdvanced(FancyRegex::new(r"xyz").unwrap()),
990                    "REPL"
991                ),
992                None
993            );
994        }
995
996        #[test]
997        fn test_word_boundaries() {
998            assert_eq!(
999                replace::replace_all_if_match(
1000                    "foo bar baz",
1001                    &SearchType::Pattern(Regex::new(r"\bbar\b").unwrap()),
1002                    "REPL"
1003                ),
1004                Some("foo REPL baz".to_string())
1005            );
1006            assert_eq!(
1007                replace::replace_all_if_match(
1008                    "embargo",
1009                    &SearchType::Pattern(Regex::new(r"\bbar\b").unwrap()),
1010                    "REPL"
1011                ),
1012                None
1013            );
1014            assert_eq!(
1015                replace::replace_all_if_match(
1016                    "foo bar baz",
1017                    &SearchType::PatternAdvanced(FancyRegex::new(r"\bbar\b").unwrap()),
1018                    "REPL"
1019                ),
1020                Some("foo REPL baz".to_string())
1021            );
1022            assert_eq!(
1023                replace::replace_all_if_match(
1024                    "embargo",
1025                    &SearchType::PatternAdvanced(FancyRegex::new(r"\bbar\b").unwrap()),
1026                    "REPL"
1027                ),
1028                None
1029            );
1030        }
1031
1032        #[test]
1033        fn test_capture_groups() {
1034            assert_eq!(
1035                replace::replace_all_if_match(
1036                    "John Doe",
1037                    &SearchType::Pattern(Regex::new(r"(\w+)\s+(\w+)").unwrap()),
1038                    "$2, $1"
1039                ),
1040                Some("Doe, John".to_string())
1041            );
1042            assert_eq!(
1043                replace::replace_all_if_match(
1044                    "John Doe",
1045                    &SearchType::PatternAdvanced(FancyRegex::new(r"(\w+)\s+(\w+)").unwrap()),
1046                    "$2, $1"
1047                ),
1048                Some("Doe, John".to_string())
1049            );
1050        }
1051
1052        #[test]
1053        fn test_lookaround() {
1054            assert_eq!(
1055                replace::replace_all_if_match(
1056                    "123abc456",
1057                    &SearchType::PatternAdvanced(
1058                        FancyRegex::new(r"(?<=\d{3})abc(?=\d{3})").unwrap()
1059                    ),
1060                    "REPL"
1061                ),
1062                Some("123REPL456".to_string())
1063            );
1064        }
1065
1066        #[test]
1067        fn test_quantifiers() {
1068            assert_eq!(
1069                replace::replace_all_if_match(
1070                    "aaa123456bbb",
1071                    &SearchType::Pattern(Regex::new(r"\d+").unwrap()),
1072                    "REPL"
1073                ),
1074                Some("aaaREPLbbb".to_string())
1075            );
1076            assert_eq!(
1077                replace::replace_all_if_match(
1078                    "abc123def456",
1079                    &SearchType::Pattern(Regex::new(r"\d{3}").unwrap()),
1080                    "REPL"
1081                ),
1082                Some("abcREPLdefREPL".to_string())
1083            );
1084            assert_eq!(
1085                replace::replace_all_if_match(
1086                    "aaa123456bbb",
1087                    &SearchType::PatternAdvanced(FancyRegex::new(r"\d+").unwrap()),
1088                    "REPL"
1089                ),
1090                Some("aaaREPLbbb".to_string())
1091            );
1092            assert_eq!(
1093                replace::replace_all_if_match(
1094                    "abc123def456",
1095                    &SearchType::PatternAdvanced(FancyRegex::new(r"\d{3}").unwrap()),
1096                    "REPL"
1097                ),
1098                Some("abcREPLdefREPL".to_string())
1099            );
1100        }
1101
1102        #[test]
1103        fn test_special_characters() {
1104            assert_eq!(
1105                replace::replace_all_if_match(
1106                    "foo.bar*baz",
1107                    &SearchType::Fixed(".bar*".to_string()),
1108                    "REPL"
1109                ),
1110                Some("fooREPLbaz".to_string())
1111            );
1112            assert_eq!(
1113                replace::replace_all_if_match(
1114                    "foo.bar*baz",
1115                    &SearchType::Pattern(Regex::new(r"\.bar\*").unwrap()),
1116                    "REPL"
1117                ),
1118                Some("fooREPLbaz".to_string())
1119            );
1120            assert_eq!(
1121                replace::replace_all_if_match(
1122                    "foo.bar*baz",
1123                    &SearchType::PatternAdvanced(FancyRegex::new(r"\.bar\*").unwrap()),
1124                    "REPL"
1125                ),
1126                Some("fooREPLbaz".to_string())
1127            );
1128        }
1129
1130        #[test]
1131        fn test_unicode() {
1132            assert_eq!(
1133                replace::replace_all_if_match(
1134                    "Hello 世界!",
1135                    &SearchType::Fixed("世界".to_string()),
1136                    "REPL"
1137                ),
1138                Some("Hello REPL!".to_string())
1139            );
1140            assert_eq!(
1141                replace::replace_all_if_match(
1142                    "Hello 世界!",
1143                    &SearchType::Pattern(Regex::new(r"世界").unwrap()),
1144                    "REPL"
1145                ),
1146                Some("Hello REPL!".to_string())
1147            );
1148            assert_eq!(
1149                replace::replace_all_if_match(
1150                    "Hello 世界!",
1151                    &SearchType::PatternAdvanced(FancyRegex::new(r"世界").unwrap()),
1152                    "REPL"
1153                ),
1154                Some("Hello REPL!".to_string())
1155            );
1156        }
1157
1158        #[test]
1159        fn test_case_insensitive() {
1160            assert_eq!(
1161                replace::replace_all_if_match(
1162                    "HELLO world",
1163                    &SearchType::Pattern(Regex::new(r"(?i)hello").unwrap()),
1164                    "REPL"
1165                ),
1166                Some("REPL world".to_string())
1167            );
1168            assert_eq!(
1169                replace::replace_all_if_match(
1170                    "HELLO world",
1171                    &SearchType::PatternAdvanced(FancyRegex::new(r"(?i)hello").unwrap()),
1172                    "REPL"
1173                ),
1174                Some("REPL world".to_string())
1175            );
1176        }
1177    }
1178
1179    mod search_result_tests {
1180        use super::*;
1181
1182        #[test]
1183        fn test_display_error_with_error_result() {
1184            let result = test_helpers::create_test_search_result_with_replacement(
1185                "/path/to/file.txt",
1186                42,
1187                Some(ReplaceResult::Error("Test error message".to_string())),
1188            );
1189
1190            let (path_display, error) = result.display_error();
1191
1192            assert_eq!(path_display, "/path/to/file.txt:42");
1193            assert_eq!(error, "Test error message");
1194        }
1195
1196        #[test]
1197        fn test_display_error_with_unicode_path() {
1198            let result = test_helpers::create_test_search_result_with_replacement(
1199                "/path/to/файл.txt",
1200                123,
1201                Some(ReplaceResult::Error("Unicode test".to_string())),
1202            );
1203
1204            let (path_display, error) = result.display_error();
1205
1206            assert_eq!(path_display, "/path/to/файл.txt:123");
1207            assert_eq!(error, "Unicode test");
1208        }
1209
1210        #[test]
1211        fn test_display_error_with_complex_error_message() {
1212            let complex_error = "Failed to write: Permission denied (os error 13)";
1213            let result = test_helpers::create_test_search_result_with_replacement(
1214                "/readonly/file.txt",
1215                1,
1216                Some(ReplaceResult::Error(complex_error.to_string())),
1217            );
1218
1219            let (path_display, error) = result.display_error();
1220
1221            assert_eq!(path_display, "/readonly/file.txt:1");
1222            assert_eq!(error, complex_error);
1223        }
1224
1225        #[test]
1226        #[should_panic(expected = "Found error result with no error message")]
1227        fn test_display_error_panics_with_none_result() {
1228            let result = test_helpers::create_test_search_result_with_replacement(
1229                "/path/to/file.txt",
1230                1,
1231                None,
1232            );
1233            result.display_error();
1234        }
1235
1236        #[test]
1237        #[should_panic(expected = "Found successful result in errors")]
1238        fn test_display_error_panics_with_success_result() {
1239            let result = test_helpers::create_test_search_result_with_replacement(
1240                "/path/to/file.txt",
1241                1,
1242                Some(ReplaceResult::Success),
1243            );
1244            result.display_error();
1245        }
1246    }
1247
1248    mod search_type_tests {
1249        use super::*;
1250
1251        #[test]
1252        fn test_search_type_emptiness() {
1253            let test_cases = [
1254                (test_helpers::create_fixed_search(""), true),
1255                (test_helpers::create_fixed_search("hello"), false),
1256                (test_helpers::create_fixed_search("   "), false), // whitespace is not empty
1257                (test_helpers::create_pattern_search(""), true),
1258                (test_helpers::create_pattern_search("test"), false),
1259                (test_helpers::create_pattern_search(r"\s+"), false),
1260                (test_helpers::create_advanced_pattern_search(""), true),
1261                (test_helpers::create_advanced_pattern_search("test"), false),
1262            ];
1263
1264            for (search_type, expected_empty) in test_cases {
1265                assert_eq!(
1266                    search_type.is_empty(),
1267                    expected_empty,
1268                    "Emptiness test failed for: {search_type:?}"
1269                );
1270            }
1271        }
1272    }
1273
1274    mod file_searcher_tests {
1275        use super::*;
1276
1277        #[test]
1278        fn test_is_likely_binary_extensions() {
1279            const BINARY_EXTENSIONS: &[&str] = &[
1280                "image.png",
1281                "document.pdf",
1282                "archive.zip",
1283                "program.exe",
1284                "library.dll",
1285                "photo.jpg",
1286                "icon.ico",
1287                "vector.svg",
1288                "compressed.gz",
1289                "backup.7z",
1290                "java.class",
1291                "application.jar",
1292            ];
1293
1294            const TEXT_EXTENSIONS: &[&str] = &[
1295                "code.rs",
1296                "script.py",
1297                "document.txt",
1298                "config.json",
1299                "readme.md",
1300                "style.css",
1301                "page.html",
1302                "source.c",
1303                "header.h",
1304                "makefile",
1305                "no_extension",
1306            ];
1307
1308            const MIXED_CASE_BINARY: &[&str] =
1309                &["IMAGE.PNG", "Document.PDF", "ARCHIVE.ZIP", "Photo.JPG"];
1310
1311            let test_cases = [
1312                (BINARY_EXTENSIONS, true),
1313                (TEXT_EXTENSIONS, false),
1314                (MIXED_CASE_BINARY, true),
1315            ];
1316
1317            for (files, expected_binary) in test_cases {
1318                for file in files {
1319                    assert_eq!(
1320                        is_likely_binary(Path::new(file)),
1321                        expected_binary,
1322                        "Binary detection failed for {file}"
1323                    );
1324                }
1325            }
1326        }
1327
1328        #[test]
1329        fn test_is_likely_binary_no_extension() {
1330            assert!(!is_likely_binary(Path::new("filename")));
1331            assert!(!is_likely_binary(Path::new("/path/to/file")));
1332        }
1333
1334        #[test]
1335        fn test_is_likely_binary_empty_extension() {
1336            assert!(!is_likely_binary(Path::new("file.")));
1337        }
1338
1339        #[test]
1340        fn test_is_likely_binary_complex_paths() {
1341            assert!(is_likely_binary(Path::new("/complex/path/to/image.png")));
1342            assert!(!is_likely_binary(Path::new("/complex/path/to/source.rs")));
1343        }
1344
1345        #[test]
1346        fn test_is_likely_binary_hidden_files() {
1347            assert!(is_likely_binary(Path::new(".hidden.png")));
1348            assert!(!is_likely_binary(Path::new(".hidden.txt")));
1349        }
1350    }
1351
1352    mod multiline_tests {
1353        use super::*;
1354
1355        #[test]
1356        fn test_line_index_single_line() {
1357            let content = "single line";
1358            let index = LineIndex::new(content);
1359            assert_eq!(index.line_number_at(0), 1);
1360            assert_eq!(index.line_number_at(6), 1);
1361            assert_eq!(index.line_number_at(11), 1);
1362        }
1363
1364        #[test]
1365        fn test_line_index_multiple_lines() {
1366            let content = "line 1\nline 2\nline 3";
1367            let index = LineIndex::new(content);
1368
1369            // Line 1 (bytes 0-5)
1370            assert_eq!(index.line_number_at(0), 1);
1371            assert_eq!(index.line_number_at(5), 1);
1372
1373            // Newline at byte 6
1374            assert_eq!(index.line_number_at(6), 1);
1375
1376            // Line 2 (bytes 7-12)
1377            assert_eq!(index.line_number_at(7), 2);
1378            assert_eq!(index.line_number_at(12), 2);
1379
1380            // Newline at byte 13
1381            assert_eq!(index.line_number_at(13), 2);
1382
1383            // Line 3 (bytes 14-19)
1384            assert_eq!(index.line_number_at(14), 3);
1385            assert_eq!(index.line_number_at(19), 3);
1386        }
1387
1388        #[test]
1389        fn test_line_index_empty_lines() {
1390            let content = "line 1\n\nline 3";
1391            let index = LineIndex::new(content);
1392
1393            assert_eq!(index.line_number_at(0), 1); // "l" in line 1
1394            assert_eq!(index.line_number_at(6), 1); // first newline
1395            assert_eq!(index.line_number_at(7), 2); // second newline (empty line)
1396            assert_eq!(index.line_number_at(8), 3); // "l" in line 3
1397        }
1398
1399        #[test]
1400        fn test_search_multiline_fixed_string() {
1401            let content = "foo\nbar\nbaz";
1402            let search = SearchType::Fixed("foo\nb".to_string());
1403            let results = search_multiline(content, &search, None);
1404
1405            assert_eq!(results.len(), 1);
1406            assert_eq!(results[0].start_line_number(), 1);
1407            assert_eq!(results[0].end_line_number(), 2);
1408            assert_eq!(results[0].path, None);
1409            assert_eq!(byte_range_content(&results[0]), "foo\nb");
1410        }
1411
1412        #[test]
1413        fn test_search_multiline_regex_pattern() {
1414            let content = "start\nmiddle\nend\nother";
1415            let search = SearchType::Pattern(regex::Regex::new(r"start.*\nmiddle").unwrap());
1416            let results = search_multiline(content, &search, None);
1417
1418            assert_eq!(results.len(), 1);
1419            assert_eq!(results[0].start_line_number(), 1);
1420            assert_eq!(results[0].end_line_number(), 2);
1421            assert_eq!(byte_range_content(&results[0]), "start\nmiddle");
1422        }
1423
1424        #[test]
1425        fn test_search_multiline_multiple_matches() {
1426            let content = "foo\nbar\n\nfoo\nbar\nbaz";
1427            let search = SearchType::Fixed("foo\nb".to_string());
1428            let results = search_multiline(content, &search, None);
1429
1430            assert_eq!(results.len(), 2);
1431            assert_eq!(results[0].start_line_number(), 1);
1432            assert_eq!(results[0].end_line_number(), 2);
1433            assert_eq!(results[1].start_line_number(), 4);
1434            assert_eq!(results[1].end_line_number(), 5);
1435        }
1436
1437        #[test]
1438        fn test_search_multiline_no_matches() {
1439            let content = "foo\nbar\nbaz";
1440            let search = SearchType::Fixed("not_found".to_string());
1441            let results = search_multiline(content, &search, None);
1442
1443            assert_eq!(results.len(), 0);
1444        }
1445
1446        #[test]
1447        fn test_search_multiline_with_path() {
1448            let content = "test\ndata";
1449            let path = Path::new("/test/file.txt");
1450            let search = SearchType::Fixed("test".to_string());
1451            let results = search_multiline(content, &search, Some(path));
1452
1453            assert_eq!(results.len(), 1);
1454            assert_eq!(results[0].path, Some(PathBuf::from("/test/file.txt")));
1455        }
1456
1457        #[test]
1458        fn test_search_multiline_line_endings_crlf() {
1459            let content = "foo\r\nbar";
1460            let search = SearchType::Fixed("foo\r\n".to_string());
1461            let results = search_multiline(content, &search, None);
1462
1463            assert_eq!(results.len(), 1);
1464            assert_eq!(byte_range_content(&results[0]), "foo\r\n");
1465        }
1466
1467        #[test]
1468        fn test_search_multiline_line_endings_lf() {
1469            let content = "foo\nbar";
1470            let search = SearchType::Fixed("foo\n".to_string());
1471            let results = search_multiline(content, &search, None);
1472
1473            assert_eq!(results.len(), 1);
1474            assert_eq!(byte_range_content(&results[0]), "foo\n");
1475        }
1476
1477        #[test]
1478        fn test_search_multiline_line_endings_none() {
1479            let content = "foobar";
1480            let search = SearchType::Fixed("foo".to_string());
1481            let results = search_multiline(content, &search, None);
1482
1483            assert_eq!(results.len(), 1);
1484            // Only "foo" is matched, not the full line
1485            assert_eq!(byte_range_content(&results[0]), "foo");
1486        }
1487
1488        #[test]
1489        fn test_search_multiline_spanning_three_lines() {
1490            let content = "line1\nline2\nline3\nline4";
1491            let search = SearchType::Fixed("ne1\nline2\nli".to_string());
1492            let results = search_multiline(content, &search, None);
1493
1494            assert_eq!(results.len(), 1);
1495            assert_eq!(results[0].start_line_number(), 1);
1496            assert_eq!(results[0].end_line_number(), 3);
1497            assert_eq!(results[0].path, None);
1498            assert_eq!(results[0].included, true);
1499            assert_eq!(byte_range_content(&results[0]), "ne1\nline2\nli");
1500        }
1501
1502        #[test]
1503        fn test_search_multiline_pattern_at_end() {
1504            let content = "start\npattern\nend";
1505            let search = SearchType::Fixed("pattern\nend".to_string());
1506            let results = search_multiline(content, &search, None);
1507
1508            assert_eq!(results.len(), 1);
1509            assert_eq!(results[0].start_line_number(), 2);
1510            assert_eq!(results[0].end_line_number(), 3);
1511        }
1512
1513        #[test]
1514        fn test_search_multiline_zero_length_match_start() {
1515            let content = "foo\nbar";
1516            let search = SearchType::Pattern(regex::Regex::new(r"^").unwrap());
1517            let results = search_multiline(content, &search, None);
1518
1519            assert_eq!(results.len(), 1);
1520            assert_eq!(byte_range_bytes(&results[0]), (0, 0));
1521            assert_eq!(byte_range_content(&results[0]), "");
1522        }
1523
1524        #[test]
1525        fn test_create_search_result_single_line_match() {
1526            let content = "line1\nline2\nline3";
1527            let line_index = LineIndex::new(content);
1528
1529            let result = create_search_result_from_bytes(6, 11, None, &line_index);
1530
1531            assert_eq!(result.start_line_number(), 2);
1532            assert_eq!(result.end_line_number(), 2);
1533            assert_eq!(byte_range_content(&result), "line2");
1534            assert_eq!(byte_range_bytes(&result), (6, 11));
1535        }
1536
1537        #[test]
1538        fn test_create_search_result_multiline_match() {
1539            let content = "line1\nline2\nline3";
1540            let line_index = LineIndex::new(content);
1541
1542            let result = create_search_result_from_bytes(0, 11, None, &line_index);
1543
1544            assert_eq!(result.start_line_number(), 1);
1545            assert_eq!(result.end_line_number(), 2);
1546            assert_eq!(byte_range_content(&result), "line1\nline2");
1547            assert_eq!(byte_range_bytes(&result), (0, 11));
1548        }
1549
1550        #[test]
1551        fn test_crlf_match_ending_at_cr() {
1552            // Match "foo\r" in "foo\r\nbar" — match includes the \r which is past line content
1553            let content = "foo\r\nbar";
1554            let search = SearchType::Fixed("foo\r".to_string());
1555            let results = search_multiline(content, &search, None);
1556
1557            assert_eq!(results.len(), 1);
1558            assert_eq!(results[0].start_line_number(), 1);
1559            // Match extends into line ending → next line included
1560            assert_eq!(results[0].end_line_number(), 2);
1561            assert_eq!(byte_range_bytes(&results[0]), (0, 4));
1562            assert_eq!(byte_range_content(&results[0]), "foo\r");
1563        }
1564
1565        #[test]
1566        fn test_crlf_match_spanning_crlf_boundary() {
1567            // Match "foo\r\nbar" spanning both lines
1568            let content = "foo\r\nbar";
1569            let search = SearchType::Fixed("foo\r\nbar".to_string());
1570            let results = search_multiline(content, &search, None);
1571
1572            assert_eq!(results.len(), 1);
1573            assert_eq!(results[0].start_line_number(), 1);
1574            assert_eq!(results[0].end_line_number(), 2);
1575            assert_eq!(byte_range_bytes(&results[0]), (0, 8));
1576            assert_eq!(byte_range_content(&results[0]), "foo\r\nbar");
1577        }
1578
1579        #[test]
1580        fn test_crlf_match_ending_at_lf() {
1581            // Match "foo\r\n" in CRLF content — ends at byte 5, past line content
1582            let content = "foo\r\nbar";
1583            let search = SearchType::Fixed("foo\r\n".to_string());
1584            let results = search_multiline(content, &search, None);
1585
1586            assert_eq!(results.len(), 1);
1587            assert_eq!(results[0].start_line_number(), 1);
1588            // Match extends past line content → include next line with match_end=0
1589            assert_eq!(results[0].end_line_number(), 2);
1590            assert_eq!(byte_range_bytes(&results[0]), (0, 5));
1591            assert_eq!(byte_range_content(&results[0]), "foo\r\n");
1592        }
1593
1594        #[test]
1595        fn test_lf_match_ending_at_lf() {
1596            // Match "foo\n" in LF content — ends at byte 4, past line content
1597            let content = "foo\nbar";
1598            let search = SearchType::Fixed("foo\n".to_string());
1599            let results = search_multiline(content, &search, None);
1600
1601            assert_eq!(results.len(), 1);
1602            assert_eq!(results[0].start_line_number(), 1);
1603            // Match extends past line content → include next line with match_end=0
1604            assert_eq!(results[0].end_line_number(), 2);
1605            assert_eq!(byte_range_bytes(&results[0]), (0, 4));
1606            assert_eq!(byte_range_content(&results[0]), "foo\n");
1607        }
1608
1609        #[test]
1610        fn test_lf_match_ending_at_lf_no_next_line() {
1611            // Match "foo\n" — trailing newline means there's technically an empty line 2,
1612            // so the "extends into line ending" logic includes it with match_end=0.
1613            // To truly test "no next line", use content without trailing newline.
1614            let content = "foo\n";
1615            let search = SearchType::Fixed("foo\n".to_string());
1616            let results = search_multiline(content, &search, None);
1617
1618            assert_eq!(results.len(), 1);
1619            assert_eq!(results[0].start_line_number(), 1);
1620            // Line 2 exists (empty, after trailing \n), so it gets included
1621            assert_eq!(results[0].end_line_number(), 2);
1622            assert_eq!(byte_range_bytes(&results[0]), (0, 4));
1623            assert_eq!(byte_range_content(&results[0]), "foo\n");
1624        }
1625
1626        #[test]
1627        fn test_lf_match_ending_at_lf_truly_no_next_line() {
1628            // "foo" with no trailing newline — match the whole thing
1629            // This tests the fallback to last_line_content_len when there's no next line
1630            let content = "foo";
1631            let search = SearchType::Fixed("foo".to_string());
1632            let results = search_multiline(content, &search, None);
1633
1634            assert_eq!(results.len(), 1);
1635            assert_eq!(results[0].start_line_number(), 1);
1636            assert_eq!(results[0].end_line_number(), 1);
1637            assert_eq!(byte_range_bytes(&results[0]), (0, 3));
1638            assert_eq!(byte_range_content(&results[0]), "foo");
1639        }
1640    }
1641
1642    #[test]
1643    fn test_multiple_matches_per_line() {
1644        // "foo\nbar baz bar qux\nbar\nbux\n"
1645        //  0123 456789012345678 901234567
1646        //       ^     ^         ^
1647        //       4-7   12-15     20-23  (exclusive end)
1648        let content = "foo\nbar baz bar qux\nbar\nbux\n";
1649        let search = SearchType::Fixed("bar".to_string());
1650
1651        let results = search_multiline(content, &search, None);
1652
1653        // Should find 3 matches: 2 on line 2, 1 on line 3
1654        assert_eq!(results.len(), 3);
1655
1656        // First match: "bar" at bytes 4-7 on line 2
1657        assert_eq!(results[0].start_line_number(), 2);
1658        assert_eq!(results[0].end_line_number(), 2);
1659        assert_eq!(byte_range_bytes(&results[0]), (4, 7));
1660
1661        // Second match: "bar" at bytes 12-15 on line 2 (same line!)
1662        assert_eq!(results[1].start_line_number(), 2);
1663        assert_eq!(results[1].end_line_number(), 2);
1664        assert_eq!(byte_range_bytes(&results[1]), (12, 15));
1665
1666        // Third match: "bar" at bytes 20-23 on line 3
1667        assert_eq!(results[2].start_line_number(), 3);
1668        assert_eq!(results[2].end_line_number(), 3);
1669        assert_eq!(byte_range_bytes(&results[2]), (20, 23));
1670    }
1671
1672    #[test]
1673    fn test_extract_lines_crlf_content_should_not_include_cr() {
1674        let content = "hello\r\nworld\r\n";
1675        let index = LineIndex::new(content);
1676        let lines = index.extract_lines(1, 2);
1677
1678        assert_eq!(lines.len(), 2);
1679
1680        // Content must NOT include \r — line_ending encodes it separately
1681        assert_eq!(lines[0].1.content, "hello");
1682        assert_eq!(lines[0].1.line_ending, LineEnding::CrLf);
1683
1684        assert_eq!(lines[1].1.content, "world");
1685        assert_eq!(lines[1].1.line_ending, LineEnding::CrLf);
1686    }
1687}