Skip to main content

neco_editor_search/
lib.rs

1//! Text search engine for editor buffers.
2//!
3//! Provides find and replace operations over UTF-8 text, using
4//! `neco_textview::LineIndex` for line/column resolution.
5
6use neco_textview::{LineIndex, TextRange};
7use regex::Regex;
8use std::fmt;
9
10// ---------------------------------------------------------------------------
11// Types
12// ---------------------------------------------------------------------------
13
/// Search parameters. Plain struct (parameter bag).
///
/// All fields are public so callers can build a query with a struct literal.
/// The derives let a query be logged, cached, and compared against the
/// previously executed one.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SearchQuery {
    /// Text to look for; interpreted as a regular expression only when
    /// `is_regex` is set, otherwise matched literally.
    pub pattern: String,
    /// Treat `pattern` as a regular expression instead of a literal string.
    pub is_regex: bool,
    /// When `false`, matching ignores letter case.
    pub case_sensitive: bool,
    /// Only accept matches delimited by word boundaries on both sides.
    pub whole_word: bool,
}
21
/// A single search hit with its byte range and line/column position.
///
/// Fields are private; values are read through the accessor methods on this
/// type.
#[derive(Debug, Clone)]
pub struct SearchMatch {
    // Byte-offset range covered by the match.
    range: TextRange,
    // 0-based line on which the match starts.
    line: u32,
    // 0-based column (byte offset within the line) of the match start.
    column: u32,
}
29
// Read-only accessors; a `SearchMatch` is never mutated after construction
// in this file.
impl SearchMatch {
    /// Byte-offset range of the match.
    pub fn range(&self) -> &TextRange {
        &self.range
    }

    /// 0-based line number.
    pub fn line(&self) -> u32 {
        self.line
    }

    /// 0-based column (byte offset within line).
    pub fn column(&self) -> u32 {
        self.column
    }
}
46
/// Errors produced by search operations.
///
/// Marked `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum SearchError {
    /// The regex pattern failed to compile.
    ///
    /// Carries the regex compiler's error rendered as text.
    InvalidRegex(String),
}
54
55impl fmt::Display for SearchError {
56    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
57        match self {
58            Self::InvalidRegex(msg) => write!(f, "invalid regex: {msg}"),
59        }
60    }
61}
62
// Relies entirely on the trait's default methods; no underlying source error
// is exposed because `InvalidRegex` stores only the rendered message.
impl std::error::Error for SearchError {}
64
65// ---------------------------------------------------------------------------
66// Internal helpers
67// ---------------------------------------------------------------------------
68
69/// Build a `regex::Regex` from a `SearchQuery`.
70fn build_regex(query: &SearchQuery) -> Result<Regex, SearchError> {
71    let mut pat = if query.is_regex {
72        query.pattern.clone()
73    } else {
74        regex::escape(&query.pattern)
75    };
76
77    if query.whole_word {
78        pat = format!(r"\b(?:{pat})\b");
79    }
80
81    if !query.case_sensitive {
82        pat = format!("(?i){pat}");
83    }
84
85    Regex::new(&pat).map_err(|e| SearchError::InvalidRegex(e.to_string()))
86}
87
88/// Create a `SearchMatch` from a byte-offset range using `LineIndex`.
89fn match_from_offsets(
90    text: &str,
91    line_index: &LineIndex,
92    start: usize,
93    end: usize,
94) -> Result<SearchMatch, SearchError> {
95    let range = TextRange::new(start, end).expect("match offsets must satisfy start <= end");
96    let pos = line_index
97        .offset_to_position(text, start)
98        // offset_to_position only fails for out-of-bounds / non-boundary offsets,
99        // which cannot happen for regex match positions in valid UTF-8 text.
100        .expect("regex match start must be a valid offset");
101    Ok(SearchMatch {
102        range,
103        line: pos.line(),
104        column: pos.column(),
105    })
106}
107
108// ---------------------------------------------------------------------------
109// Public API
110// ---------------------------------------------------------------------------
111
112/// Find every occurrence of `query` in `text`.
113pub fn find_all(
114    text: &str,
115    line_index: &LineIndex,
116    query: &SearchQuery,
117) -> Result<Vec<SearchMatch>, SearchError> {
118    let re = build_regex(query)?;
119    let mut results = Vec::new();
120    for m in re.find_iter(text) {
121        results.push(match_from_offsets(text, line_index, m.start(), m.end())?);
122    }
123    Ok(results)
124}
125
126/// Find the first occurrence of `query` at or after `from_offset`.
127///
128/// Returns `Ok(None)` when no match exists from that offset onward.
129pub fn find_next(
130    text: &str,
131    line_index: &LineIndex,
132    query: &SearchQuery,
133    from_offset: usize,
134) -> Result<Option<SearchMatch>, SearchError> {
135    let re = build_regex(query)?;
136    if from_offset > text.len() {
137        return Ok(None);
138    }
139    // Use find_at to search within the full haystack, preserving word boundary
140    // semantics even when from_offset falls mid-word.
141    match re.find_at(text, from_offset) {
142        Some(m) => Ok(Some(match_from_offsets(
143            text,
144            line_index,
145            m.start(),
146            m.end(),
147        )?)),
148        None => Ok(None),
149    }
150}
151
152/// Find the last occurrence of `query` whose match ends at or before
153/// `to_offset`.
154///
155/// Returns `Ok(None)` when no match exists before that offset.
156pub fn find_previous(
157    text: &str,
158    line_index: &LineIndex,
159    query: &SearchQuery,
160    to_offset: usize,
161) -> Result<Option<SearchMatch>, SearchError> {
162    let all = find_all(text, line_index, query)?;
163    Ok(all.into_iter().rfind(|m| m.range().end() <= to_offset))
164}
165
166/// Replace every occurrence of `query` in `text` with `replacement`.
167///
168/// Returns the new text and the number of replacements performed.
169pub fn replace_all(
170    text: &str,
171    query: &SearchQuery,
172    replacement: &str,
173) -> Result<(String, usize), SearchError> {
174    let re = build_regex(query)?;
175    let count = re.find_iter(text).count();
176    let new_text = re.replace_all(text, replacement).into_owned();
177    Ok((new_text, count))
178}
179
180/// Return every replacement as an original byte range plus expanded text.
181pub fn replace_all_ranges(
182    text: &str,
183    query: &SearchQuery,
184    replacement: &str,
185) -> Result<Vec<(TextRange, String)>, SearchError> {
186    let re = build_regex(query)?;
187    let mut replacements = Vec::new();
188    for caps in re.captures_iter(text) {
189        let m = caps
190            .get(0)
191            .expect("regex captures must include the full match");
192        let mut expanded = String::new();
193        caps.expand(replacement, &mut expanded);
194        let range =
195            TextRange::new(m.start(), m.end()).expect("match offsets must satisfy start <= end");
196        replacements.push((range, expanded));
197    }
198    Ok(replacements)
199}
200
201/// Replace the first occurrence of `query` at or after `from_offset`.
202///
203/// Returns the new full text and the `SearchMatch` describing the original
204/// match position, or `None` if no match was found.
205pub fn replace_next(
206    text: &str,
207    line_index: &LineIndex,
208    query: &SearchQuery,
209    replacement: &str,
210    from_offset: usize,
211) -> Result<Option<(String, SearchMatch)>, SearchError> {
212    let re = build_regex(query)?;
213    if from_offset > text.len() {
214        return Ok(None);
215    }
216    // Use find_at to preserve word boundary semantics at mid-word offsets.
217    match re.captures_at(text, from_offset) {
218        Some(caps) => {
219            let m = caps
220                .get(0)
221                .expect("regex captures must include the full match");
222            let sm = match_from_offsets(text, line_index, m.start(), m.end())?;
223            let mut expanded = String::new();
224            caps.expand(replacement, &mut expanded);
225
226            let mut new_text = String::with_capacity(text.len());
227            new_text.push_str(&text[..m.start()]);
228            new_text.push_str(&expanded);
229            new_text.push_str(&text[m.end()..]);
230
231            Ok(Some((new_text, sm)))
232        }
233        None => Ok(None),
234    }
235}
236
237// ---------------------------------------------------------------------------
238// Tests
239// ---------------------------------------------------------------------------
240
// Unit tests for the search and replace API. Each section below exercises one
// function; the final sections cover error display and invalid-regex handling
// through the public entry points.
#[cfg(test)]
mod tests {
    use super::*;

    // Convenience: create a query for plain-text, case-sensitive search.
    fn plain(pattern: &str) -> SearchQuery {
        SearchQuery {
            pattern: pattern.to_string(),
            is_regex: false,
            case_sensitive: true,
            whole_word: false,
        }
    }

    // -----------------------------------------------------------------------
    // build_regex
    // -----------------------------------------------------------------------

    #[test]
    fn build_regex_invalid_returns_error() {
        let q = SearchQuery {
            pattern: "[invalid".to_string(),
            is_regex: true,
            case_sensitive: true,
            whole_word: false,
        };
        let err = build_regex(&q).unwrap_err();
        assert!(matches!(err, SearchError::InvalidRegex(_)));
    }

    #[test]
    fn build_regex_plain_escapes_special_chars() {
        let q = SearchQuery {
            pattern: "a.b".to_string(),
            is_regex: false,
            case_sensitive: true,
            whole_word: false,
        };
        let re = build_regex(&q).expect("should compile");
        assert!(re.is_match("a.b"));
        assert!(!re.is_match("axb"));
    }

    #[test]
    fn build_regex_case_insensitive() {
        let q = SearchQuery {
            pattern: "hello".to_string(),
            is_regex: false,
            case_sensitive: false,
            whole_word: false,
        };
        let re = build_regex(&q).expect("should compile");
        assert!(re.is_match("HELLO"));
        assert!(re.is_match("Hello"));
    }

    #[test]
    fn build_regex_whole_word() {
        let q = SearchQuery {
            pattern: "foo".to_string(),
            is_regex: false,
            case_sensitive: true,
            whole_word: true,
        };
        let re = build_regex(&q).expect("should compile");
        assert!(re.is_match("foo"));
        assert!(!re.is_match("foobar"));
        assert!(!re.is_match("barfoo"));
    }

    // -----------------------------------------------------------------------
    // find_all
    // -----------------------------------------------------------------------

    #[test]
    fn find_all_basic_match() {
        let text = "hello world";
        let li = LineIndex::new(text);
        let matches = find_all(text, &li, &plain("world")).unwrap();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].range().start(), 6);
        assert_eq!(matches[0].range().end(), 11);
        assert_eq!(matches[0].line(), 0);
        assert_eq!(matches[0].column(), 6);
    }

    #[test]
    fn find_all_multiple_matches() {
        let text = "abcabc";
        let li = LineIndex::new(text);
        let matches = find_all(text, &li, &plain("abc")).unwrap();
        assert_eq!(matches.len(), 2);
        assert_eq!(matches[0].range().start(), 0);
        assert_eq!(matches[1].range().start(), 3);
    }

    #[test]
    fn find_all_no_match() {
        let text = "hello";
        let li = LineIndex::new(text);
        let matches = find_all(text, &li, &plain("xyz")).unwrap();
        assert!(matches.is_empty());
    }

    #[test]
    fn find_all_case_insensitive() {
        let text = "Hello HELLO hello";
        let li = LineIndex::new(text);
        let q = SearchQuery {
            pattern: "hello".to_string(),
            is_regex: false,
            case_sensitive: false,
            whole_word: false,
        };
        let matches = find_all(text, &li, &q).unwrap();
        assert_eq!(matches.len(), 3);
    }

    #[test]
    fn find_all_whole_word() {
        let text = "foo foobar barfoo foo";
        let li = LineIndex::new(text);
        let q = SearchQuery {
            pattern: "foo".to_string(),
            is_regex: false,
            case_sensitive: true,
            whole_word: true,
        };
        let matches = find_all(text, &li, &q).unwrap();
        assert_eq!(matches.len(), 2);
        assert_eq!(matches[0].range().start(), 0);
        assert_eq!(matches[1].range().start(), 18);
    }

    #[test]
    fn find_all_regex_with_groups() {
        let text = "2024-01-15 and 2025-12-31";
        let li = LineIndex::new(text);
        let q = SearchQuery {
            pattern: r"\d{4}-\d{2}-\d{2}".to_string(),
            is_regex: true,
            case_sensitive: true,
            whole_word: false,
        };
        let matches = find_all(text, &li, &q).unwrap();
        assert_eq!(matches.len(), 2);
        assert_eq!(matches[0].range().start(), 0);
        assert_eq!(matches[0].range().end(), 10);
        assert_eq!(matches[1].range().start(), 15);
    }

    #[test]
    fn find_all_multiline() {
        let text = "line1\nfoo\nline3\nfoo";
        let li = LineIndex::new(text);
        let matches = find_all(text, &li, &plain("foo")).unwrap();
        assert_eq!(matches.len(), 2);
        assert_eq!(matches[0].line(), 1);
        assert_eq!(matches[0].column(), 0);
        assert_eq!(matches[1].line(), 3);
        assert_eq!(matches[1].column(), 0);
    }

    #[test]
    fn find_all_empty_text() {
        let text = "";
        let li = LineIndex::new(text);
        let matches = find_all(text, &li, &plain("x")).unwrap();
        assert!(matches.is_empty());
    }

    #[test]
    fn find_all_empty_pattern() {
        let text = "abc";
        let li = LineIndex::new(text);
        // Empty pattern matches at every position.
        let matches = find_all(text, &li, &plain("")).unwrap();
        assert_eq!(matches.len(), 4); // positions 0,1,2,3
    }

    // -----------------------------------------------------------------------
    // find_next
    // -----------------------------------------------------------------------

    #[test]
    fn find_next_from_zero() {
        let text = "abc def abc";
        let li = LineIndex::new(text);
        let m = find_next(text, &li, &plain("abc"), 0).unwrap().unwrap();
        assert_eq!(m.range().start(), 0);
    }

    #[test]
    fn find_next_from_middle() {
        let text = "abc def abc";
        let li = LineIndex::new(text);
        let m = find_next(text, &li, &plain("abc"), 1).unwrap().unwrap();
        assert_eq!(m.range().start(), 8);
    }

    #[test]
    fn find_next_past_last_match() {
        let text = "abc def abc";
        let li = LineIndex::new(text);
        let m = find_next(text, &li, &plain("abc"), 9).unwrap();
        assert!(m.is_none());
    }

    #[test]
    fn find_next_from_beyond_text() {
        let text = "abc";
        let li = LineIndex::new(text);
        let m = find_next(text, &li, &plain("abc"), 100).unwrap();
        assert!(m.is_none());
    }

    #[test]
    fn find_next_whole_word_mid_word_offset() {
        // "foobar" contains "bar" but starting at offset 3 (mid-word) should
        // NOT match with whole_word because "bar" is not a standalone word here.
        let text = "foobar baz bar";
        let li = LineIndex::new(text);
        let q = SearchQuery {
            pattern: "bar".to_string(),
            is_regex: false,
            case_sensitive: true,
            whole_word: true,
        };
        let m = find_next(text, &li, &q, 3).unwrap().unwrap();
        // Should skip "bar" inside "foobar" and find the standalone "bar" at offset 11.
        assert_eq!(m.range().start(), 11);
    }

    // -----------------------------------------------------------------------
    // find_previous
    // -----------------------------------------------------------------------

    #[test]
    fn find_previous_returns_last_match_ending_at_offset() {
        let text = "abc abc abc";
        let li = LineIndex::new(text);

        let m = find_previous(text, &li, &plain("abc"), 7).unwrap().unwrap();

        assert_eq!(m.range().start(), 4);
        assert_eq!(m.range().end(), 7);
    }

    #[test]
    fn find_previous_returns_none_before_first_match() {
        let text = "abc abc";
        let li = LineIndex::new(text);

        let result = find_previous(text, &li, &plain("abc"), 2).unwrap();

        assert!(result.is_none());
    }

    // -----------------------------------------------------------------------
    // replace_all
    // -----------------------------------------------------------------------

    #[test]
    fn replace_all_basic() {
        let text = "hello world";
        let (new_text, count) = replace_all(text, &plain("world"), "rust").unwrap();
        assert_eq!(new_text, "hello rust");
        assert_eq!(count, 1);
    }

    #[test]
    fn replace_all_multiple() {
        let text = "aaa";
        let (new_text, count) = replace_all(text, &plain("a"), "bb").unwrap();
        assert_eq!(new_text, "bbbbbb");
        assert_eq!(count, 3);
    }

    #[test]
    fn replace_all_no_match() {
        let text = "hello";
        let (new_text, count) = replace_all(text, &plain("xyz"), "abc").unwrap();
        assert_eq!(new_text, "hello");
        assert_eq!(count, 0);
    }

    #[test]
    fn replace_all_empty_text() {
        let text = "";
        let (new_text, count) = replace_all(text, &plain("x"), "y").unwrap();
        assert_eq!(new_text, "");
        assert_eq!(count, 0);
    }

    #[test]
    fn replace_all_regex_backreference() {
        let text = "foo123bar456";
        let q = SearchQuery {
            pattern: r"(\d+)".to_string(),
            is_regex: true,
            case_sensitive: true,
            whole_word: false,
        };
        let (new_text, count) = replace_all(text, &q, "[$1]").unwrap();
        assert_eq!(new_text, "foo[123]bar[456]");
        assert_eq!(count, 2);
    }

    // -----------------------------------------------------------------------
    // replace_all_ranges
    // -----------------------------------------------------------------------

    #[test]
    fn replace_all_ranges_expands_regex_backreference() {
        let text = "foo123bar456";
        let q = SearchQuery {
            pattern: r"(\d+)".to_string(),
            is_regex: true,
            case_sensitive: true,
            whole_word: false,
        };

        let replacements = replace_all_ranges(text, &q, "[$1]").unwrap();

        assert_eq!(replacements.len(), 2);
        assert_eq!(replacements[0].0.start(), 3);
        assert_eq!(replacements[0].0.end(), 6);
        assert_eq!(replacements[0].1, "[123]");
        assert_eq!(replacements[1].1, "[456]");
    }

    // -----------------------------------------------------------------------
    // replace_next
    // -----------------------------------------------------------------------

    #[test]
    fn replace_next_basic() {
        let text = "abc def abc";
        let li = LineIndex::new(text);
        let (new_text, m) = replace_next(text, &li, &plain("abc"), "XYZ", 0)
            .unwrap()
            .unwrap();
        assert_eq!(new_text, "XYZ def abc");
        assert_eq!(m.range().start(), 0);
        assert_eq!(m.range().end(), 3);
    }

    #[test]
    fn replace_next_from_offset() {
        let text = "abc def abc";
        let li = LineIndex::new(text);
        let (new_text, m) = replace_next(text, &li, &plain("abc"), "XYZ", 1)
            .unwrap()
            .unwrap();
        assert_eq!(new_text, "abc def XYZ");
        assert_eq!(m.range().start(), 8);
    }

    #[test]
    fn replace_next_regex_backreference() {
        let text = "foo123bar456";
        let li = LineIndex::new(text);
        let q = SearchQuery {
            pattern: r"(\d+)".to_string(),
            is_regex: true,
            case_sensitive: true,
            whole_word: false,
        };

        let (new_text, m) = replace_next(text, &li, &q, "[$1]", 0).unwrap().unwrap();

        assert_eq!(new_text, "foo[123]bar456");
        assert_eq!(m.range().start(), 3);
    }

    #[test]
    fn replace_next_no_match() {
        let text = "hello";
        let li = LineIndex::new(text);
        let result = replace_next(text, &li, &plain("xyz"), "abc", 0).unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn replace_next_from_beyond_text() {
        let text = "abc";
        let li = LineIndex::new(text);
        let result = replace_next(text, &li, &plain("abc"), "x", 100).unwrap();
        assert!(result.is_none());
    }

    // -----------------------------------------------------------------------
    // SearchError Display
    // -----------------------------------------------------------------------

    #[test]
    fn search_error_display() {
        let err = SearchError::InvalidRegex("bad pattern".to_string());
        let s = err.to_string();
        assert!(s.contains("bad pattern"));
    }

    // -----------------------------------------------------------------------
    // Edge: invalid regex in public API
    // -----------------------------------------------------------------------

    #[test]
    fn find_all_invalid_regex() {
        let text = "hello";
        let li = LineIndex::new(text);
        let q = SearchQuery {
            pattern: "[".to_string(),
            is_regex: true,
            case_sensitive: true,
            whole_word: false,
        };
        let err = find_all(text, &li, &q).unwrap_err();
        assert!(matches!(err, SearchError::InvalidRegex(_)));
    }

    #[test]
    fn find_next_invalid_regex() {
        let text = "hello";
        let li = LineIndex::new(text);
        let q = SearchQuery {
            pattern: "[".to_string(),
            is_regex: true,
            case_sensitive: true,
            whole_word: false,
        };
        let err = find_next(text, &li, &q, 0).unwrap_err();
        assert!(matches!(err, SearchError::InvalidRegex(_)));
    }

    #[test]
    fn replace_all_invalid_regex() {
        let q = SearchQuery {
            pattern: "[".to_string(),
            is_regex: true,
            case_sensitive: true,
            whole_word: false,
        };
        let err = replace_all("x", &q, "y").unwrap_err();
        assert!(matches!(err, SearchError::InvalidRegex(_)));
    }

    #[test]
    fn replace_next_invalid_regex() {
        let text = "hello";
        let li = LineIndex::new(text);
        let q = SearchQuery {
            pattern: "[".to_string(),
            is_regex: true,
            case_sensitive: true,
            whole_word: false,
        };
        let err = replace_next(text, &li, &q, "y", 0).unwrap_err();
        assert!(matches!(err, SearchError::InvalidRegex(_)));
    }
}