Skip to main content

editor_core/
search.rs

1//! Text search helpers.
2//!
3//! This module provides simple search APIs over a UTF-8 `&str`, using **character offsets**
4//! (not byte offsets) for all public inputs/outputs. It supports:
5//!
6//! - plain substring search (escaped and compiled into a regex)
7//! - regex search
8//! - optional whole-word matching
9
10use regex::{Regex, RegexBuilder};
11
12/// Options that control how search is performed.
13#[derive(Debug, Clone, Copy, PartialEq, Eq)]
14pub struct SearchOptions {
15    /// If `true`, performs a case-sensitive search.
16    pub case_sensitive: bool,
17    /// If `true`, matches only whole words (ASCII-alphanumeric and `_`).
18    pub whole_word: bool,
19    /// If `true`, treats the query as a regex pattern.
20    pub regex: bool,
21}
22
23impl Default for SearchOptions {
24    fn default() -> Self {
25        Self {
26            case_sensitive: true,
27            whole_word: false,
28            regex: false,
29        }
30    }
31}
32
33/// A match returned by the search APIs, expressed as a half-open character range.
34#[derive(Debug, Clone, Copy, PartialEq, Eq)]
35pub struct SearchMatch {
36    /// Inclusive start character offset.
37    pub start: usize,
38    /// Exclusive end character offset.
39    pub end: usize,
40}
41
42impl SearchMatch {
43    /// Returns the length of the match in characters.
44    pub fn len(&self) -> usize {
45        self.end.saturating_sub(self.start)
46    }
47
48    /// Returns `true` if the match is empty.
49    pub fn is_empty(&self) -> bool {
50        self.start >= self.end
51    }
52}
53
54/// Search errors.
55#[derive(Debug)]
56pub enum SearchError {
57    /// The provided regex pattern failed to compile.
58    InvalidRegex(regex::Error),
59}
60
61impl std::fmt::Display for SearchError {
62    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63        match self {
64            Self::InvalidRegex(err) => write!(f, "Invalid regex: {}", err),
65        }
66    }
67}
68
69impl std::error::Error for SearchError {}
70
71#[derive(Debug)]
72pub(crate) struct CharIndex {
73    char_to_byte: Vec<usize>,
74    text_len: usize,
75}
76
77impl CharIndex {
78    pub(crate) fn new(text: &str) -> Self {
79        let mut char_to_byte: Vec<usize> = text.char_indices().map(|(b, _)| b).collect();
80        char_to_byte.push(text.len());
81        Self {
82            char_to_byte,
83            text_len: text.len(),
84        }
85    }
86
87    pub(crate) fn char_count(&self) -> usize {
88        self.char_to_byte.len().saturating_sub(1)
89    }
90
91    pub(crate) fn char_to_byte(&self, char_offset: usize) -> usize {
92        let clamped = char_offset.min(self.char_count());
93        self.char_to_byte
94            .get(clamped)
95            .cloned()
96            .unwrap_or(self.text_len)
97    }
98
99    pub(crate) fn byte_to_char(&self, byte_offset: usize) -> usize {
100        let clamped = byte_offset.min(self.text_len);
101        match self.char_to_byte.binary_search(&clamped) {
102            Ok(idx) => idx,
103            Err(idx) => idx,
104        }
105    }
106
107    pub(crate) fn char_at(&self, text: &str, char_offset: usize) -> Option<char> {
108        if char_offset >= self.char_count() {
109            return None;
110        }
111        let start = self.char_to_byte[char_offset];
112        let end = self.char_to_byte[char_offset + 1];
113        text.get(start..end)?.chars().next()
114    }
115}
116
117fn compile_search_regex(query: &str, options: SearchOptions) -> Result<Regex, SearchError> {
118    let pattern = if options.regex {
119        query.to_string()
120    } else {
121        regex::escape(query)
122    };
123
124    RegexBuilder::new(&pattern)
125        .case_insensitive(!options.case_sensitive)
126        .multi_line(true)
127        .build()
128        .map_err(SearchError::InvalidRegex)
129}
130
131fn is_word_char(ch: char) -> bool {
132    ch == '_' || ch.is_alphanumeric()
133}
134
135fn is_whole_word(text: &str, index: &CharIndex, m: SearchMatch) -> bool {
136    if m.is_empty() {
137        return false;
138    }
139
140    let before = if m.start == 0 {
141        None
142    } else {
143        index.char_at(text, m.start.saturating_sub(1))
144    };
145    let after = index.char_at(text, m.end);
146
147    !before.is_some_and(is_word_char) && !after.is_some_and(is_word_char)
148}
149
150/// Find the next occurrence of `query` in `text`, searching forward from `from_char`.
151///
152/// - Returns `Ok(None)` if no match is found (or if `query` is empty).
153/// - Match ranges are character offsets and are half-open (`[start, end)`).
154pub fn find_next(
155    text: &str,
156    query: &str,
157    options: SearchOptions,
158    from_char: usize,
159) -> Result<Option<SearchMatch>, SearchError> {
160    if query.is_empty() {
161        return Ok(None);
162    }
163
164    let re = compile_search_regex(query, options)?;
165    let index = CharIndex::new(text);
166
167    let mut start_char = from_char.min(index.char_count());
168    loop {
169        let start_byte = index.char_to_byte(start_char);
170        let Some(m) = re.find_at(text, start_byte) else {
171            return Ok(None);
172        };
173
174        let start = index.byte_to_char(m.start());
175        let end = index.byte_to_char(m.end());
176        let candidate = SearchMatch { start, end };
177
178        if candidate.is_empty() {
179            if end >= index.char_count() {
180                return Ok(None);
181            }
182            start_char = end + 1;
183            continue;
184        }
185
186        if options.whole_word && !is_whole_word(text, &index, candidate) {
187            start_char = candidate.end;
188            continue;
189        }
190
191        return Ok(Some(candidate));
192    }
193}
194
195/// Find the previous occurrence of `query` in `text`, searching backward from `from_char`.
196///
197/// - Returns `Ok(None)` if no match is found (or if `query` is empty).
198/// - Match ranges are character offsets and are half-open (`[start, end)`).
199pub fn find_prev(
200    text: &str,
201    query: &str,
202    options: SearchOptions,
203    from_char: usize,
204) -> Result<Option<SearchMatch>, SearchError> {
205    if query.is_empty() {
206        return Ok(None);
207    }
208
209    let re = compile_search_regex(query, options)?;
210    let index = CharIndex::new(text);
211
212    let limit_char = from_char.min(index.char_count());
213    let limit_byte = index.char_to_byte(limit_char);
214
215    let mut last: Option<SearchMatch> = None;
216    for m in re.find_iter(&text[..limit_byte]) {
217        let start = index.byte_to_char(m.start());
218        let end = index.byte_to_char(m.end());
219        let candidate = SearchMatch { start, end };
220
221        if candidate.is_empty() {
222            continue;
223        }
224        if options.whole_word && !is_whole_word(text, &index, candidate) {
225            continue;
226        }
227
228        last = Some(candidate);
229    }
230
231    Ok(last)
232}
233
234/// Find all occurrences of `query` in `text`.
235///
236/// - Returns an empty list if `query` is empty.
237/// - Match ranges are character offsets and are half-open (`[start, end)`).
238pub fn find_all(
239    text: &str,
240    query: &str,
241    options: SearchOptions,
242) -> Result<Vec<SearchMatch>, SearchError> {
243    if query.is_empty() {
244        return Ok(Vec::new());
245    }
246
247    let re = compile_search_regex(query, options)?;
248    let index = CharIndex::new(text);
249
250    let mut matches: Vec<SearchMatch> = Vec::new();
251    for m in re.find_iter(text) {
252        let start = index.byte_to_char(m.start());
253        let end = index.byte_to_char(m.end());
254        let candidate = SearchMatch { start, end };
255
256        if candidate.is_empty() {
257            continue;
258        }
259        if options.whole_word && !is_whole_word(text, &index, candidate) {
260            continue;
261        }
262
263        matches.push(candidate);
264    }
265
266    Ok(matches)
267}
268
269/// Returns `true` if `range` exactly matches an occurrence of `query` in `text`.
270///
271/// This is useful for checking whether a current selection/caret range corresponds to the
272/// "current match" when implementing find/replace flows.
273pub fn is_match_exact(
274    text: &str,
275    query: &str,
276    options: SearchOptions,
277    range: SearchMatch,
278) -> Result<bool, SearchError> {
279    if range.is_empty() {
280        return Ok(false);
281    }
282
283    let Some(next) = find_next(text, query, options, range.start)? else {
284        return Ok(false);
285    };
286
287    Ok(next.start == range.start && next.end == range.end)
288}