xi_core_lib/
find.rs

1// Copyright 2018 The xi-editor Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Module for searching text.
16
17use std::cmp::{max, min};
18use std::iter;
19
20use crate::annotations::{AnnotationRange, AnnotationSlice, AnnotationType, ToAnnotation};
21use crate::selection::{InsertDrift, SelRegion, Selection};
22use crate::view::View;
23use crate::word_boundaries::WordCursor;
24use regex::{Regex, RegexBuilder};
25use xi_rope::delta::DeltaRegion;
26use xi_rope::find::{find, is_multiline_regex, CaseMatching};
27use xi_rope::{Cursor, Interval, LinesMetric, Metric, Rope, RopeDelta};
28
29const REGEX_SIZE_LIMIT: usize = 1000000;
30
31/// Information about search queries and number of matches for find
32#[derive(Serialize, Deserialize, Debug)]
33pub struct FindStatus {
34    /// Identifier for the current search query.
35    id: usize,
36
37    /// The current search query.
38    chars: Option<String>,
39
40    /// Whether the active search is case matching.
41    case_sensitive: Option<bool>,
42
43    /// Whether the search query is considered as regular expression.
44    is_regex: Option<bool>,
45
46    /// Query only matches whole words.
47    whole_words: Option<bool>,
48
49    /// Total number of matches.
50    matches: usize,
51
52    /// Line numbers which have find results.
53    lines: Vec<usize>,
54}
55
56/// Contains logic to search text
57pub struct Find {
58    /// Uniquely identifies this search query.
59    id: usize,
60
61    /// The occurrences, which determine the highlights, have been updated.
62    hls_dirty: bool,
63
64    /// The currently active search string.
65    search_string: Option<String>,
66
67    /// The case matching setting for the currently active search.
68    case_matching: CaseMatching,
69
70    /// The search query should be considered as regular expression.
71    regex: Option<Regex>,
72
73    /// Query matches only whole words.
74    whole_words: bool,
75
76    /// The set of all known find occurrences (highlights).
77    occurrences: Selection,
78}
79
80impl Find {
81    pub fn new(id: usize) -> Find {
82        Find {
83            id,
84            hls_dirty: true,
85            search_string: None,
86            case_matching: CaseMatching::CaseInsensitive,
87            regex: None,
88            whole_words: false,
89            occurrences: Selection::new(),
90        }
91    }
92
93    pub fn id(&self) -> usize {
94        self.id
95    }
96
97    pub fn occurrences(&self) -> &Selection {
98        &self.occurrences
99    }
100
101    pub fn hls_dirty(&self) -> bool {
102        self.hls_dirty
103    }
104
105    pub fn find_status(&self, view: &View, text: &Rope, matches_only: bool) -> FindStatus {
106        if matches_only {
107            FindStatus {
108                id: self.id,
109                chars: None,
110                case_sensitive: None,
111                is_regex: None,
112                whole_words: None,
113                matches: self.occurrences.len(),
114                lines: Vec::new(),
115            }
116        } else {
117            FindStatus {
118                id: self.id,
119                chars: self.search_string.clone(),
120                case_sensitive: Some(self.case_matching == CaseMatching::Exact),
121                is_regex: Some(self.regex.is_some()),
122                whole_words: Some(self.whole_words),
123                matches: self.occurrences.len(),
124                lines: self
125                    .occurrences
126                    .iter()
127                    .map(|o| view.offset_to_line_col(text, o.min()).0 + 1)
128                    .collect(),
129            }
130        }
131    }
132
133    pub fn set_hls_dirty(&mut self, is_dirty: bool) {
134        self.hls_dirty = is_dirty
135    }
136
137    pub fn update_highlights(&mut self, text: &Rope, delta: &RopeDelta) {
138        // update search highlights for changed regions
139        if self.search_string.is_some() {
140            // invalidate occurrences around deletion positions
141            for DeltaRegion { old_offset, len, .. } in delta.iter_deletions() {
142                self.occurrences.delete_range(old_offset, old_offset + len, false);
143            }
144
145            self.occurrences = self.occurrences.apply_delta(delta, false, InsertDrift::Default);
146
147            // invalidate occurrences around insert positions
148            for DeltaRegion { new_offset, len, .. } in delta.iter_inserts() {
149                // also invalidate previous occurrence since it might expand after insertion
150                // eg. for regex .* every insertion after match will be part of match
151                self.occurrences.delete_range(
152                    new_offset.checked_sub(1).unwrap_or(0),
153                    new_offset + len,
154                    false,
155                );
156            }
157
158            // update find for the whole delta and everything after
159            let (iv, new_len) = delta.summary();
160
161            // get last valid occurrence that was unaffected by the delta
162            let start = match self.occurrences.regions_in_range(0, iv.start()).last() {
163                Some(reg) => reg.end,
164                None => 0,
165            };
166
167            // invalidate all search results from the point of the last valid search result until ...
168            let is_multiline = LinesMetric::next(self.search_string.as_ref().unwrap(), 0).is_some();
169
170            if is_multiline || self.is_multiline_regex() {
171                // ... the end of the file
172                self.occurrences.delete_range(iv.start(), text.len(), false);
173                self.update_find(text, start, text.len(), false);
174            } else {
175                // ... the end of the line including line break
176                let mut cursor = Cursor::new(&text, iv.end() + new_len);
177
178                let end_of_line = match cursor.next::<LinesMetric>() {
179                    Some(end) => end,
180                    None if cursor.pos() == text.len() => cursor.pos(),
181                    _ => return,
182                };
183
184                self.occurrences.delete_range(iv.start(), end_of_line, false);
185                self.update_find(text, start, end_of_line, false);
186            }
187        }
188    }
189
190    /// Returns `true` if the search query is a multi-line regex.
191    pub(crate) fn is_multiline_regex(&self) -> bool {
192        self.regex.is_some() && is_multiline_regex(self.search_string.as_ref().unwrap())
193    }
194
195    /// Unsets the search and removes all highlights from the view.
196    pub fn unset(&mut self) {
197        self.search_string = None;
198        self.occurrences = Selection::new();
199        self.hls_dirty = true;
200    }
201
202    /// Sets find parameters and search query. Returns `true` if parameters have been updated.
203    /// Returns `false` to indicate that parameters haven't change.
204    pub(crate) fn set_find(
205        &mut self,
206        search_string: &str,
207        case_sensitive: bool,
208        is_regex: bool,
209        whole_words: bool,
210    ) -> bool {
211        if search_string.is_empty() {
212            self.unset();
213        }
214
215        let case_matching =
216            if case_sensitive { CaseMatching::Exact } else { CaseMatching::CaseInsensitive };
217
218        if let Some(ref s) = self.search_string {
219            if s == search_string
220                && case_matching == self.case_matching
221                && self.regex.is_some() == is_regex
222                && self.whole_words == whole_words
223            {
224                // search parameters did not change
225                return false;
226            }
227        }
228
229        self.unset();
230
231        self.search_string = Some(search_string.to_string());
232        self.case_matching = case_matching;
233        self.whole_words = whole_words;
234
235        // create regex from untrusted input
236        self.regex = match is_regex {
237            false => None,
238            true => RegexBuilder::new(search_string)
239                .size_limit(REGEX_SIZE_LIMIT)
240                .case_insensitive(case_matching == CaseMatching::CaseInsensitive)
241                .build()
242                .ok(),
243        };
244
245        true
246    }
247
248    /// Execute the search on the provided text in the range provided by `start` and `end`.
249    pub fn update_find(&mut self, text: &Rope, start: usize, end: usize, include_slop: bool) {
250        if self.search_string.is_none() {
251            return;
252        }
253
254        // extend the search by twice the string length (twice, because case matching may increase
255        // the length of an occurrence)
256        let slop = if include_slop { self.search_string.as_ref().unwrap().len() * 2 } else { 0 };
257
258        let search_string = self.search_string.as_ref().unwrap();
259
260        // expand region to be able to find occurrences around the region's edges
261        let expanded_start = max(start, slop) - slop;
262        let expanded_end = min(end + slop, text.len());
263        let from = text.at_or_prev_codepoint_boundary(expanded_start).unwrap_or(0);
264        let to = text.at_or_next_codepoint_boundary(expanded_end).unwrap_or(text.len());
265        let mut to_cursor = Cursor::new(&text, to);
266        let _ = to_cursor.next_leaf();
267
268        let sub_text = text.subseq(Interval::new(0, to_cursor.pos()));
269        let mut find_cursor = Cursor::new(&sub_text, from);
270
271        let mut raw_lines = text.lines_raw(from..to);
272
273        while let Some(start) = find(
274            &mut find_cursor,
275            &mut raw_lines,
276            self.case_matching,
277            &search_string,
278            self.regex.as_ref(),
279        ) {
280            let end = find_cursor.pos();
281
282            if self.whole_words && !self.is_matching_whole_words(text, start, end) {
283                raw_lines = text.lines_raw(find_cursor.pos()..to);
284                continue;
285            }
286
287            let region = SelRegion::new(start, end);
288            let (_, e) = self.occurrences.add_range_distinct(region);
289            // in case of ambiguous search results (e.g. search "aba" in "ababa"),
290            // the search result closer to the beginning of the file wins
291            if e != end {
292                // Skip the search result and keep the occurrence that is closer to
293                // the beginning of the file. Re-align the cursor to the kept
294                // occurrence
295                find_cursor.set(e);
296                raw_lines = text.lines_raw(find_cursor.pos()..to);
297                continue;
298            }
299
300            // in case current cursor matches search result (for example query a* matches)
301            // all cursor positions, then cursor needs to be increased so that search
302            // continues at next position. Otherwise, search will result in overflow since
303            // search will always repeat at current cursor position.
304            if start == end {
305                // determine whether end of text is reached and stop search or increase
306                // cursor manually
307                if end + 1 >= text.len() {
308                    break;
309                } else {
310                    find_cursor.set(end + 1);
311                }
312            }
313
314            // update line iterator so that line starts at current cursor position
315            raw_lines = text.lines_raw(find_cursor.pos()..to);
316        }
317
318        self.hls_dirty = true;
319    }
320
321    /// Return the occurrence closest to the provided selection `sel`. If searched is reversed then
322    /// the occurrence closest to the start of the selection is returned. `wrapped` indicates that
323    /// if the end of the text is reached the search continues from the start.
324    pub fn next_occurrence(
325        &self,
326        text: &Rope,
327        reverse: bool,
328        wrapped: bool,
329        sel: &Selection,
330    ) -> Option<SelRegion> {
331        if self.occurrences.len() == 0 {
332            return None;
333        }
334
335        let (sel_start, sel_end) = match sel.last() {
336            Some(last) if last.is_caret() =>
337            // if last selection is caret then allow the current position to be part of the occurrence
338            {
339                (last.min(), last.max())
340            }
341            Some(last) if !last.is_caret() =>
342            // if the last selection is not a caret then continue searching after the caret
343            {
344                (last.min(), last.max() + 1)
345            }
346            _ => (0, 0),
347        };
348
349        if reverse {
350            let next_occurrence = match sel_start.checked_sub(1) {
351                Some(search_end) => self.occurrences.regions_in_range(0, search_end).last(),
352                None => None,
353            };
354
355            if next_occurrence.is_none() && !wrapped {
356                // get previous unselected occurrence
357                return self
358                    .occurrences
359                    .regions_in_range(0, text.len())
360                    .iter()
361                    .cloned()
362                    .filter(|o| sel.regions_in_range(o.min(), o.max()).is_empty())
363                    .collect::<Vec<SelRegion>>()
364                    .last()
365                    .cloned();
366            }
367
368            next_occurrence.cloned()
369        } else {
370            let next_occurrence = self.occurrences.regions_in_range(sel_end, text.len()).first();
371
372            if next_occurrence.is_none() && !wrapped {
373                // get next unselected occurrence
374                return self
375                    .occurrences
376                    .regions_in_range(0, text.len())
377                    .iter()
378                    .cloned()
379                    .filter(|o| sel.regions_in_range(o.min(), o.max()).is_empty())
380                    .collect::<Vec<SelRegion>>()
381                    .first()
382                    .cloned();
383            }
384
385            next_occurrence.cloned()
386        }
387    }
388
389    /// Checks if the start and end of a match is matching whole words.
390    fn is_matching_whole_words(&self, text: &Rope, start: usize, end: usize) -> bool {
391        let mut word_end_cursor = WordCursor::new(text, end - 1);
392        let mut word_start_cursor = WordCursor::new(text, start + 1);
393
394        if let Some(start_boundary) = word_start_cursor.prev_boundary() {
395            if start_boundary != start {
396                return false;
397            }
398        }
399
400        if let Some(end_boundary) = word_end_cursor.next_boundary() {
401            if end_boundary != end {
402                return false;
403            }
404        }
405
406        true
407    }
408}
409
410/// Implementing the `ToAnnotation` trait allows to convert finds to annotations.
411impl ToAnnotation for Find {
412    fn get_annotations(&self, interval: Interval, view: &View, text: &Rope) -> AnnotationSlice {
413        let regions = self.occurrences.regions_in_range(interval.start(), interval.end());
414        let ranges = regions
415            .iter()
416            .map(|region| {
417                let (start_line, start_col) = view.offset_to_line_col(text, region.min());
418                let (end_line, end_col) = view.offset_to_line_col(text, region.max());
419
420                AnnotationRange { start_line, start_col, end_line, end_col }
421            })
422            .collect::<Vec<AnnotationRange>>();
423
424        let payload = iter::repeat(json!({"id": self.id})).take(ranges.len()).collect::<Vec<_>>();
425
426        AnnotationSlice::new(AnnotationType::Find, ranges, Some(payload))
427    }
428}
429
430#[cfg(test)]
431mod tests {
432    use super::*;
433    use xi_rope::DeltaBuilder;
434
435    #[test]
436    fn find() {
437        let base_text = Rope::from("hello world");
438        let mut find = Find::new(1);
439        find.set_find("world", false, false, false);
440        find.update_find(&base_text, 0, base_text.len(), false);
441        assert_eq!(find.occurrences().len(), 1);
442        assert_eq!(find.occurrences().first(), Some(&SelRegion::new(6, 11)));
443    }
444
445    #[test]
446    fn find_whole_words() {
447        let base_text = Rope::from("hello world\n many worlds");
448        let mut find = Find::new(1);
449        find.set_find("world", false, false, true);
450        find.update_find(&base_text, 0, base_text.len(), false);
451        assert_eq!(find.occurrences().len(), 1);
452        assert_eq!(find.occurrences().first(), Some(&SelRegion::new(6, 11)));
453    }
454
455    #[test]
456    fn find_case_sensitive() {
457        let base_text = Rope::from("hello world\n HELLO WORLD");
458        let mut find = Find::new(1);
459        find.set_find("world", true, false, false);
460        find.update_find(&base_text, 0, base_text.len(), false);
461        assert_eq!(find.occurrences().len(), 1);
462        assert_eq!(find.occurrences().first(), Some(&SelRegion::new(6, 11)));
463    }
464
465    #[test]
466    fn find_multiline() {
467        let base_text = Rope::from("hello world\n HELLO WORLD");
468        let mut find = Find::new(1);
469        find.set_find("hello world\n HELLO", true, false, false);
470        find.update_find(&base_text, 0, base_text.len(), false);
471        assert_eq!(find.occurrences().len(), 1);
472        assert_eq!(find.occurrences().first(), Some(&SelRegion::new(0, 18)));
473    }
474
475    #[test]
476    fn find_regex() {
477        let base_text = Rope::from("hello world\n HELLO WORLD");
478        let mut find = Find::new(1);
479        find.set_find("hello \\w+", false, true, false);
480        find.update_find(&base_text, 0, base_text.len(), false);
481        assert_eq!(find.occurrences().len(), 2);
482        assert_eq!(find.occurrences().first(), Some(&SelRegion::new(0, 11)));
483
484        find.set_find("h.llo", true, true, false);
485        find.update_find(&base_text, 0, base_text.len(), false);
486        assert_eq!(find.occurrences().len(), 1);
487        assert_eq!(find.occurrences().first(), Some(&SelRegion::new(0, 5)));
488
489        find.set_find(".*", false, true, false);
490        find.update_find(&base_text, 0, base_text.len(), false);
491        assert_eq!(find.occurrences().len(), 3);
492        assert_eq!(find.occurrences().first(), Some(&SelRegion::new(0, 11)));
493    }
494
495    #[test]
496    fn find_regex_multiline() {
497        let base_text = Rope::from("hello world\n HELLO WORLD");
498        let mut find = Find::new(1);
499        find.set_find("(.*\n.*)+", true, true, false);
500        find.update_find(&base_text, 0, base_text.len(), false);
501        assert_eq!(find.occurrences().len(), 1);
502        assert_eq!(find.occurrences().first(), Some(&SelRegion::new(0, 12)));
503    }
504
505    #[test]
506    fn find_multiline_regex() {
507        let mut find = Find::new(1);
508        find.set_find("a", true, true, false);
509        assert_eq!(find.is_multiline_regex(), false);
510        find.set_find(".*", true, true, false);
511        assert_eq!(find.is_multiline_regex(), false);
512        find.set_find("\\n", true, true, false);
513        assert_eq!(find.is_multiline_regex(), true);
514    }
515
516    #[test]
517    fn find_slop() {
518        let base_text = Rope::from("aaa bbb aaa bbb aaa x");
519        let mut find = Find::new(1);
520        find.set_find("aaa", true, true, false);
521        find.update_find(&base_text, 2, base_text.len(), false);
522        assert_eq!(find.occurrences().len(), 2);
523        assert_eq!(find.occurrences().first(), Some(&SelRegion::new(8, 11)));
524
525        find.update_find(&base_text, 3, base_text.len(), true);
526        assert_eq!(find.occurrences().len(), 3);
527        assert_eq!(find.occurrences().first(), Some(&SelRegion::new(0, 3)));
528    }
529
530    #[test]
531    fn find_next_occurrence() {
532        let base_text = Rope::from("aaa bbb aaa bbb aaa x");
533        let mut find = Find::new(1);
534        find.set_find("aaa", true, true, false);
535        find.update_find(&base_text, 0, base_text.len(), false);
536        assert_eq!(find.occurrences().len(), 3);
537        assert_eq!(
538            find.next_occurrence(&base_text, false, false, &Selection::new()),
539            Some(SelRegion::new(0, 3))
540        );
541
542        let mut prev_selection = Selection::new();
543        prev_selection.add_region(SelRegion::new(0, 3));
544        assert_eq!(
545            find.next_occurrence(&base_text, false, false, &prev_selection),
546            Some(SelRegion::new(8, 11))
547        );
548
549        let mut prev_selection = Selection::new();
550        prev_selection.add_region(SelRegion::new(19, 19));
551        assert_eq!(
552            find.next_occurrence(&base_text, false, true, &prev_selection),
553            Some(SelRegion::new(16, 19))
554        );
555
556        let mut prev_selection = Selection::new();
557        prev_selection.add_region(SelRegion::new(20, 20));
558        assert_eq!(
559            find.next_occurrence(&base_text, false, false, &prev_selection),
560            Some(SelRegion::new(0, 3))
561        );
562    }
563
564    #[test]
565    fn find_previous_occurrence() {
566        let base_text = Rope::from("aaa bbb aaa bbb aaa x");
567        let mut find = Find::new(1);
568        find.set_find("aaa", true, true, false);
569        find.update_find(&base_text, 0, base_text.len(), false);
570        assert_eq!(find.occurrences().len(), 3);
571        assert_eq!(
572            find.next_occurrence(&base_text, true, false, &Selection::new()),
573            Some(SelRegion::new(16, 19))
574        );
575
576        let mut prev_selection = Selection::new();
577        prev_selection.add_region(SelRegion::new(20, 20));
578        assert_eq!(find.next_occurrence(&base_text, true, true, &Selection::new()), None);
579    }
580
581    #[test]
582    fn unset_find() {
583        let base_text = Rope::from("aaa bbb aaa bbb aaa x");
584        let mut find = Find::new(1);
585        find.set_find("aaa", true, true, false);
586        find.update_find(&base_text, 0, base_text.len(), false);
587        assert_eq!(find.occurrences().len(), 3);
588        find.unset();
589        assert_eq!(find.occurrences().len(), 0);
590    }
591
592    #[test]
593    fn update_find_edit() {
594        let base_text = Rope::from("a b a c");
595        let mut find = Find::new(1);
596        find.set_find("a", false, false, false);
597        find.update_find(&base_text, 0, base_text.len(), false);
598        let mut builder = DeltaBuilder::new(base_text.len());
599        builder.replace(0..0, "a ".into());
600
601        assert_eq!(find.occurrences().len(), 2);
602        assert_eq!(find.occurrences().first(), Some(&SelRegion::new(0, 1)));
603        assert_eq!(find.occurrences().last(), Some(&SelRegion::new(4, 5)));
604    }
605
606    #[test]
607    fn update_find_multiline_edit() {
608        let base_text = Rope::from("x\n a\n b\n a\n c");
609        let mut find = Find::new(1);
610        find.set_find("a", false, false, false);
611        find.update_find(&base_text, 0, base_text.len(), false);
612        let mut builder = DeltaBuilder::new(base_text.len());
613        builder.replace(2..2, " a\n b\n a\n".into());
614
615        assert_eq!(find.occurrences().len(), 2);
616        assert_eq!(find.occurrences().first(), Some(&SelRegion::new(3, 4)));
617        assert_eq!(find.occurrences().last(), Some(&SelRegion::new(9, 10)));
618    }
619}