ncp_matcher/lib.rs
1/*!
2This is a fork of the [nucleo_matcher](https://docs.rs/nucleo_matcher) crate.
3It is not recommended for general use. This fork mainly exists to meet the specific
4requirements of [`nucleo-picker`](https://docs.rs/nucleo-picker).
5
6`ncp_matcher` is a low level crate that contains the matcher implementation
7used by the high level `ncp_engine` crate.
8
9**NOTE**: If you are building an fzf-like interactive fuzzy finder that is
10meant to match a reasonably large number of items (> 100) using the high level
11`nucleo` crate is highly recommended. Using `nucleo-matcher` directly in you ui
12loop will be very slow. Implementing this logic yourself is very complex.
13
14The matcher is hightly optimized and can significantly outperform `fzf` and
15`skim` (the `fuzzy-matcher` crate). However some of these optimizations require
16a slightly less convenient API. Be sure to carefully read the documentation of
17the [`Matcher`] to avoid unexpected behaviour.
18# Examples
19
20For almost all usecases the [`pattern`] API should be used instead of calling
21the matcher methods directly. [`Pattern::parse`](pattern::Pattern::parse) will
22construct a single Atom (a single match operation) for each word. The pattern
23can contain special characters to control what kind of match is performed (see
24[`AtomKind`](crate::pattern::AtomKind)).
25
26```
27# use ncp_matcher::{Matcher, Config};
28# use ncp_matcher::pattern::{Pattern, Normalization, CaseMatching};
29let paths = ["foo/bar", "bar/foo", "foobar"];
30let mut matcher = Matcher::new(Config::DEFAULT.match_paths());
31let matches = Pattern::parse("foo bar", CaseMatching::Ignore, Normalization::Smart).match_list(paths, &mut matcher);
32assert_eq!(matches, vec![("foo/bar", 168), ("bar/foo", 168), ("foobar", 140)]);
33let matches = Pattern::parse("^foo bar", CaseMatching::Ignore, Normalization::Smart).match_list(paths, &mut matcher);
34assert_eq!(matches, vec![("foo/bar", 168), ("foobar", 140)]);
35```
36
37If the pattern should be matched literally (without this special parsing)
38[`Pattern::new`](pattern::Pattern::new) can be used instead.
39
40```
41# use ncp_matcher::{Matcher, Config};
42# use ncp_matcher::pattern::{Pattern, CaseMatching, AtomKind, Normalization};
43let paths = ["foo/bar", "bar/foo", "foobar"];
44let mut matcher = Matcher::new(Config::DEFAULT.match_paths());
45let matches = Pattern::new("foo bar", CaseMatching::Ignore, Normalization::Smart, AtomKind::Fuzzy).match_list(paths, &mut matcher);
46assert_eq!(matches, vec![("foo/bar", 168), ("bar/foo", 168), ("foobar", 140)]);
47let paths = ["^foo/bar", "bar/^foo", "foobar"];
48let matches = Pattern::new("^foo bar", CaseMatching::Ignore, Normalization::Smart, AtomKind::Fuzzy).match_list(paths, &mut matcher);
49assert_eq!(matches, vec![("^foo/bar", 188), ("bar/^foo", 188)]);
50```
51
52Word segmentation is performed automatically on any unescaped character for which [`is_whitespace`](char::is_whitespace) returns true.
53This is relevant, for instance, with non-english keyboard input.
54
55```
56# use ncp_matcher::pattern::{Atom, Pattern, Normalization, CaseMatching};
57assert_eq!(
58 // double-width 'Ideographic Space', i.e. `'\u{3000}'`
59 Pattern::parse("ほげ ふが", CaseMatching::Smart, Normalization::Smart).atoms,
60 vec![
61 Atom::parse("ほげ", CaseMatching::Smart, Normalization::Smart),
62 Atom::parse("ふが", CaseMatching::Smart, Normalization::Smart),
63 ],
64);
65```
66
67If word segmentation is also not desired, a single `Atom` can be constructed directly.
68
69```
70# use ncp_matcher::{Matcher, Config};
71# use ncp_matcher::pattern::{Pattern, Atom, CaseMatching, Normalization, AtomKind};
72let paths = ["foobar", "foo bar"];
73let mut matcher = Matcher::new(Config::DEFAULT);
74let matches = Atom::new("foo bar", CaseMatching::Ignore, Normalization::Smart, AtomKind::Fuzzy, false).match_list(paths, &mut matcher);
75assert_eq!(matches, vec![("foo bar", 192)]);
76```
77
78
79# Status
80
81Nucleo is used in the helix-editor and therefore has a large user base with lots or real world testing. The core matcher implementation is considered complete and is unlikely to see major changes. The `nucleo-matcher` crate is finished and ready for widespread use, breaking changes should be very rare (a 1.0 release should not be far away).
82
83*/
84
85// sadly ranges don't optimize well
86#![allow(clippy::manual_range_contains)]
87#![warn(missing_docs)]
88
89pub mod chars;
90mod config;
91#[cfg(test)]
92mod debug;
93mod exact;
94mod fuzzy_greedy;
95mod fuzzy_optimal;
96mod matrix;
97pub mod pattern;
98mod prefilter;
99mod score;
100mod utf32_str;
101
102#[cfg(test)]
103mod tests;
104
105pub use crate::config::Config;
106pub use crate::utf32_str::{Utf32Str, Utf32String};
107
108use crate::chars::{AsciiChar, Char};
109use crate::matrix::MatrixSlab;
110
111/// A matcher engine that can execute (fuzzy) matches.
112///
113/// A matches contains **heap allocated** scratch memory that is reused during
114/// matching. This scratch memory allows the matcher to guarantee that it will
115/// **never allocate** during matching (with the exception of pushing to the
116/// `indices` vector if there isn't enough capacity). However this scratch
117/// memory is fairly large (around 135KB) so creating a matcher is expensive.
118///
119/// All `.._match` functions will not compute the indices of the matched
120/// characters. These should be used to prefilter to filter and rank all
121/// matches. All `.._indices` functions will also compute the indices of the
122/// matched characters but are slower compared to the `..match` variant. These
123/// should be used when rendering the best N matches. Note that the `indices`
124/// argument is **never cleared**. This allows running multiple different
125/// matches on the same haystack and merging the indices by sorting and
126/// deduplicating the vector.
127///
128/// The `needle` argument for each function must always be normalized by the
129/// caller (unicode normalization and case folding). Otherwise, the matcher
130/// may fail to produce a match. The [`pattern`] modules provides utilities
131/// to preprocess needles and **should usually be preferred over invoking the
132/// matcher directly**. Additionally it's recommend to perform separate matches
133/// for each word in the needle. Consider the folloling example:
134///
135/// If `foo bar` is used as the needle it matches both `foo test baaar` and
136/// `foo hello-world bar`. However, `foo test baaar` will receive a higher
137/// score than `foo hello-world bar`. `baaar` contains a 2 character gap which
138/// will receive a penalty and therefore the user will likely expect it to rank
139/// lower. However, if `foo bar` is matched as a single query `hello-world` and
140/// `test` are both considered gaps too. As `hello-world` is a much longer gap
141/// then `test` the extra penalty for `baaar` is canceled out. If both words
142/// are matched individually the interspersed words do not receive a penalty and
143/// `foo hello-world bar` ranks higher.
144///
145/// In general nucleo is a **substring matching tool** (except for the prefix/
146/// postfix matching modes) with no penalty assigned to matches that start
147/// later within the same pattern (which enables matching words individually
148/// as shown above). If patterns show a large variety in length and the syntax
149/// described above is not used it may be preferable to give preference to
150/// matches closer to the start of a haystack. To accommodate that usecase the
151/// [`prefer_prefix`](Config::prefer_prefix) option can be set to true.
152///
153/// Matching is limited to 2^32-1 codepoints, if the haystack is longer than
154/// that the matcher **will panic**. The caller must decide whether it wants to
155/// filter out long haystacks or truncate them.
156pub struct Matcher {
157 #[allow(missing_docs)]
158 pub config: Config,
159 slab: MatrixSlab,
160}
161
162// this is just here for convenience not sure if we should implement this
163impl Clone for Matcher {
164 fn clone(&self) -> Self {
165 Matcher {
166 config: self.config.clone(),
167 slab: MatrixSlab::new(),
168 }
169 }
170}
171
172impl std::fmt::Debug for Matcher {
173 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
174 f.debug_struct("Matcher")
175 .field("config", &self.config)
176 .finish_non_exhaustive()
177 }
178}
179
180impl Default for Matcher {
181 fn default() -> Self {
182 Matcher {
183 config: Config::DEFAULT,
184 slab: MatrixSlab::new(),
185 }
186 }
187}
188
189impl Matcher {
190 /// Creates a new matcher instance.
191 ///
192 /// This will eagerly allocate a fairly large chunk of heap memory (around 135KB
193 /// currently, but subject to change) so matchers should be reused if called often,
194 /// such as in a loop.
195 pub fn new(config: Config) -> Self {
196 Self {
197 config,
198 slab: MatrixSlab::new(),
199 }
200 }
201
202 /// Find the fuzzy match with the highest score in the `haystack`.
203 ///
204 /// This functions has `O(mn)` time complexity for short inputs.
205 /// To avoid slowdowns it automatically falls back to
206 /// [greedy matching](crate::Matcher::fuzzy_match_greedy) for large
207 /// needles and haystacks.
208 ///
209 /// See the [matcher documentation](crate::Matcher) for more details.
210 pub fn fuzzy_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
211 assert!(haystack.len() <= u32::MAX as usize);
212 self.fuzzy_matcher_impl::<false>(haystack, needle, &mut Vec::new())
213 }
214
215 /// Find the fuzzy match with the highest score in the `haystack` and
216 /// compute its indices.
217 ///
218 /// This functions has `O(mn)` time complexity for short inputs. To
219 /// avoid slowdowns it automatically falls back to
220 /// [greedy matching](crate::Matcher::fuzzy_match_greedy) for large needles
221 /// and haystacks
222 ///
223 /// See the [matcher documentation](crate::Matcher) for more details.
224 pub fn fuzzy_indices(
225 &mut self,
226 haystack: Utf32Str<'_>,
227 needle: Utf32Str<'_>,
228 indices: &mut Vec<u32>,
229 ) -> Option<u16> {
230 assert!(haystack.len() <= u32::MAX as usize);
231 self.fuzzy_matcher_impl::<true>(haystack, needle, indices)
232 }
233
234 fn fuzzy_matcher_impl<const INDICES: bool>(
235 &mut self,
236 haystack_: Utf32Str<'_>,
237 needle_: Utf32Str<'_>,
238 indices: &mut Vec<u32>,
239 ) -> Option<u16> {
240 if needle_.len() > haystack_.len() {
241 return None;
242 }
243 if needle_.is_empty() {
244 return Some(0);
245 }
246 if needle_.len() == haystack_.len() {
247 return self.exact_match_impl::<INDICES>(
248 haystack_,
249 needle_,
250 0,
251 haystack_.len(),
252 indices,
253 );
254 }
255 assert!(
256 haystack_.len() <= u32::MAX as usize,
257 "fuzzy matching is only support for up to 2^32-1 codepoints"
258 );
259 match (haystack_, needle_) {
260 (Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
261 if let &[needle] = needle {
262 return self.substring_match_1_ascii::<INDICES>(haystack, needle, indices);
263 }
264 let (start, greedy_end, end) = self.prefilter_ascii(haystack, needle, false)?;
265 if needle_.len() == end - start {
266 return Some(self.calculate_score::<INDICES, _, _>(
267 AsciiChar::cast(haystack),
268 AsciiChar::cast(needle),
269 start,
270 greedy_end,
271 indices,
272 ));
273 }
274 self.fuzzy_match_optimal::<INDICES, AsciiChar, AsciiChar>(
275 AsciiChar::cast(haystack),
276 AsciiChar::cast(needle),
277 start,
278 greedy_end,
279 end,
280 indices,
281 )
282 }
283 (Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => {
284 // a purely ascii haystack can never be transformed to match
285 // a needle that contains non-ascii chars since we don't allow gaps
286 None
287 }
288 (Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
289 if let &[needle] = needle {
290 let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
291 let res = self.substring_match_1_non_ascii::<INDICES>(
292 haystack,
293 needle as char,
294 start,
295 indices,
296 );
297 return Some(res);
298 }
299 let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
300 if needle_.len() == end - start {
301 return self
302 .exact_match_impl::<INDICES>(haystack_, needle_, start, end, indices);
303 }
304 self.fuzzy_match_optimal::<INDICES, char, AsciiChar>(
305 haystack,
306 AsciiChar::cast(needle),
307 start,
308 start + 1,
309 end,
310 indices,
311 )
312 }
313 (Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
314 if let &[needle] = needle {
315 let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
316 let res = self
317 .substring_match_1_non_ascii::<INDICES>(haystack, needle, start, indices);
318 return Some(res);
319 }
320 let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
321 if needle_.len() == end - start {
322 return self
323 .exact_match_impl::<INDICES>(haystack_, needle_, start, end, indices);
324 }
325 self.fuzzy_match_optimal::<INDICES, char, char>(
326 haystack,
327 needle,
328 start,
329 start + 1,
330 end,
331 indices,
332 )
333 }
334 }
335 }
336
337 /// Greedly find a fuzzy match in the `haystack`.
338 ///
339 /// This functions has `O(n)` time complexity but may provide unintutive (non-optimal)
340 /// indices and scores. Usually [fuzzy_match](crate::Matcher::fuzzy_match) should
341 /// be preferred.
342 ///
343 /// See the [matcher documentation](crate::Matcher) for more details.
344 pub fn fuzzy_match_greedy(
345 &mut self,
346 haystack: Utf32Str<'_>,
347 needle: Utf32Str<'_>,
348 ) -> Option<u16> {
349 assert!(haystack.len() <= u32::MAX as usize);
350 self.fuzzy_match_greedy_impl::<false>(haystack, needle, &mut Vec::new())
351 }
352
353 /// Greedly find a fuzzy match in the `haystack` and compute its indices.
354 ///
355 /// This functions has `O(n)` time complexity but may provide unintuitive (non-optimal)
356 /// indices and scores. Usually [fuzzy_indices](crate::Matcher::fuzzy_indices) should
357 /// be preferred.
358 ///
359 /// See the [matcher documentation](crate::Matcher) for more details.
360 pub fn fuzzy_indices_greedy(
361 &mut self,
362 haystack: Utf32Str<'_>,
363 needle: Utf32Str<'_>,
364 indices: &mut Vec<u32>,
365 ) -> Option<u16> {
366 assert!(haystack.len() <= u32::MAX as usize);
367 self.fuzzy_match_greedy_impl::<true>(haystack, needle, indices)
368 }
369
370 fn fuzzy_match_greedy_impl<const INDICES: bool>(
371 &mut self,
372 haystack: Utf32Str<'_>,
373 needle_: Utf32Str<'_>,
374 indices: &mut Vec<u32>,
375 ) -> Option<u16> {
376 if needle_.len() > haystack.len() {
377 return None;
378 }
379 if needle_.is_empty() {
380 return Some(0);
381 }
382 if needle_.len() == haystack.len() {
383 return self.exact_match_impl::<INDICES>(haystack, needle_, 0, haystack.len(), indices);
384 }
385 assert!(
386 haystack.len() <= u32::MAX as usize,
387 "matching is only support for up to 2^32-1 codepoints"
388 );
389 match (haystack, needle_) {
390 (Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
391 let (start, greedy_end, _) = self.prefilter_ascii(haystack, needle, true)?;
392 if needle_.len() == greedy_end - start {
393 return Some(self.calculate_score::<INDICES, _, _>(
394 AsciiChar::cast(haystack),
395 AsciiChar::cast(needle),
396 start,
397 greedy_end,
398 indices,
399 ));
400 }
401 self.fuzzy_match_greedy_::<INDICES, AsciiChar, AsciiChar>(
402 AsciiChar::cast(haystack),
403 AsciiChar::cast(needle),
404 start,
405 greedy_end,
406 indices,
407 )
408 }
409 (Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => {
410 // a purely ascii haystack can never be transformed to match
411 // a needle that contains non-ascii chars since we don't allow gaps
412 None
413 }
414 (Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
415 let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
416 self.fuzzy_match_greedy_::<INDICES, char, AsciiChar>(
417 haystack,
418 AsciiChar::cast(needle),
419 start,
420 start + 1,
421 indices,
422 )
423 }
424 (Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
425 let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
426 self.fuzzy_match_greedy_::<INDICES, char, char>(
427 haystack,
428 needle,
429 start,
430 start + 1,
431 indices,
432 )
433 }
434 }
435 }
436
437 /// Finds the substring match with the highest score in the `haystack`.
438 ///
439 /// This functions has `O(nm)` time complexity. However many cases can
440 /// be significantly accelerated using prefilters so it's usually very fast
441 /// in practice.
442 ///
443 /// See the [matcher documentation](crate::Matcher) for more details.
444 pub fn substring_match(
445 &mut self,
446 haystack: Utf32Str<'_>,
447 needle_: Utf32Str<'_>,
448 ) -> Option<u16> {
449 self.substring_match_impl::<false>(haystack, needle_, &mut Vec::new())
450 }
451
452 /// Finds the substring match with the highest score in the `haystack` and
453 /// compute its indices.
454 ///
455 /// This functions has `O(nm)` time complexity. However many cases can
456 /// be significantly accelerated using prefilters so it's usually fast
457 /// in practice.
458 ///
459 /// See the [matcher documentation](crate::Matcher) for more details.
460 pub fn substring_indices(
461 &mut self,
462 haystack: Utf32Str<'_>,
463 needle_: Utf32Str<'_>,
464 indices: &mut Vec<u32>,
465 ) -> Option<u16> {
466 self.substring_match_impl::<true>(haystack, needle_, indices)
467 }
468
469 fn substring_match_impl<const INDICES: bool>(
470 &mut self,
471 haystack: Utf32Str<'_>,
472 needle_: Utf32Str<'_>,
473 indices: &mut Vec<u32>,
474 ) -> Option<u16> {
475 if needle_.len() > haystack.len() {
476 return None;
477 }
478 if needle_.is_empty() {
479 return Some(0);
480 }
481 if needle_.len() == haystack.len() {
482 return self.exact_match_impl::<INDICES>(haystack, needle_, 0, haystack.len(), indices);
483 }
484 assert!(
485 haystack.len() <= u32::MAX as usize,
486 "matching is only support for up to 2^32-1 codepoints"
487 );
488 match (haystack, needle_) {
489 (Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
490 if let &[needle] = needle {
491 return self.substring_match_1_ascii::<INDICES>(haystack, needle, indices);
492 }
493 self.substring_match_ascii::<INDICES>(haystack, needle, indices)
494 }
495 (Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => {
496 // a purely ascii haystack can never be transformed to match
497 // a needle that contains non-ascii chars since we don't allow gaps
498 None
499 }
500 (Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
501 if let &[needle] = needle {
502 let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
503 let res = self.substring_match_1_non_ascii::<INDICES>(
504 haystack,
505 needle as char,
506 start,
507 indices,
508 );
509 return Some(res);
510 }
511 let (start, _) = self.prefilter_non_ascii(haystack, needle_, false)?;
512 self.substring_match_non_ascii::<INDICES, _>(
513 haystack,
514 AsciiChar::cast(needle),
515 start,
516 indices,
517 )
518 }
519 (Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
520 if let &[needle] = needle {
521 let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
522 let res = self
523 .substring_match_1_non_ascii::<INDICES>(haystack, needle, start, indices);
524 return Some(res);
525 }
526 let (start, _) = self.prefilter_non_ascii(haystack, needle_, false)?;
527 self.substring_match_non_ascii::<INDICES, _>(haystack, needle, start, indices)
528 }
529 }
530 }
531
532 /// Checks whether needle and haystack match exactly.
533 ///
534 /// This functions has `O(n)` time complexity.
535 ///
536 /// See the [matcher documentation](crate::Matcher) for more details.
537 pub fn exact_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
538 if needle.is_empty() {
539 return Some(0);
540 }
541 let mut leading_space = 0;
542 let mut trailing_space = 0;
543 if !needle.first().is_whitespace() {
544 leading_space = haystack.leading_white_space()
545 }
546 if !needle.last().is_whitespace() {
547 trailing_space = haystack.trailing_white_space()
548 }
549 // avoid wraparound in size check
550 if trailing_space == haystack.len() {
551 return None;
552 }
553 self.exact_match_impl::<false>(
554 haystack,
555 needle,
556 leading_space,
557 haystack.len() - trailing_space,
558 &mut Vec::new(),
559 )
560 }
561
562 /// Checks whether needle and haystack match exactly and compute the matches indices.
563 ///
564 /// This functions has `O(n)` time complexity.
565 ///
566 /// See the [matcher documentation](crate::Matcher) for more details.
567 pub fn exact_indices(
568 &mut self,
569 haystack: Utf32Str<'_>,
570 needle: Utf32Str<'_>,
571 indices: &mut Vec<u32>,
572 ) -> Option<u16> {
573 if needle.is_empty() {
574 return Some(0);
575 }
576 let mut leading_space = 0;
577 let mut trailing_space = 0;
578 if !needle.first().is_whitespace() {
579 leading_space = haystack.leading_white_space()
580 }
581 if !needle.last().is_whitespace() {
582 trailing_space = haystack.trailing_white_space()
583 }
584 // avoid wraparound in size check
585 if trailing_space == haystack.len() {
586 return None;
587 }
588 self.exact_match_impl::<true>(
589 haystack,
590 needle,
591 leading_space,
592 haystack.len() - trailing_space,
593 indices,
594 )
595 }
596
597 /// Checks whether needle is a prefix of the haystack.
598 ///
599 /// This functions has `O(n)` time complexity.
600 ///
601 /// See the [matcher documentation](crate::Matcher) for more details.
602 pub fn prefix_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
603 if needle.is_empty() {
604 return Some(0);
605 }
606 let mut leading_space = 0;
607 if !needle.first().is_whitespace() {
608 leading_space = haystack.leading_white_space()
609 }
610 if haystack.len() - leading_space < needle.len() {
611 None
612 } else {
613 self.exact_match_impl::<false>(
614 haystack,
615 needle,
616 leading_space,
617 needle.len() + leading_space,
618 &mut Vec::new(),
619 )
620 }
621 }
622
623 /// Checks whether needle is a prefix of the haystack and compute the matches indices.
624 ///
625 /// This functions has `O(n)` time complexity.
626 ///
627 /// See the [matcher documentation](crate::Matcher) for more details.
628 pub fn prefix_indices(
629 &mut self,
630 haystack: Utf32Str<'_>,
631 needle: Utf32Str<'_>,
632 indices: &mut Vec<u32>,
633 ) -> Option<u16> {
634 if needle.is_empty() {
635 return Some(0);
636 }
637 let mut leading_space = 0;
638 if !needle.first().is_whitespace() {
639 leading_space = haystack.leading_white_space()
640 }
641 if haystack.len() - leading_space < needle.len() {
642 None
643 } else {
644 self.exact_match_impl::<true>(
645 haystack,
646 needle,
647 leading_space,
648 needle.len() + leading_space,
649 indices,
650 )
651 }
652 }
653
654 /// Checks whether needle is a postfix of the haystack.
655 ///
656 /// This functions has `O(n)` time complexity.
657 ///
658 /// See the [matcher documentation](crate::Matcher) for more details.
659 pub fn postfix_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
660 if needle.is_empty() {
661 return Some(0);
662 }
663 let mut trailing_spaces = 0;
664 if !needle.last().is_whitespace() {
665 trailing_spaces = haystack.trailing_white_space()
666 }
667 if haystack.len() - trailing_spaces < needle.len() {
668 None
669 } else {
670 self.exact_match_impl::<false>(
671 haystack,
672 needle,
673 haystack.len() - needle.len() - trailing_spaces,
674 haystack.len() - trailing_spaces,
675 &mut Vec::new(),
676 )
677 }
678 }
679
680 /// Checks whether needle is a postfix of the haystack and compute the matches indices.
681 ///
682 /// This functions has `O(n)` time complexity.
683 ///
684 /// See the [matcher documentation](crate::Matcher) for more details.
685 pub fn postfix_indices(
686 &mut self,
687 haystack: Utf32Str<'_>,
688 needle: Utf32Str<'_>,
689 indices: &mut Vec<u32>,
690 ) -> Option<u16> {
691 if needle.is_empty() {
692 return Some(0);
693 }
694 let mut trailing_spaces = 0;
695 if !needle.last().is_whitespace() {
696 trailing_spaces = haystack.trailing_white_space()
697 }
698 if haystack.len() - trailing_spaces < needle.len() {
699 None
700 } else {
701 self.exact_match_impl::<true>(
702 haystack,
703 needle,
704 haystack.len() - needle.len() - trailing_spaces,
705 haystack.len() - trailing_spaces,
706 indices,
707 )
708 }
709 }
710
711 fn exact_match_impl<const INDICES: bool>(
712 &mut self,
713 haystack: Utf32Str<'_>,
714 needle_: Utf32Str<'_>,
715 start: usize,
716 end: usize,
717 indices: &mut Vec<u32>,
718 ) -> Option<u16> {
719 if needle_.len() != end - start {
720 return None;
721 }
722 assert!(
723 haystack.len() <= u32::MAX as usize,
724 "matching is only support for up to 2^32-1 codepoints"
725 );
726 let score = match (haystack, needle_) {
727 (Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
728 let matched = if self.config.ignore_case {
729 AsciiChar::cast(haystack)[start..end]
730 .iter()
731 .map(|c| c.normalize(&self.config))
732 .eq(AsciiChar::cast(needle)
733 .iter()
734 .map(|c| c.normalize(&self.config)))
735 } else {
736 &haystack[start..end] == needle
737 };
738 if !matched {
739 return None;
740 }
741 self.calculate_score::<INDICES, _, _>(
742 AsciiChar::cast(haystack),
743 AsciiChar::cast(needle),
744 start,
745 end,
746 indices,
747 )
748 }
749 (Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => {
750 // a purely ascii haystack can never be transformed to match
751 // a needle that contains non-ascii chars since we don't allow gaps
752 return None;
753 }
754 (Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
755 let matched = haystack[start..end]
756 .iter()
757 .map(|c| c.normalize(&self.config))
758 .eq(AsciiChar::cast(needle)
759 .iter()
760 .map(|c| c.normalize(&self.config)));
761 if !matched {
762 return None;
763 }
764
765 self.calculate_score::<INDICES, _, _>(
766 haystack,
767 AsciiChar::cast(needle),
768 start,
769 end,
770 indices,
771 )
772 }
773 (Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
774 let matched = haystack[start..end]
775 .iter()
776 .map(|c| c.normalize(&self.config))
777 .eq(needle.iter().map(|c| c.normalize(&self.config)));
778 if !matched {
779 return None;
780 }
781 self.calculate_score::<INDICES, _, _>(haystack, needle, start, end, indices)
782 }
783 };
784 Some(score)
785 }
786}