1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
//! Byte/Char helpers
use super::Score;
use super::constants::SEPARATOR_TABLE;
use memchr::memchr;
use memchr::memchr2;
pub(super) trait Atom: PartialEq + Into<char> + Copy {
#[inline(always)]
fn eq(self, other: Self, respect_case: bool) -> bool
where
Self: PartialEq + Sized,
{
if respect_case {
self == other
} else {
self.eq_ignore_case(other)
}
}
fn eq_ignore_case(self, other: Self) -> bool;
fn is_lowercase(self) -> bool;
/// Return the index of the first occurrence of `self` in `haystack`,
/// or `None` if not found.
///
/// Implementations may override this with a SIMD-backed search (e.g.
/// `memchr` for `u8` in case-sensitive mode).
#[inline]
fn find_first_in(self, haystack: &[Self], respect_case: bool) -> Option<usize> {
haystack.iter().position(|&c| self.eq(c, respect_case))
}
/// Return the word-separator bonus for this character, or `0` if it is not
/// a separator. Uses a table lookup — a single bounds check replaces
/// several branches and the returned value encodes both *whether* the
/// character is a separator and *how much* bonus it carries.
#[inline(always)]
fn separator_bonus(self) -> Score {
let ch = self.into() as usize;
// For ch < 128 we do a table lookup; for ch >= 128 we return 0.
// The `get` returns None for out-of-range, and `copied().unwrap_or(0)` is
// typically compiled as a conditional move (branchless).
SEPARATOR_TABLE.get(ch).copied().unwrap_or(0)
}
}
impl Atom for u8 {
#[inline(always)]
fn eq_ignore_case(self, b: Self) -> bool {
self.eq_ignore_ascii_case(&b)
}
#[inline(always)]
fn is_lowercase(self) -> bool {
self.is_ascii_lowercase()
}
/// Case-sensitive search uses SIMD-backed `memchr`; case-insensitive
/// falls back to the generic scalar loop.
#[inline]
fn find_first_in(self, haystack: &[Self], respect_case: bool) -> Option<usize> {
if respect_case {
// SAFETY: `self` is a u8 and memchr searches for it in a byte slice.
memchr(self, haystack)
} else {
// Case-insensitive: compare lowercase. Also try the uppercase variant
// so a single `memchr` can be used for each case variant.
let lo = self.to_ascii_lowercase();
let hi = self.to_ascii_uppercase();
if lo == hi {
// No case distinction for this byte (digit, symbol, etc.).
memchr(lo, haystack)
} else {
// Check both variants and return the earliest occurrence.
let p_lo = memchr(lo, haystack);
let p_hi = memchr(hi, haystack);
match (p_lo, p_hi) {
(None, x) | (x, None) => x,
(Some(a), Some(b)) => Some(a.min(b)),
}
}
}
}
}
impl Atom for char {
#[inline(always)]
fn eq_ignore_case(self, b: Self) -> bool {
self.to_lowercase().eq(b.to_lowercase())
}
#[inline(always)]
fn is_lowercase(self) -> bool {
self.is_lowercase()
}
}