Skip to main content

marki_parse/
special_char.rs

/// An ASCII byte that has been given a dedicated name.
///
/// The enum is `#[repr(u8)]` and every discriminant is the byte value of the
/// character itself, so casting a variant with `as u8` yields that character.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum SpecialChar {
    // --- whitespace ---
    /// Horizontal tab, `\t` (0x09).
    Tab = b'\t',
    /// Line feed, `\n` (0x0A).
    Newline = b'\n',
    /// Carriage return, `\r` (0x0D).
    CarriageReturn = b'\r',
    /// Space (0x20).
    Space = b' ',

    // --- punctuation and the digit zero, in ascending byte order ---
    /// `!` (0x21).
    ExclamationMark = b'!',
    /// `"` (0x22).
    DoubleQuote = b'"',
    /// `#` (0x23).
    Hash = b'#',
    /// `'` (0x27).
    SingleQuote = b'\'',
    /// `(` (0x28).
    OpenParen = b'(',
    /// `)` (0x29).
    CloseParen = b')',
    /// `*` (0x2A).
    Asterisk = b'*',
    /// `+` (0x2B).
    Plus = b'+',
    /// `-` (0x2D).
    Dash = b'-',
    /// `.` (0x2E).
    Dot = b'.',
    /// The digit `0` (0x30).
    Zero = b'0',
    /// `>` (0x3E).
    GreaterThan = b'>',
    /// `[` (0x5B).
    OpenBracket = b'[',
    /// `\` (0x5C).
    Backslash = b'\\',
    /// `]` (0x5D).
    CloseBracket = b']',
    /// `_` (0x5F).
    Underscore = b'_',
    /// `` ` `` (0x60).
    Backtick = b'`',
    /// `~` (0x7E).
    Tilde = b'~',
}
27
28/// Static lookup table for `from_byte`. Built at compile time.
29static FROM_BYTE: [Option<SpecialChar>; 256] = {
30    use SpecialChar as S;
31    let mut table: [Option<SpecialChar>; 256] = [None; 256];
32    table[b'\t' as usize] = Some(S::Tab);
33    table[b'\n' as usize] = Some(S::Newline);
34    table[b'\r' as usize] = Some(S::CarriageReturn);
35    table[b' ' as usize] = Some(S::Space);
36    table[b'!' as usize] = Some(S::ExclamationMark);
37    table[b'"' as usize] = Some(S::DoubleQuote);
38    table[b'#' as usize] = Some(S::Hash);
39    table[b'\'' as usize] = Some(S::SingleQuote);
40    table[b'(' as usize] = Some(S::OpenParen);
41    table[b')' as usize] = Some(S::CloseParen);
42    table[b'*' as usize] = Some(S::Asterisk);
43    table[b'+' as usize] = Some(S::Plus);
44    table[b'-' as usize] = Some(S::Dash);
45    table[b'.' as usize] = Some(S::Dot);
46    table[b'0' as usize] = Some(S::Zero);
47    table[b'>' as usize] = Some(S::GreaterThan);
48    table[b'[' as usize] = Some(S::OpenBracket);
49    table[b'\\' as usize] = Some(S::Backslash);
50    table[b']' as usize] = Some(S::CloseBracket);
51    table[b'_' as usize] = Some(S::Underscore);
52    table[b'~' as usize] = Some(S::Tilde);
53    table[b'`' as usize] = Some(S::Backtick);
54    table
55};
56
57impl SpecialChar {
58    /// Returns the `u8` value of this character.
59    #[inline]
60    #[must_use]
61    pub const fn byte(self) -> u8 {
62        self as u8
63    }
64
65    /// Look up a byte in the static table. O(1).
66    #[inline]
67    #[must_use]
68    pub fn from_byte(b: u8) -> Option<Self> {
69        FROM_BYTE[b as usize]
70    }
71
72    #[inline]
73    #[must_use]
74    pub const fn is_list_char(self) -> bool {
75        matches!(self, Self::Dash | Self::Asterisk | Self::Plus)
76    }
77
78    #[inline]
79    #[must_use]
80    pub fn count_leading_bytes(self, bytes: &[u8]) -> usize {
81        let needle = self.byte();
82        #[cfg(target_arch = "x86_64")]
83        {
84            // SAFETY: SSE2 is baseline on all x86_64 processors.
85            unsafe { count_leading_sse2(bytes, needle) }
86        }
87        #[cfg(not(target_arch = "x86_64"))]
88        {
89            count_leading_scalar(bytes, needle)
90        }
91    }
92}
93
/// Portable fallback: length of the run of `needle` bytes at the start of
/// `bytes`.
///
/// Returns `0` for an empty slice or when the first byte differs, and
/// `bytes.len()` when every byte matches.
#[cfg(not(target_arch = "x86_64"))]
fn count_leading_scalar(bytes: &[u8], needle: u8) -> usize {
    bytes.iter().take_while(|&&byte| byte == needle).count()
}
102
103#[cfg(target_arch = "x86_64")]
104use std::arch::x86_64::{_mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm_set1_epi8};
105
/// SSE2 variant: length of the run of `needle` bytes at the start of `bytes`.
///
/// The slice is consumed 16 bytes at a time. Each block is compared against a
/// vector filled with `needle`, and the comparison is collapsed into a 16-bit
/// mask with one bit per byte. The first block that is not a full match
/// decides the result; any bytes left after the last full block are checked
/// one by one.
///
/// # Safety
///
/// The executing CPU must support SSE2, as required by the `target_feature`
/// attribute on this function.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
#[allow(
    clippy::cast_ptr_alignment,
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss
)]
unsafe fn count_leading_sse2(bytes: &[u8], needle: u8) -> usize {
    const LANES: usize = 16;

    // SAFETY: the intrinsics need SSE2, which this function's contract
    // guarantees. Every load reads exactly `LANES` bytes from a block produced
    // by `chunks_exact(LANES)`, so it stays inside `bytes`, and the unaligned
    // load has no alignment requirement.
    unsafe {
        let pattern = _mm_set1_epi8(i8::from_ne_bytes([needle]));
        let mut matched = 0;

        let mut blocks = bytes.chunks_exact(LANES);
        for block in &mut blocks {
            let lanes = _mm_loadu_si128(block.as_ptr().cast());
            // Bit k is set when byte k of the block equals `needle`.
            let eq_bits = _mm_movemask_epi8(_mm_cmpeq_epi8(lanes, pattern)) as u16;
            if eq_bits != u16::MAX {
                // The run ends inside this block, at the lowest clear bit.
                return matched + eq_bits.trailing_ones() as usize;
            }
            matched += LANES;
        }

        // Fewer than `LANES` bytes remain; finish them without SIMD.
        let tail_run = blocks
            .remainder()
            .iter()
            .take_while(|&&byte| byte == needle)
            .count();
        matched + tail_run
    }
}
140
141impl PartialEq<u8> for SpecialChar {
142    #[inline]
143    fn eq(&self, other: &u8) -> bool {
144        self.byte() == *other
145    }
146}
147
148impl PartialEq<SpecialChar> for u8 {
149    #[inline]
150    fn eq(&self, other: &SpecialChar) -> bool {
151        *self == other.byte()
152    }
153}
154
155impl PartialEq<SpecialChar> for Option<&u8> {
156    #[inline]
157    fn eq(&self, other: &SpecialChar) -> bool {
158        matches!(self, Some(b) if **b == other.byte())
159    }
160}
161
162impl PartialEq<SpecialChar> for Option<u8> {
163    #[inline]
164    fn eq(&self, other: &SpecialChar) -> bool {
165        matches!(self, Some(b) if *b == other.byte())
166    }
167}
168
169impl std::fmt::Display for SpecialChar {
170    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
171        write!(f, "{}", self.byte() as char)
172    }
173}