1use std::fmt::{self, Debug, Display};
4
5use crate::Config;
6#[cfg(feature = "unicode-casefold")]
7use crate::chars::case_fold::CASE_FOLDING_SIMPLE;
8
9#[allow(warnings)]
11#[rustfmt::skip]
12#[cfg(feature = "unicode-casefold")]
13mod case_fold;
14#[cfg(feature = "unicode-normalization")]
15mod normalize;
16
17pub(crate) trait Char: Copy + Eq + Ord + fmt::Display {
18 const ASCII: bool;
19 fn char_class(self, config: &Config) -> CharClass;
20 fn char_class_and_normalize(self, config: &Config) -> (Self, CharClass);
21 fn normalize(self, config: &Config) -> Self;
22}
23
/// Transparent wrapper around a single byte.
///
/// The wrapper exists so the byte can get its own `Display` formatting and
/// comparison impls while keeping the exact memory layout of `u8`.
#[repr(transparent)]
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct AsciiChar(pub u8);

impl AsciiChar {
    /// Reinterprets a byte slice as a slice of [`AsciiChar`] without copying.
    ///
    /// The returned slice covers the same memory and has the same length as
    /// `bytes`.
    pub fn cast(bytes: &[u8]) -> &[AsciiChar] {
        let len = bytes.len();
        let start = bytes.as_ptr().cast::<AsciiChar>();
        // SAFETY: `AsciiChar` is `#[repr(transparent)]` over `u8`, so both
        // types share size, alignment and validity. `start`/`len` come from a
        // live `&[u8]`, and the elided lifetime ties the result to `bytes`.
        unsafe { std::slice::from_raw_parts(start, len) }
    }
}
34
35impl fmt::Display for AsciiChar {
36 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
37 Display::fmt(&(self.0 as char), f)
38 }
39}
40
41impl PartialEq<AsciiChar> for char {
42 fn eq(&self, other: &AsciiChar) -> bool {
43 other.0 as char == *self
44 }
45}
46
47impl Char for AsciiChar {
48 const ASCII: bool = true;
49 #[inline]
50 fn char_class(self, config: &Config) -> CharClass {
51 let c = self.0;
52 if c >= b'a' && c <= b'z' {
54 CharClass::Lower
55 } else if c >= b'A' && c <= b'Z' {
56 CharClass::Upper
57 } else if c >= b'0' && c <= b'9' {
58 CharClass::Number
59 } else if c.is_ascii_whitespace() {
60 CharClass::Whitespace
61 } else if config.delimiter_chars.contains(&c) {
62 CharClass::Delimiter
63 } else {
64 CharClass::NonWord
65 }
66 }
67
68 #[inline(always)]
69 fn char_class_and_normalize(mut self, config: &Config) -> (Self, CharClass) {
70 let char_class = self.char_class(config);
71 if config.ignore_case && char_class == CharClass::Upper {
72 self.0 += 32
73 }
74 (self, char_class)
75 }
76
77 #[inline(always)]
78 fn normalize(mut self, config: &Config) -> Self {
79 if config.ignore_case && self.0 >= b'A' && self.0 <= b'Z' {
80 self.0 += 32
81 }
82 self
83 }
84}
85
86fn char_class_non_ascii(c: char) -> CharClass {
87 if c.is_lowercase() {
88 CharClass::Lower
89 } else if is_upper_case(c) {
90 CharClass::Upper
91 } else if c.is_numeric() {
92 CharClass::Number
93 } else if c.is_alphabetic() {
94 CharClass::Letter
95 } else if c.is_whitespace() {
96 CharClass::Whitespace
97 } else {
98 CharClass::NonWord
99 }
100}
101
102impl Char for char {
103 const ASCII: bool = false;
104 #[inline(always)]
105 fn char_class(self, config: &Config) -> CharClass {
106 if self.is_ascii() {
107 return AsciiChar(self as u8).char_class(config);
108 }
109 char_class_non_ascii(self)
110 }
111
112 #[inline(always)]
113 fn char_class_and_normalize(mut self, config: &Config) -> (Self, CharClass) {
114 if self.is_ascii() {
115 let (c, class) = AsciiChar(self as u8).char_class_and_normalize(config);
116 return (c.0 as char, class);
117 }
118 let char_class = char_class_non_ascii(self);
119 #[cfg(feature = "unicode-casefold")]
120 let mut case_fold = char_class == CharClass::Upper;
121 #[cfg(feature = "unicode-normalization")]
122 if config.normalize {
123 self = normalize::normalize(self);
124 case_fold = true
125 }
126 #[cfg(feature = "unicode-casefold")]
127 if case_fold && config.ignore_case {
128 self = CASE_FOLDING_SIMPLE
129 .binary_search_by_key(&self, |(upper, _)| *upper)
130 .map_or(self, |idx| CASE_FOLDING_SIMPLE[idx].1)
131 }
132 (self, char_class)
133 }
134
135 #[inline(always)]
136 fn normalize(mut self, config: &Config) -> Self {
137 #[cfg(feature = "unicode-normalization")]
138 if config.normalize {
139 self = normalize::normalize(self);
140 }
141 #[cfg(feature = "unicode-casefold")]
142 if config.ignore_case {
143 self = to_lower_case(self)
144 }
145 self
146 }
147}
148
149#[cfg(feature = "unicode-normalization")]
150pub use normalize::normalize;
151#[cfg(feature = "unicode-segmentation")]
152use unicode_segmentation::UnicodeSegmentation;
153
/// Maps `c` through the `CASE_FOLDING_SIMPLE` table.
///
/// The table is binary-searched on the first element of each pair; on a hit
/// the second element is returned, otherwise `c` is returned unchanged.
#[cfg(feature = "unicode-casefold")]
#[inline(always)]
pub fn to_lower_case(c: char) -> char {
    match CASE_FOLDING_SIMPLE.binary_search_by_key(&c, |entry| entry.0) {
        Ok(idx) => CASE_FOLDING_SIMPLE[idx].1,
        Err(_) => c,
    }
}
162
/// Reports whether `c` counts as an uppercase character.
///
/// With the `unicode-casefold` feature this is true exactly when `c` appears
/// as a key in the `CASE_FOLDING_SIMPLE` table, i.e. when [`to_lower_case`]
/// has a mapping for it. Without the feature it falls back to
/// `char::is_uppercase`.
#[inline(always)]
pub fn is_upper_case(c: char) -> bool {
    #[cfg(feature = "unicode-casefold")]
    {
        CASE_FOLDING_SIMPLE
            .binary_search_by_key(&c, |&(upper, _)| upper)
            .is_ok()
    }
    #[cfg(not(feature = "unicode-casefold"))]
    {
        c.is_uppercase()
    }
}
176
/// Coarse classification of a character.
///
/// The derived ordering follows the declaration order below, so the variants
/// must not be reordered without checking every comparison on `CharClass`.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum CharClass {
    /// Whitespace characters.
    Whitespace,
    /// Anything that matches none of the other classes.
    NonWord,
    /// ASCII bytes listed in `Config::delimiter_chars`.
    Delimiter,
    /// Lowercase letters.
    Lower,
    /// Uppercase letters.
    Upper,
    /// Alphabetic characters that are neither lowercase nor uppercase.
    Letter,
    /// Digits and other numeric characters.
    Number,
}
187
/// Iterates over `text`, yielding one `char` per item.
///
/// With the `unicode-segmentation` feature, `text` is split into extended
/// grapheme clusters and each cluster is represented by its first `char`;
/// the `"\r\n"` cluster is represented by `'\n'` instead. Without the
/// feature, every `char` of `text` is yielded as-is.
///
/// # Panics
///
/// With `unicode-segmentation` enabled, panics if the segmenter ever yields
/// an empty cluster.
pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ {
    #[cfg(feature = "unicode-segmentation")]
    {
        text.graphemes(true).map(|cluster| match cluster {
            "\r\n" => '\n',
            _ => cluster
                .chars()
                .next()
                .expect("graphemes must be non-empty"),
        })
    }
    #[cfg(not(feature = "unicode-segmentation"))]
    {
        text.chars()
    }
}