1#![doc = include_str!("../README.md")]
2#![feature(unicode_internals)]
3use core::unicode::conversions;
4use std::mem;
5
/// Returns `true` if any byte of the machine word `v` has its high bit set,
/// i.e. if `v`, viewed as `size_of::<usize>()` raw bytes, contains at least
/// one non-ASCII byte.
#[inline]
fn contains_nonascii(v: usize) -> bool {
    // `0x80` replicated into every byte of a `usize`. Deriving the mask from
    // the pointer width (instead of spelling out a 64-bit literal) keeps the
    // constant valid on 16- and 32-bit targets as well.
    const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; mem::size_of::<usize>()]);

    (v & NONASCII_MASK) != 0
}
11
12#[inline]
13unsafe fn is_ascii_funsafe<const N: usize>(b: *const u8) -> bool {
15 let mut count = 0;
17 for j in 0..N {
18 let chunk = b.cast::<usize>().add(j).read_unaligned();
19 count += contains_nonascii(chunk) as usize;
20 }
21 count == 0
22}
23
24#[inline]
25unsafe fn convert_while_ascii(
26 b: &[u8],
27 out: &mut [mem::MaybeUninit<u8>],
28 f: fn(&u8) -> u8,
29) -> usize {
30 debug_assert!(out.len() >= b.len());
31
32 const USIZE_SIZE: usize = mem::size_of::<usize>();
33 const MAGIC_UNROLL: usize = 16;
34
35 let mut i = 0;
36 while i + USIZE_SIZE * MAGIC_UNROLL <= b.len() {
37 let c = b.get_unchecked(i..);
38 let o = out.get_unchecked_mut(i..);
39
40 if !is_ascii_funsafe::<MAGIC_UNROLL>(c.as_ptr()) {
41 return i;
42 }
43
44 for j in 0..USIZE_SIZE * MAGIC_UNROLL {
46 let out = o.get_unchecked_mut(j);
47 out.write(f(c.get_unchecked(j)));
48 }
49
50 i += USIZE_SIZE * MAGIC_UNROLL;
51 }
52 i
53}
54
55pub fn to_lowercase(s: &str) -> String {
57 let mut out = Vec::<u8>::with_capacity(s.len());
58 let b = s.as_bytes();
59
60 unsafe {
61 let n = convert_while_ascii(b, out.spare_capacity_mut(), u8::to_ascii_lowercase);
62 out.set_len(n);
63 }
64
65 let rest = unsafe { s.get_unchecked(out.len()..) };
69
70 let mut to = unsafe { String::from_utf8_unchecked(out) };
72
73 for (i, c) in rest.char_indices() {
74 if c == 'Σ' {
75 map_uppercase_sigma(s, i, &mut to)
81 } else {
82 match conversions::to_lower(c) {
83 [a, '\0', _] => to.push(a),
84 [a, b, '\0'] => {
85 to.push(a);
86 to.push(b);
87 }
88 [a, b, c] => {
89 to.push(a);
90 to.push(b);
91 to.push(c);
92 }
93 }
94 }
95 }
96
97 fn map_uppercase_sigma(from: &str, i: usize, out: &mut String) {
98 debug_assert!('Σ'.len_utf8() == 2);
101 let is_word_final = case_ignoreable_then_cased(from[..i].chars().rev())
102 && !case_ignoreable_then_cased(from[i + 2..].chars());
103 out.push_str(if is_word_final { "ς" } else { "σ" });
104 }
105
106 fn case_ignoreable_then_cased<I: Iterator<Item = char>>(mut iter: I) -> bool {
107 use core::unicode::{Case_Ignorable, Cased};
108 match iter.find(|&c| !Case_Ignorable(c)) {
109 Some(c) => Cased(c),
110 None => false,
111 }
112 }
113
114 to
115}
116
117pub fn to_uppercase(s: &str) -> String {
119 let mut out = Vec::<u8>::with_capacity(s.len());
120 let b = s.as_bytes();
121
122 unsafe {
123 let n = convert_while_ascii(b, out.spare_capacity_mut(), u8::to_ascii_uppercase);
124 out.set_len(n);
125 }
126
127 let rest = unsafe { s.get_unchecked(out.len()..) };
131
132 let mut to = unsafe { String::from_utf8_unchecked(out) };
134
135 for c in rest.chars() {
136 match conversions::to_upper(c) {
137 [a, '\0', _] => to.push(a),
138 [a, b, '\0'] => {
139 to.push(a);
140 to.push(b);
141 }
142 [a, b, c] => {
143 to.push(a);
144 to.push(b);
145 to.push(c);
146 }
147 }
148 }
149 to
150}
151
152pub fn is_ascii(b: &[u8]) -> bool {
153 const USIZE_SIZE: usize = mem::size_of::<usize>();
154 const MAGIC_UNROLL: usize = 16;
155
156 if b.len() < USIZE_SIZE {
157 return b.iter().all(u8::is_ascii);
158 }
159 unsafe {
160 let mut i = 0;
161
162 while i + USIZE_SIZE * MAGIC_UNROLL <= b.len() {
164 if !is_ascii_funsafe::<MAGIC_UNROLL>(b.as_ptr().add(i)) {
165 return false;
166 }
167 i += USIZE_SIZE * MAGIC_UNROLL;
168 }
169
170 while i + USIZE_SIZE < b.len() {
172 if !is_ascii_funsafe::<1>(b.as_ptr().add(i)) {
173 return false;
174 }
175 i += USIZE_SIZE;
176 }
177
178 let i = b.len() - USIZE_SIZE;
180 is_ascii_funsafe::<1>(b.as_ptr().add(i))
181 }
182}
183
184#[cfg(test)]
185mod tests {
186 use super::*;
187
188 #[test]
189 fn lowercase() {
190 assert_eq!(to_lowercase(""), "");
191 assert_eq!(to_lowercase("AÉDžaé "), "aédžaé ");
192
193 assert_eq!(to_lowercase("ΑΣ"), "ας");
195 assert_eq!(to_lowercase("Α'Σ"), "α'ς");
196 assert_eq!(to_lowercase("Α''Σ"), "α''ς");
197
198 assert_eq!(to_lowercase("ΑΣ Α"), "ας α");
199 assert_eq!(to_lowercase("Α'Σ Α"), "α'ς α");
200 assert_eq!(to_lowercase("Α''Σ Α"), "α''ς α");
201
202 assert_eq!(to_lowercase("ΑΣ' Α"), "ας' α");
203 assert_eq!(to_lowercase("ΑΣ'' Α"), "ας'' α");
204
205 assert_eq!(to_lowercase("Α'Σ' Α"), "α'ς' α");
206 assert_eq!(to_lowercase("Α''Σ'' Α"), "α''ς'' α");
207
208 assert_eq!(to_lowercase("Α Σ"), "α σ");
209 assert_eq!(to_lowercase("Α 'Σ"), "α 'σ");
210 assert_eq!(to_lowercase("Α ''Σ"), "α ''σ");
211
212 assert_eq!(to_lowercase("Σ"), "σ");
213 assert_eq!(to_lowercase("'Σ"), "'σ");
214 assert_eq!(to_lowercase("''Σ"), "''σ");
215
216 assert_eq!(to_lowercase("ΑΣΑ"), "ασα");
217 assert_eq!(to_lowercase("ΑΣ'Α"), "ασ'α");
218 assert_eq!(to_lowercase("ΑΣ''Α"), "ασ''α");
219 }
220
221 #[test]
222 fn long() {
223 let mut upper = str::repeat("A", 128);
224 let mut lower = str::repeat("a", 128);
225
226 assert_eq!(to_lowercase(&upper), lower);
227 assert_eq!(to_uppercase(&lower), upper);
228
229 upper.push('Σ');
230 lower.push('σ');
231
232 assert_eq!(to_lowercase(&upper), lower);
233 assert_eq!(to_uppercase(&lower), upper);
234 }
235
236 #[test]
237 fn case_conv_long() {
238 let upper = str::repeat("A", 512);
239 let lower = str::repeat("a", 512);
240
241 assert_eq!(to_lowercase(&upper), lower);
242 assert_eq!(to_uppercase(&lower), upper);
243 }
244
245 #[test]
246 fn case_conv_long_unicode() {
247 let upper = str::repeat("É", 512);
248 let lower = str::repeat("é", 512);
249
250 assert_eq!(to_lowercase(&upper), lower);
251 assert_eq!(to_uppercase(&lower), upper);
252 }
253
254 #[test]
255 fn uppercase() {
256 assert_eq!(to_uppercase(""), "");
257 assert_eq!(to_uppercase("aéDžßfiᾀ"), "AÉDŽSSFIἈΙ");
258 }
259}