simd_normalizer/
casefold.rs1use alloc::borrow::Cow;
7use alloc::string::String;
8
9use crate::tables;
10
/// Selects which case-folding rules [`casefold_char`] and [`casefold`] apply.
///
/// The type is a plain field-less enum, so it is cheap to copy, compare and
/// hash (e.g. to key a per-mode cache).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum CaseFoldMode {
    /// Fold using only the general case-folding table; for example `'I'`
    /// folds to `'i'`.
    Standard,
    /// Consult the Turkish override table first and fall back to the general
    /// table for every other character; for example `'I'` folds to dotless
    /// `'ı'` (U+0131) and `'İ'` (U+0130) folds to `'i'`.
    Turkish,
}
23
24#[inline]
29pub fn casefold_char(c: char, mode: CaseFoldMode) -> char {
30 if mode == CaseFoldMode::Turkish
32 && let Some(folded) = tables::turkish_casefold(c)
33 {
34 return folded;
35 }
36 tables::lookup_casefold(c).unwrap_or(c)
37}
38
39pub fn casefold<'a>(input: &'a str, mode: CaseFoldMode) -> Cow<'a, str> {
44 if input.is_empty() {
45 return Cow::Borrowed(input);
46 }
47
48 let mut scan_iter = input.char_indices();
50 let first_change = loop {
51 match scan_iter.next() {
52 None => return Cow::Borrowed(input),
53 Some((idx, ch)) => {
54 let folded = casefold_char(ch, mode);
55 if folded != ch {
56 break idx;
57 }
58 },
59 }
60 };
61
62 let mut out = String::with_capacity(input.len());
64 out.push_str(&input[..first_change]);
65
66 for ch in input[first_change..].chars() {
67 out.push(casefold_char(ch, mode));
68 }
69
70 Cow::Owned(out)
71}
72
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that each `(input, expected)` pair folds as expected in `mode`.
    fn check_chars(mode: CaseFoldMode, cases: &[(char, char)]) {
        for &(input, expected) in cases {
            assert_eq!(casefold_char(input, mode), expected);
        }
    }

    /// Reports whether folding returned the input without allocating.
    fn is_borrowed(value: &Cow<'_, str>) -> bool {
        matches!(value, Cow::Borrowed(_))
    }

    // ---- single-character folding ----

    #[test]
    fn fold_ascii_uppercase() {
        check_chars(CaseFoldMode::Standard, &[('A', 'a'), ('Z', 'z')]);
    }

    #[test]
    fn fold_ascii_lowercase_unchanged() {
        check_chars(CaseFoldMode::Standard, &[('a', 'a'), ('z', 'z')]);
    }

    #[test]
    fn fold_digit_unchanged() {
        check_chars(CaseFoldMode::Standard, &[('0', '0'), ('9', '9')]);
    }

    #[test]
    fn fold_latin_extended() {
        check_chars(
            CaseFoldMode::Standard,
            &[('\u{00C0}', '\u{00E0}'), ('\u{00D6}', '\u{00F6}')],
        );
    }

    #[test]
    fn fold_greek() {
        check_chars(
            CaseFoldMode::Standard,
            &[('\u{0391}', '\u{03B1}'), ('\u{03A3}', '\u{03C3}')],
        );
    }

    #[test]
    fn fold_cyrillic() {
        check_chars(CaseFoldMode::Standard, &[('\u{0410}', '\u{0430}')]);
    }

    #[test]
    fn fold_micro_sign() {
        check_chars(CaseFoldMode::Standard, &[('\u{00B5}', '\u{03BC}')]);
    }

    #[test]
    fn fold_sharp_s() {
        check_chars(CaseFoldMode::Standard, &[('\u{1E9E}', '\u{00DF}')]);
    }

    // ---- Turkish mode ----

    #[test]
    fn fold_turkish_dotless_i() {
        check_chars(CaseFoldMode::Standard, &[('I', 'i')]);
        check_chars(CaseFoldMode::Turkish, &[('I', '\u{0131}')]);
    }

    #[test]
    fn fold_turkish_dotted_capital_i() {
        check_chars(CaseFoldMode::Turkish, &[('\u{0130}', 'i')]);
    }

    #[test]
    fn fold_turkish_other_chars_unchanged() {
        check_chars(CaseFoldMode::Turkish, &[('A', 'a'), ('a', 'a')]);
    }

    // ---- whole-string folding ----

    #[test]
    fn fold_string_ascii() {
        let folded = casefold("Hello World", CaseFoldMode::Standard);
        assert_eq!(folded, "hello world");
    }

    #[test]
    fn fold_string_already_folded() {
        let folded = casefold("hello world", CaseFoldMode::Standard);
        assert!(is_borrowed(&folded));
        assert_eq!(folded, "hello world");
    }

    #[test]
    fn fold_string_empty() {
        let folded = casefold("", CaseFoldMode::Standard);
        assert!(is_borrowed(&folded));
    }

    #[test]
    fn fold_string_mixed() {
        let folded = casefold("Ströme", CaseFoldMode::Standard);
        assert_eq!(folded, "ströme");
    }

    #[test]
    fn fold_string_turkish() {
        let folded = casefold("Istanbul", CaseFoldMode::Turkish);
        assert_eq!(folded, "\u{0131}stanbul");
    }

    #[test]
    fn fold_string_all_ascii_lowercase() {
        let input = "abcdefghijklmnopqrstuvwxyz0123456789";
        let folded = casefold(input, CaseFoldMode::Standard);
        assert!(is_borrowed(&folded));
    }
}