1#![warn(missing_docs)]
4
5use core::fmt;
10use std::ops;
11use unicase::UniCase;
12use unicode_normalization::{UnicodeNormalization, char::is_combining_mark};
13
/// Bit set recording which textual differences a comparison should ignore.
///
/// Bit 0 stands for letter case and bit 1 for accents; the two may be combined
/// with `|` and intersected with `&`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct Ignoring(u8);

impl Ignoring {
    /// Nothing is ignored.
    const NEITHER: Self = Self(0);
    /// Letter case is ignored.
    const CASE: Self = Self(1);
    /// Accents are ignored.
    const ACCENT: Self = Self(2);
}

impl fmt::Display for Ignoring {
    /// Writes the two-character suffix for this flag set: `/_` (neither),
    /// `/c` (case), `/a` (accent), or `/b` for any other value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match *self {
            Self::NEITHER => "/_",
            Self::CASE => "/c",
            Self::ACCENT => "/a",
            _ => "/b",
        };
        f.write_str(suffix)
    }
}

impl ops::BitAnd for Ignoring {
    type Output = Self;

    /// Keeps only the flags present in both operands.
    fn bitand(self, rhs: Self) -> Self::Output {
        let (Self(left), Self(right)) = (self, rhs);
        Self(left & right)
    }
}

impl ops::BitOr for Ignoring {
    type Output = Self;

    /// Combines the flags of both operands.
    fn bitor(self, rhs: Self) -> Self::Output {
        let (Self(left), Self(right)) = (self, rhs);
        Self(left | right)
    }
}
51
52#[derive(Debug, Clone, PartialOrd, Ord)]
58pub struct QString {
59 inner: String,
61 flags: Ignoring,
63}
64
65impl fmt::Display for QString {
66 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67 write!(f, "/{}{}", self.inner, self.flags)
68 }
69}
70
71impl PartialEq for QString {
72 fn eq(&self, other: &Self) -> bool {
73 let to_icase = self.is_icase() || other.is_icase();
74 let to_iaccent = self.is_iaccent() || other.is_iaccent();
75 match (to_icase, to_iaccent) {
76 (true, true) => {
77 UniCase::new(QString::unaccent(&self.inner))
78 == UniCase::new(QString::unaccent(&other.inner))
79 }
80 (true, false) => UniCase::new(&self.inner) == UniCase::new(&other.inner),
81 (false, true) => QString::unaccent(&self.inner) == QString::unaccent(&other.inner),
82 (false, false) => self.inner == other.inner,
83 }
84 }
85}
86
87impl Eq for QString {}
88
89impl QString {
90 pub fn plain<S: Into<String>>(s: S) -> Self {
92 Self {
93 inner: s.into(),
94 flags: Ignoring::NEITHER,
95 }
96 }
97
98 pub fn and_icase(&self) -> Self {
101 Self {
102 inner: self.inner.to_owned(),
103 flags: self.flags.clone() | Ignoring::CASE,
104 }
105 }
106
107 pub fn and_iaccent(&self) -> Self {
110 Self {
111 inner: self.inner.to_owned(),
112 flags: self.flags.clone() | Ignoring::ACCENT,
113 }
114 }
115
116 pub fn as_str(&self) -> &str {
118 &self.inner
119 }
120
121 pub fn unaccent(s: &str) -> String {
124 if s.is_empty() {
125 return "".into();
126 }
127
128 let result: String = s.nfd().filter(|x| !is_combining_mark(*x)).nfc().collect();
129 result
130 }
131
132 pub(crate) fn flags(&self) -> u8 {
134 self.flags.0
135 }
136
137 pub(crate) fn inner(&self) -> &str {
139 &self.inner
140 }
141
142 #[allow(dead_code)]
144 pub(crate) fn is_plain(&self) -> bool {
145 self.flags.0 == 0
146 }
147
148 pub(crate) fn is_icase(&self) -> bool {
150 self.flags.0 % 2 == 1
151 }
152
153 pub(crate) fn is_iaccent(&self) -> bool {
155 self.flags.0 >= 2
156 }
157
158 pub(crate) fn like(input: &Self, pattern: &Self) -> bool {
160 const WC: char = '%';
161
162 fn recursive(input: &[char], pattern: &[char]) -> bool {
164 if pattern.is_empty() {
166 return input.is_empty();
167 }
168
169 if input.is_empty() {
170 return pattern.iter().all(|&x| x == '%');
171 }
172
173 if pattern[0] == '\\' && pattern.len() > 1 {
174 let escaped = pattern[1];
175 return recursive(&input[1..], &pattern[2..])
176 || (input[0] == escaped) && recursive(&input[1..], &pattern[2..]);
177 }
178
179 if pattern[0] == '%' {
180 return recursive(&input[1..], pattern) || recursive(input, &pattern[1..]);
181 }
182
183 if pattern[0] == '_' {
184 return recursive(&input[1..], &pattern[1..]);
185 }
186
187 (input[0] == pattern[0]) && recursive(&input[1..], &pattern[1..])
188 }
189
190 fn reduce_wildcards(pattern: &str) -> Vec<char> {
192 let mut result: Vec<char> = Vec::with_capacity(pattern.len());
193 let mut chars = pattern.chars();
194 let mut saw_uwc = false;
195 while let Some(c) = chars.next() {
196 let state = if c == '\\' {
197 result.push('\\');
198 if let Some(n) = chars.next() {
199 result.push(n);
200 }
201 false
202 } else if c == WC {
203 if !saw_uwc {
204 result.push(WC);
205 }
206 true
207 } else {
208 result.push(c);
209 false
210 };
211 saw_uwc = state;
212 }
213 result
214 }
215
216 let input_icase = input.is_icase();
218 let pattern_icase = pattern.is_icase();
219 let icase = input_icase || pattern_icase;
220 let input_iaccent = input.is_iaccent();
222 let pattern_iaccent = pattern.is_iaccent();
223 let iaccent = input_iaccent || pattern_iaccent;
224
225 let folded_input: Vec<char> = match (icase, iaccent) {
226 (true, true) => UniCase::unicode(QString::unaccent(&input.inner))
227 .to_folded_case()
228 .chars()
229 .collect(),
230 (true, false) => UniCase::unicode(input.inner.as_str())
231 .to_folded_case()
232 .chars()
233 .collect(),
234 (false, true) => QString::unaccent(&input.inner).as_str().chars().collect(),
235 (false, false) => input.inner.chars().collect(),
236 };
237
238 let binding1 = UniCase::unicode(QString::unaccent(&pattern.inner)).to_folded_case();
239 let binding2 = UniCase::unicode(&pattern.inner).to_folded_case();
240 let binding3 = QString::unaccent(&pattern.inner);
241 let folded_pattern = match (icase, iaccent) {
242 (true, true) => binding1.as_str(),
243 (true, false) => binding2.as_str(),
244 (false, true) => binding3.as_str(),
245 (false, false) => pattern.inner.as_str(),
246 };
247
248 let reduced_pattern = reduce_wildcards(folded_pattern);
250
251 recursive(&folded_input, &reduced_pattern)
252 }
253
254 #[cfg(test)]
256 pub fn iaccent(s: &str) -> Self {
257 Self {
258 inner: s.to_owned(),
259 flags: Ignoring::ACCENT,
260 }
261 }
262
263 #[cfg(test)]
265 pub fn icase(s: &str) -> Self {
266 Self {
267 inner: s.to_owned(),
268 flags: Ignoring::CASE,
269 }
270 }
271}
272
273#[cfg(test)]
274mod tests {
275 use super::*;
276 use rand::{
277 Rng,
278 distr::{
279 Alphanumeric,
280 uniform::{UniformChar, UniformSampler},
281 },
282 };
283 use tracing::debug;
284
285 #[test]
286 fn test_display() {
287 const S1: &str = "/chișinău/_";
288 const S2: &str = "/CHIȘINĂU/c";
289 const S3: &str = "/CHIȘINĂU/a";
290 const S5: &str = "/chișinău/b";
291
292 let s1 = QString::plain("chișinău");
293 assert!(s1.is_plain());
294 assert_eq!(s1.to_string(), S1);
295
296 let s2 = QString::icase("CHIȘINĂU");
297 assert!(s2.is_icase());
298 assert_eq!(s2.to_string(), S2);
299
300 let s3 = QString::iaccent("CHIȘINĂU");
301 assert!(s3.is_iaccent());
302 assert_eq!(s3.to_string(), S3);
303
304 let s4 = s1.and_icase();
305 assert!(s1.is_plain());
306 assert!(!s4.is_plain());
307 assert!(s4.is_icase());
308
309 let s5 = s4.and_iaccent();
310 assert_eq!(s5.to_string(), S5);
311 assert!(s5.is_icase());
312 assert!(s5.is_iaccent());
313 }
314
315 #[test]
316 fn test_equality() {
317 let s1 = QString::plain("chisinau");
318 let s2 = QString::icase("CHISINAU");
319 let s3 = QString::iaccent("chișinău");
320 let s4 = QString::iaccent("CHIȘINĂU").and_icase();
321 let s5 = QString::plain("CHISINAU").and_iaccent();
322
323 assert!(s1 == s2);
324 assert!(s3 == s4);
325 assert!(s4 == s5);
326
327 let s4 = s2.and_iaccent();
329 let s5 = s3.and_icase();
330
331 assert!(s1 == s3);
332 assert!(s1 == s4);
333 assert!(s1 == s5);
334
335 let s5 = s4.and_iaccent();
337 assert_eq!(s2, s4);
338 assert_eq!(s2, s5);
339 assert_eq!(s4, s5);
340 assert!(s5.is_icase());
341 assert!(s5.is_iaccent());
342 }
343
344 #[test]
345 fn test_unaccent() {
346 let slo = "chisinau";
347 let shi = "CHISINAU";
348 let aaaa = ["ẵ", "aͣ", "ą", "ǟ", "aₐ", "ắ"];
349 let nota = ["ɑ", "Ⓐ", "ⓐ", "æ", "ǽ", "ⱥ", "ᶏ", "ₐ"];
350
351 let iaccented = QString::unaccent("chișinău");
352 assert_eq!(iaccented, slo);
353
354 let iaccented = QString::unaccent("CHIȘINĂU");
355 assert_eq!(iaccented, shi);
356
357 let iaccented = QString::unaccent("Chiș%");
360 tracing::debug!("iaccented = '{iaccented}'");
361 assert_eq!(iaccented, "Chis%");
362
363 let iaccented = QString::unaccent("cHis%");
364 tracing::debug!("iaccented = '{iaccented}'");
365 assert_eq!(iaccented, "cHis%");
366
367 let a = QString::unaccent(&UniCase::new("chișinău%").to_folded_case());
370 tracing::debug!("a = '{a}'");
371 let b = UniCase::new(QString::unaccent("chișinău%")).to_folded_case();
372 tracing::debug!("b = '{b}'");
373 assert_eq!(a, b);
374
375 for c in aaaa.into_iter() {
377 let a = QString::unaccent(c);
378 assert!(a.starts_with('a'));
379 }
380 for c in nota.into_iter() {
381 let a = QString::unaccent(c);
382 assert!(!a.starts_with('a'));
383 }
384 }
385
386 fn starts_with_foo() -> String {
387 let mut rng = rand::rng();
388 let size: usize = rng.random_range(5..15);
389 let s = (0..size)
390 .map(|_| rng.sample(Alphanumeric) as char)
391 .collect();
392 let hit = rng.random_bool(0.25);
393 if hit { format!("Foo{s}") } else { s }
394 }
395
396 #[test]
397 fn test_like_small() {
398 let pattern = QString::icase("foo%");
399 for _ in 0..1000 {
400 let s = starts_with_foo();
401 if s.starts_with("Foo") {
402 let input = QString::icase(&s);
403 let result = QString::like(&input, &pattern);
404 if !result {
405 eprintln!("*** Was expecting '{s}' to succeed");
406 panic!("Ooops")
407 }
408 };
409 }
410 }
411
412 #[test]
413 fn test_like_capital() {
414 let pattern = QString::icase("FOO%");
415 for _ in 0..1000 {
416 let s = starts_with_foo();
417 if s.starts_with("Foo") {
418 let input = QString::icase(&s);
419 let result = QString::like(&input, &pattern);
420 if !result {
421 eprintln!("*** Was expecting '{s}' to succeed");
422 panic!("Ooops")
423 }
424 };
425 }
426 }
427
428 #[test]
429 fn test_nfkd() {
430 const S: &str = "ἄbc";
431
432 let r1: String = S
433 .chars()
434 .map(|c| UnicodeNormalization::nfkd(c).nth(0).unwrap())
435 .collect();
436 tracing::debug!("'{r1}'");
437 assert_eq!(r1, "αbc");
438
439 assert_eq!(QString::unaccent(S), r1);
440 }
441
442 #[test]
443 #[tracing_test::traced_test]
444 fn test_like_bench() {
445 fn random_latin_word() -> String {
447 let mut rng = rand::rng();
448 let len: usize = Rng::random_range(&mut rng, 5..10);
449 let dist = UniformChar::new_inclusive('\u{0041}', '\u{024F}').unwrap();
450 (0..len).map(|_| dist.sample(&mut rng)).collect()
451 }
452
453 const PATTERN: &str = "Ä%%";
454 let pattern = QString::plain(PATTERN).and_iaccent().and_icase();
455 for _ in 0..1000 {
456 let raw = random_latin_word();
457 let cooked = raw
458 .nfd()
459 .filter(|x| !is_combining_mark(*x))
460 .nfc()
461 .collect::<String>();
462 let ricotta = UniCase::unicode(&cooked).to_folded_case();
463 let expected = ricotta.starts_with('a');
464 let input = QString::plain(&raw).and_icase().and_iaccent();
465 let actual = QString::like(&input, &pattern);
466 if actual != expected {
467 debug!(" raw: '{raw}' {}", raw.escape_unicode());
468 debug!(" cotta: '{cooked}' {}", cooked.escape_unicode());
469 debug!("ricotta: '{ricotta}' {}", ricotta.escape_unicode());
470 panic!(
471 "IA(IC({input})) LIKE IC(IA({pattern})) is {actual} but expected {expected}"
472 );
473 }
474 }
475 }
476}