1#![warn(missing_docs)]
4
5use core::fmt;
10use std::ops;
11use unicase::UniCase;
12use unicode_normalization::{UnicodeNormalization, char::is_combining_mark};
13
/// Small bit set recording which character properties a comparison should
/// disregard.
///
/// The wrapped `u8` is combined through the `ops::BitAnd` / `ops::BitOr`
/// impls; the associated constants `NEITHER` (0), `CASE` (1) and `ACCENT` (2)
/// are the building blocks.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct Ignoring(u8);
18
19impl Ignoring {
20 const NEITHER: Self = Self(0);
21 const CASE: Self = Self(1);
22 const ACCENT: Self = Self(2);
23}
24
25impl fmt::Display for Ignoring {
26 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27 match *self {
28 Ignoring::NEITHER => write!(f, "/_"),
29 Ignoring::CASE => write!(f, "/c"),
30 Ignoring::ACCENT => write!(f, "/a"),
31 _ => write!(f, "/b"),
32 }
33 }
34}
35
36impl ops::BitAnd for Ignoring {
37 type Output = Self;
38
39 fn bitand(self, rhs: Self) -> Self::Output {
40 Self(self.0 & rhs.0)
41 }
42}
43
44impl ops::BitOr for Ignoring {
45 type Output = Self;
46
47 fn bitor(self, rhs: Self) -> Self::Output {
48 Self(self.0 | rhs.0)
49 }
50}
51
/// A string paired with flags saying whether comparisons involving it should
/// ignore letter case and/or accents.
///
/// NOTE(review): `PartialOrd`/`Ord` are derived (field-wise over `inner`, then
/// `flags`) while `PartialEq` is hand-written and flag-sensitive, so two
/// values can be `==` without comparing as `Ordering::Equal` — confirm this is
/// intended before using `QString` as a key in ordered collections.
#[derive(Debug, Clone, PartialOrd, Ord)]
pub struct QString {
    /// The text as stored by the constructors; no normalisation is applied here.
    inner: String,
    /// Which properties (case, accents) comparisons involving this value ignore.
    flags: Ignoring,
}
62
63impl fmt::Display for QString {
64 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65 write!(f, "/{}{}", self.inner, self.flags)
66 }
67}
68
69impl PartialEq for QString {
70 fn eq(&self, other: &Self) -> bool {
71 let to_icase = self.is_icase() || other.is_icase();
72 let to_iaccent = self.is_iaccent() || other.is_iaccent();
73 match (to_icase, to_iaccent) {
74 (true, true) => {
75 UniCase::new(QString::unaccent(&self.inner))
76 == UniCase::new(QString::unaccent(&other.inner))
77 }
78 (true, false) => UniCase::new(&self.inner) == UniCase::new(&other.inner),
79 (false, true) => QString::unaccent(&self.inner) == QString::unaccent(&other.inner),
80 (false, false) => self.inner == other.inner,
81 }
82 }
83}
84
// NOTE(review): `Eq` promises an equivalence relation, but the `PartialEq`
// impl for `QString` widens each comparison to the union of both operands'
// flags, so equality is not transitive across differently-flagged values
// (plain "a" == icase "A" and icase "A" == plain "A", yet plain "a" != plain
// "A"). Confirm that no caller relies on transitivity.
impl Eq for QString {}
86
87impl QString {
88 pub fn plain<S: Into<String>>(s: S) -> Self {
90 Self {
91 inner: s.into(),
92 flags: Ignoring::NEITHER,
93 }
94 }
95
96 pub fn and_icase(&self) -> Self {
99 Self {
100 inner: self.inner.to_owned(),
101 flags: self.flags.clone() | Ignoring::CASE,
102 }
103 }
104
105 pub fn and_iaccent(&self) -> Self {
108 Self {
109 inner: self.inner.to_owned(),
110 flags: self.flags.clone() | Ignoring::ACCENT,
111 }
112 }
113
114 pub fn as_str(&self) -> &str {
116 &self.inner
117 }
118
119 pub fn unaccent(s: &str) -> String {
122 if s.is_empty() {
123 return "".into();
124 }
125
126 let result: String = s.nfd().filter(|x| !is_combining_mark(*x)).nfc().collect();
127 result
128 }
129
130 pub(crate) fn flags(&self) -> u8 {
132 self.flags.0
133 }
134
135 pub(crate) fn inner(&self) -> &str {
137 &self.inner
138 }
139
140 #[allow(dead_code)]
142 pub(crate) fn is_plain(&self) -> bool {
143 self.flags.0 == 0
144 }
145
146 pub(crate) fn is_icase(&self) -> bool {
148 self.flags.0 % 2 == 1
149 }
150
151 pub(crate) fn is_iaccent(&self) -> bool {
153 self.flags.0 >= 2
154 }
155
156 pub(crate) fn like(input: &Self, pattern: &Self) -> bool {
158 const WC: char = '%';
159
160 fn recursive(input: &[char], pattern: &[char]) -> bool {
162 if pattern.is_empty() {
164 return input.is_empty();
165 }
166
167 if input.is_empty() {
168 return pattern.iter().all(|&x| x == '%');
169 }
170
171 if pattern[0] == '\\' && pattern.len() > 1 {
172 let escaped = pattern[1];
173 return recursive(&input[1..], &pattern[2..])
174 || (input[0] == escaped) && recursive(&input[1..], &pattern[2..]);
175 }
176
177 if pattern[0] == '%' {
178 return recursive(&input[1..], pattern) || recursive(input, &pattern[1..]);
179 }
180
181 if pattern[0] == '_' {
182 return recursive(&input[1..], &pattern[1..]);
183 }
184
185 (input[0] == pattern[0]) && recursive(&input[1..], &pattern[1..])
186 }
187
188 fn reduce_wildcards(pattern: &str) -> Vec<char> {
190 let mut result: Vec<char> = Vec::with_capacity(pattern.len());
191 let mut chars = pattern.chars();
192 let mut saw_uwc = false;
193 while let Some(c) = chars.next() {
194 let state = if c == '\\' {
195 result.push('\\');
196 if let Some(n) = chars.next() {
197 result.push(n);
198 }
199 false
200 } else if c == WC {
201 if !saw_uwc {
202 result.push(WC);
203 }
204 true
205 } else {
206 result.push(c);
207 false
208 };
209 saw_uwc = state;
210 }
211 result
212 }
213
214 let input_icase = input.is_icase();
216 let pattern_icase = pattern.is_icase();
217 let icase = input_icase || pattern_icase;
218 let input_iaccent = input.is_iaccent();
220 let pattern_iaccent = pattern.is_iaccent();
221 let iaccent = input_iaccent || pattern_iaccent;
222
223 let folded_input: Vec<char> = match (icase, iaccent) {
224 (true, true) => UniCase::unicode(QString::unaccent(&input.inner))
225 .to_folded_case()
226 .chars()
227 .collect(),
228 (true, false) => UniCase::unicode(input.inner.as_str())
229 .to_folded_case()
230 .chars()
231 .collect(),
232 (false, true) => QString::unaccent(&input.inner).as_str().chars().collect(),
233 (false, false) => input.inner.chars().collect(),
234 };
235
236 let binding1 = UniCase::unicode(QString::unaccent(&pattern.inner)).to_folded_case();
237 let binding2 = UniCase::unicode(&pattern.inner).to_folded_case();
238 let binding3 = QString::unaccent(&pattern.inner);
239 let folded_pattern = match (icase, iaccent) {
240 (true, true) => binding1.as_str(),
241 (true, false) => binding2.as_str(),
242 (false, true) => binding3.as_str(),
243 (false, false) => pattern.inner.as_str(),
244 };
245
246 let reduced_pattern = reduce_wildcards(folded_pattern);
248
249 recursive(&folded_input, &reduced_pattern)
250 }
251
252 #[cfg(test)]
254 pub fn iaccent(s: &str) -> Self {
255 Self {
256 inner: s.to_owned(),
257 flags: Ignoring::ACCENT,
258 }
259 }
260
261 #[cfg(test)]
263 pub fn icase(s: &str) -> Self {
264 Self {
265 inner: s.to_owned(),
266 flags: Ignoring::CASE,
267 }
268 }
269}
270
#[cfg(test)]
mod tests {
    use super::*;
    use rand::{
        Rng,
        distr::{
            Alphanumeric,
            uniform::{UniformChar, UniformSampler},
        },
    };
    use tracing::debug;

    /// Flag predicates and the textual form for every flag combination.
    #[test]
    fn test_display() {
        let plain = QString::plain("chișinău");
        assert!(plain.is_plain());
        assert_eq!(plain.to_string(), "/chișinău/_");

        let case_only = QString::icase("CHIȘINĂU");
        assert!(case_only.is_icase());
        assert_eq!(case_only.to_string(), "/CHIȘINĂU/c");

        let accent_only = QString::iaccent("CHIȘINĂU");
        assert!(accent_only.is_iaccent());
        assert_eq!(accent_only.to_string(), "/CHIȘINĂU/a");

        // Deriving a new value must leave the source untouched.
        let plain_then_case = plain.and_icase();
        assert!(plain.is_plain());
        assert!(!plain_then_case.is_plain());
        assert!(plain_then_case.is_icase());

        let both = plain_then_case.and_iaccent();
        assert_eq!(both.to_string(), "/chișinău/b");
        assert!(both.is_icase());
        assert!(both.is_iaccent());
    }

    /// Equality honours the union of both operands' flags.
    #[test]
    fn test_equality() {
        let plain_lower = QString::plain("chisinau");
        let icase_upper = QString::icase("CHISINAU");
        let iaccent_lower = QString::iaccent("chișinău");
        let both_upper = QString::iaccent("CHIȘINĂU").and_icase();
        let iaccent_upper = QString::plain("CHISINAU").and_iaccent();

        assert!(plain_lower == icase_upper);
        assert!(iaccent_lower == both_upper);
        assert!(both_upper == iaccent_upper);

        let upper_both = icase_upper.and_iaccent();
        let lower_both = iaccent_lower.and_icase();

        assert!(plain_lower == iaccent_lower);
        assert!(plain_lower == upper_both);
        assert!(plain_lower == lower_both);

        // Setting an already-set flag changes nothing.
        let upper_both_again = upper_both.and_iaccent();
        assert_eq!(icase_upper, upper_both);
        assert_eq!(icase_upper, upper_both_again);
        assert_eq!(upper_both, upper_both_again);
        assert!(upper_both_again.is_icase());
        assert!(upper_both_again.is_iaccent());
    }

    /// Accent stripping on words, on LIKE patterns, and on single letters.
    #[test]
    fn test_unaccent() {
        let lower = "chisinau";
        let upper = "CHISINAU";
        let reduces_to_a = ["ẵ", "aͣ", "ą", "ǟ", "aₐ", "ắ"];
        let does_not_reduce_to_a = ["ɑ", "Ⓐ", "ⓐ", "æ", "ǽ", "ⱥ", "ᶏ", "ₐ"];

        assert_eq!(QString::unaccent("chișinău"), lower);
        assert_eq!(QString::unaccent("CHIȘINĂU"), upper);

        let iaccented = QString::unaccent("Chiș%");
        debug!("iaccented = '{iaccented}'");
        assert_eq!(iaccented, "Chis%");

        let iaccented = QString::unaccent("cHis%");
        debug!("iaccented = '{iaccented}'");
        assert_eq!(iaccented, "cHis%");

        // Folding case and stripping accents must commute.
        let a = QString::unaccent(&UniCase::new("chișinău%").to_folded_case());
        debug!("a = '{a}'");
        let b = UniCase::new(QString::unaccent("chișinău%")).to_folded_case();
        debug!("b = '{b}'");
        assert_eq!(a, b);

        for sample in reduces_to_a {
            assert!(QString::unaccent(sample).starts_with('a'));
        }
        for sample in does_not_reduce_to_a {
            assert!(!QString::unaccent(sample).starts_with('a'));
        }
    }

    /// Random alphanumeric word of 5 to 14 characters which, one time in four,
    /// is prefixed with `Foo`.
    fn starts_with_foo() -> String {
        let mut rng = rand::rng();
        let size: usize = rng.random_range(5..15);
        let body: String = (0..size)
            .map(|_| rng.sample(Alphanumeric) as char)
            .collect();
        if rng.random_bool(0.25) {
            format!("Foo{body}")
        } else {
            body
        }
    }

    /// Draws 1000 random words and requires every one starting with `Foo` to
    /// match the given case-insensitive pattern.
    fn assert_foo_words_match(pattern_text: &str) {
        let pattern = QString::icase(pattern_text);
        for _ in 0..1000 {
            let s = starts_with_foo();
            if !s.starts_with("Foo") {
                continue;
            }
            let input = QString::icase(&s);
            if !QString::like(&input, &pattern) {
                panic!("Ooops! Was expecting '{s}' to succeed")
            }
        }
    }

    #[test]
    fn test_like_small() {
        assert_foo_words_match("foo%");
    }

    #[test]
    fn test_like_capital() {
        assert_foo_words_match("FOO%");
    }

    /// `unaccent` agrees with taking the first NFKD code point of each character.
    #[test]
    fn test_nfkd() {
        const S: &str = "ἄbc";

        let first_code_points: String = S
            .chars()
            .map(|c| UnicodeNormalization::nfkd(c).nth(0).unwrap())
            .collect();
        debug!("'{first_code_points}'");
        assert_eq!(first_code_points, "αbc");

        assert_eq!(QString::unaccent(S), first_code_points);
    }

    /// Random Latin words against a case- and accent-insensitive prefix pattern,
    /// checked against an independently computed expectation.
    #[test]
    fn test_like_bench() {
        /// Random word of 5 to 9 characters drawn from U+0041..=U+024F.
        fn random_latin_word() -> String {
            let mut rng = rand::rng();
            let len: usize = rng.random_range(5..10);
            let dist = UniformChar::new_inclusive('\u{0041}', '\u{024F}').unwrap();
            (0..len).map(|_| dist.sample(&mut rng)).collect()
        }

        const PATTERN: &str = "Ä%%";
        let pattern = QString::plain(PATTERN).and_iaccent().and_icase();

        for _ in 0..1000 {
            let raw = random_latin_word();
            // Expected result, computed without going through `QString`.
            let cooked: String = raw
                .nfd()
                .filter(|c| !is_combining_mark(*c))
                .nfc()
                .collect();
            let ricotta = UniCase::unicode(&cooked).to_folded_case();
            let expected = ricotta.starts_with('a');

            let input = QString::plain(&raw).and_icase().and_iaccent();
            let actual = QString::like(&input, &pattern);
            if actual == expected {
                continue;
            }

            debug!(" raw: '{raw}' {}", raw.escape_unicode());
            debug!(" cotta: '{cooked}' {}", cooked.escape_unicode());
            debug!("ricotta: '{ricotta}' {}", ricotta.escape_unicode());
            panic!("IA(IC({input})) LIKE IC(IA({pattern})) is {actual} but expected {expected}");
        }
    }
}