ogc_cql2/
qstring.rs

1// SPDX-License-Identifier: Apache-2.0
2
3#![warn(missing_docs)]
4
//! CQL2 friendly string type that caters for a literal character sequence to
//! be used as-is, or in a case-insensitive and/or accent-insensitive way.
//!
8
9use core::fmt;
10use std::ops;
11use unicase::UniCase;
12use unicode_normalization::{UnicodeNormalization, char::is_combining_mark};
13
/// Bit-set telling which aspects of a literal string are disregarded when it
/// is compared: bit 0 stands for letter case, bit 1 for accents. No bit set
/// means the string is used as is; both bits set means both are ignored.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct Ignoring(u8);

impl Ignoring {
    /// No bit set: the string is used verbatim.
    const NOTHING: Self = Self(0b00);
    /// Bit 0: letter case is ignored.
    const CASE: Self = Self(0b01);
    /// Bit 1: accents are ignored.
    const ACCENT: Self = Self(0b10);
}

impl fmt::Display for Ignoring {
    /// Write the two-character tag for this flag set: `/_` (nothing), `/c`
    /// (case), `/a` (accent) or `/b` for anything else.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let tag = match *self {
            Ignoring::NOTHING => "/_",
            Ignoring::CASE => "/c",
            Ignoring::ACCENT => "/a",
            // every remaining value; with the constants above that is
            // `CASE | ACCENT`.
            _ => "/b",
        };
        f.write_str(tag)
    }
}

impl ops::BitAnd for Ignoring {
    type Output = Self;

    /// Keep only the flags present in both operands.
    fn bitand(self, rhs: Self) -> Self::Output {
        let (Self(left), Self(right)) = (self, rhs);
        Self(left & right)
    }
}

impl ops::BitOr for Ignoring {
    type Output = Self;

    /// Combine the flags of both operands.
    fn bitor(self, rhs: Self) -> Self::Output {
        let (Self(left), Self(right)) = (self, rhs);
        Self(left | right)
    }
}
51
52/// String based type used by [Queryables][1] to represent a plain string, and
53/// a set of flags to indicate how to use it in case and/or accent insensitive
54/// contexts.
55///
56/// [1]: crate::queryable::Q
57#[derive(Debug, Clone, PartialOrd, Ord)]
58pub struct QString {
59    /// String literal.
60    inner: String,
61    /// How to use in case and accent sensitive contexts.
62    flags: Ignoring,
63}
64
65impl fmt::Display for QString {
66    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67        write!(f, "/{}{}", self.inner, self.flags)
68    }
69}
70
71impl PartialEq for QString {
72    fn eq(&self, other: &Self) -> bool {
73        let to_icase = self.is_icase() || other.is_icase();
74        let to_iaccent = self.is_iaccent() || other.is_iaccent();
75        match (to_icase, to_iaccent) {
76            (true, true) => {
77                UniCase::new(QString::unaccent(&self.inner))
78                    == UniCase::new(QString::unaccent(&other.inner))
79            }
80            (true, false) => UniCase::new(&self.inner) == UniCase::new(&other.inner),
81            (false, true) => QString::unaccent(&self.inner) == QString::unaccent(&other.inner),
82            (false, false) => self.inner == other.inner,
83        }
84    }
85}
86
87impl Eq for QString {}
88
89impl QString {
90    /// Constructor for a plain instance.
91    pub fn plain(s: &str) -> Self {
92        Self {
93            inner: s.to_owned(),
94            flags: Ignoring::NOTHING,
95        }
96    }
97
98    /// Create a new instance from `self` w/ the added ICASE (ignore case) flag
99    /// set.
100    pub fn and_icase(&self) -> Self {
101        Self {
102            inner: self.inner.to_owned(),
103            flags: self.flags.clone() | Ignoring::CASE,
104        }
105    }
106
107    /// Create a new instance from `self` w/ the added IACCENT (ignore accent)
108    /// flag set.
109    pub fn and_iaccent(&self) -> Self {
110        Self {
111            inner: self.inner.to_owned(),
112            flags: self.flags.clone() | Ignoring::ACCENT,
113        }
114    }
115
116    /// Return the inner raw string.
117    pub fn as_str(&self) -> &str {
118        &self.inner
119    }
120
121    /// Return a new string from the given argument w/ all Unicode 'Mn' (Combining
122    /// Mark) codepoints removed.
123    pub fn unaccent(s: &str) -> String {
124        if s.is_empty() {
125            return "".into();
126        }
127
128        let result: String = s.nfd().filter(|x| !is_combining_mark(*x)).nfc().collect();
129        result
130    }
131
132    /// Return TRUE if this is a case-insensitive string; FALSE otherwise.
133    fn is_icase(&self) -> bool {
134        self.flags.0 % 2 == 1
135    }
136
137    /// Return TRUE if this is an accent-insensitive string; FALSE otherwise.
138    fn is_iaccent(&self) -> bool {
139        self.flags.0 >= 2
140    }
141
142    /// Whether `input` is matches the LIKE `pattern`.
143    pub(crate) fn like(input: &Self, pattern: &Self) -> bool {
144        // recursively compare 2 sub-strings, 1 char at a time...
145        fn recursive(input: &[char], pattern: &[char]) -> bool {
146            // w/ an empty pattern, only empty input matches...
147            if pattern.is_empty() {
148                return input.is_empty();
149            }
150
151            if input.is_empty() {
152                return pattern.iter().all(|&x| x == '%');
153            }
154
155            if pattern[0] == '\\' && pattern.len() > 1 {
156                let escaped = pattern[1];
157                return recursive(&input[1..], &pattern[2..])
158                    || (input[0] == escaped) && recursive(&input[1..], &pattern[2..]);
159            }
160
161            if pattern[0] == '%' {
162                return recursive(&input[1..], pattern) || recursive(input, &pattern[1..]);
163            }
164
165            if pattern[0] == '_' {
166                return recursive(&input[1..], &pattern[1..]);
167            }
168
169            (input[0] == pattern[0]) && recursive(&input[1..], &pattern[1..])
170        }
171
172        // case-insensitive mode kicks in when either arguments is unicase.
173        let input_icase = input.is_icase();
174        let pattern_icase = pattern.is_icase();
175        let icase = input_icase || pattern_icase;
176        // same deal w/ ignore-accents...
177        let input_iaccent = input.is_iaccent();
178        let pattern_iaccent = pattern.is_iaccent();
179        let iaccent = input_iaccent || pattern_iaccent;
180
181        let folded_input: Vec<char> = match (icase, iaccent) {
182            (true, true) => {
183                // compare ignoring case + accent...
184                UniCase::unicode(QString::unaccent(&input.inner))
185                    .to_folded_case()
186                    .chars()
187                    .collect()
188            }
189            (true, false) => {
190                // compare ignoring case only...
191                UniCase::unicode(input.inner.as_str())
192                    .to_folded_case()
193                    .chars()
194                    .collect()
195            }
196            (false, true) => {
197                // compare ignoring accents only...
198                QString::unaccent(&input.inner).as_str().chars().collect()
199            }
200            (false, false) => {
201                // plain strings all the way...
202                input.inner.chars().collect()
203            }
204        };
205
206        let folded_pattern: Vec<char> = match (icase, iaccent) {
207            (true, true) => UniCase::unicode(QString::unaccent(&pattern.inner))
208                .to_folded_case()
209                .chars()
210                .collect(),
211            (true, false) => UniCase::unicode(&pattern.inner)
212                .to_folded_case()
213                .chars()
214                .collect(),
215            (false, true) => QString::unaccent(&pattern.inner).chars().collect(),
216            (false, false) => pattern.inner.as_str().chars().collect(),
217        };
218
219        recursive(&folded_input, &folded_pattern)
220    }
221
222    /// Return TRUE if this is a plain string; FALSE otherwise.
223    #[cfg(test)]
224    fn is_plain(&self) -> bool {
225        self.flags.0 == 0
226    }
227
228    /// Constructor for an accent-insensitive instance.
229    #[cfg(test)]
230    pub fn iaccent(s: &str) -> Self {
231        Self {
232            inner: s.to_owned(),
233            flags: Ignoring::ACCENT,
234        }
235    }
236
237    /// Constructor for a case-insensitive instance.
238    #[cfg(test)]
239    pub fn icase(s: &str) -> Self {
240        Self {
241            inner: s.to_owned(),
242            flags: Ignoring::CASE,
243        }
244    }
245}
246
247#[cfg(test)]
248mod tests {
249    use super::*;
250    use rand::{Rng, distr::Alphanumeric};
251
252    #[test]
253    // #[tracing_test::traced_test]
254    fn test_display() {
255        const S1: &str = "/chișinău/_";
256        const S2: &str = "/CHIȘINĂU/c";
257        const S3: &str = "/CHIȘINĂU/a";
258        const S5: &str = "/chișinău/b";
259
260        let s1 = QString::plain("chișinău");
261        assert!(s1.is_plain());
262        assert_eq!(s1.to_string(), S1);
263
264        let s2 = QString::icase("CHIȘINĂU");
265        assert!(s2.is_icase());
266        assert_eq!(s2.to_string(), S2);
267
268        let s3 = QString::iaccent("CHIȘINĂU");
269        assert!(s3.is_iaccent());
270        assert_eq!(s3.to_string(), S3);
271
272        let s4 = s1.and_icase();
273        assert!(s1.is_plain());
274        assert!(!s4.is_plain());
275        assert!(s4.is_icase());
276
277        let s5 = s4.and_iaccent();
278        assert_eq!(s5.to_string(), S5);
279        assert!(s5.is_icase());
280        assert!(s5.is_iaccent());
281    }
282
283    #[test]
284    // #[tracing_test::traced_test]
285    fn test_equality() {
286        let s1 = QString::plain("chisinau");
287        let s2 = QString::icase("CHISINAU");
288        let s3 = QString::iaccent("chișinău");
289        let s4 = QString::iaccent("CHIȘINĂU").and_icase();
290        let s5 = QString::plain("CHISINAU").and_iaccent();
291
292        assert!(s1 == s2);
293        assert!(s3 == s4);
294        assert!(s4 == s5);
295
296        // all together now...
297        let s4 = s2.and_iaccent();
298        let s5 = s3.and_icase();
299
300        assert!(s1 == s3);
301        assert!(s1 == s4);
302        assert!(s1 == s5);
303
304        // remain valid after same bit set multiple times...
305        let s5 = s4.and_iaccent();
306        assert_eq!(s2, s4);
307        assert_eq!(s2, s5);
308        assert_eq!(s4, s5);
309        assert!(s5.is_icase());
310        assert!(s5.is_iaccent());
311    }
312
313    #[test]
314    // #[tracing_test::traced_test]
315    fn test_unaccent() {
316        let slo = "chisinau";
317        let shi = "CHISINAU";
318
319        let iaccented = QString::unaccent("chișinău");
320        // tracing::debug!("iaccented = '{iaccented}'");
321        assert_eq!(iaccented, slo);
322
323        let iaccented = QString::unaccent("CHIȘINĂU");
324        // tracing::debug!("iaccented = '{iaccented}'");
325        assert_eq!(iaccented, shi);
326
327        // now test when LIKE wildcard characters are included...
328
329        let iaccented = QString::unaccent("Chiș%");
330        tracing::debug!("iaccented = '{iaccented}'");
331        assert_eq!(iaccented, "Chis%");
332
333        let iaccented = QString::unaccent("cHis%");
334        tracing::debug!("iaccented = '{iaccented}'");
335        assert_eq!(iaccented, "cHis%");
336
337        // ...and when combined w/ icase...
338
339        let a = QString::unaccent(&UniCase::new("chișinău%").to_folded_case());
340        tracing::debug!("a = '{a}'");
341        let b = UniCase::new(QString::unaccent("chișinău%")).to_folded_case();
342        tracing::debug!("b = '{b}'");
343        assert_eq!(a, b);
344    }
345
346    fn starts_with_foo() -> String {
347        let size: usize = rand::rng().random_range(5..15);
348        let s = (0..size)
349            .map(|_| rand::rng().sample(Alphanumeric) as char)
350            .collect();
351        let hit = rand::rng().random_bool(0.25);
352        if hit { format!("Foo{s}") } else { s }
353    }
354
355    #[test]
356    // #[tracing_test::traced_test]
357    fn test_like_small() {
358        let pattern = QString::icase("foo%");
359        for _ in 0..1000 {
360            let s = starts_with_foo();
361            if s.starts_with("Foo") {
362                let input = QString::icase(&s);
363                let result = QString::like(&input, &pattern);
364                if !result {
365                    eprintln!("*** Was expecting '{s}' to succeed");
366                    panic!("Ooops")
367                }
368            };
369        }
370    }
371
372    #[test]
373    // #[tracing_test::traced_test]
374    fn test_like_capital() {
375        let pattern = QString::icase("FOO%");
376        for _ in 0..1000 {
377            let s = starts_with_foo();
378            if s.starts_with("Foo") {
379                let input = QString::icase(&s);
380                let result = QString::like(&input, &pattern);
381                if !result {
382                    eprintln!("*** Was expecting '{s}' to succeed");
383                    panic!("Ooops")
384                }
385            };
386        }
387    }
388
389    #[test]
390    fn test_nfkd() {
391        const S: &str = "ἄbc";
392
393        let r1: String = S
394            .chars()
395            .map(|c| UnicodeNormalization::nfkd(c).nth(0).unwrap())
396            .collect();
397        tracing::debug!("'{r1}'");
398        assert_eq!(r1, "αbc");
399
400        assert_eq!(QString::unaccent(S), r1);
401    }
402}