without_ats/
lib.rs

//! # without_ats
//!
//! A single-function library for removing every occurrence of `"@...@"` from
//! a string.
//!
//! This is useful for us to remove the diacritics from analyser output.
7
/// Return an iterator over the non-empty string slices of `s` that are not enclosed in
/// `@...@`.
///
/// The `@` characters are treated as alternating opening and closing delimiters, in
/// order of appearance. Text that follows an opening `@` without a closing `@` is
/// dropped, while the text in front of that opening `@` is still yielded.
///
/// # Examples
///
/// ```
/// use without_ats::without_ats_iter;
///
/// let clean = without_ats_iter("@AAA@Clean@BBB@String@CCC@");
/// assert_eq!(String::from_iter(clean), String::from("CleanString"));
///
/// // The text in front of an unterminated `@` is kept, even when that `@` is the
/// // last character of the input.
/// let clean = without_ats_iter("Clean@AAA@String@");
/// assert_eq!(String::from_iter(clean), String::from("CleanString"));
/// ```
pub fn without_ats_iter(s: &str) -> impl Iterator<Item = &str> {
    // Splitting on '@' produces pieces that alternate between "outside" (even
    // positions) and "inside" (odd positions) of an `@...@` pair, so keeping every
    // second piece keeps exactly the text outside the pairs. The piece following an
    // unterminated '@' always lands on an odd position and is therefore dropped.
    s.split('@')
        .step_by(2)
        // Adjacent pairs ("@A@@B@") and pairs at either end of `s` produce empty
        // pieces; those are never yielded.
        .filter(|piece| !piece.is_empty())
}
77
78/// Return a new `String` from the input `s`, where everything enclosed in `@...@` has
79/// been removed from `s`.
80/// Example:
81///
82/// ```
83/// use without_ats::without_ats;
84///
85/// let clean = without_ats("@AAA@Clean@BBB@String@CCC@");
86/// assert_eq!(clean, String::from("CleanString"));
87/// ```
88pub fn without_ats(s: &str) -> String {
89    String::from_iter(without_ats_iter(s))
90}
91
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty input produces an empty output.
    #[test]
    fn empty() {
        assert_eq!("", without_ats(""));
    }

    /// A line of analyser output where a single run of text is surrounded by many
    /// `@...@` sections, several of them directly adjacent to each other.
    #[test]
    fn real() {
        let analysis = "@P.12p.add@viessat+V+IV+Imprt+Du1@R.12p.add@@D.CmpOnly.FALSE@@D.CmpPref.TRUE@@D.NeedNoun.ON@@D.SpellRlx.ON@@C.SpellRlx@@D.SpaceCmp.ON@@C.SpaceCmp@";
        let cleaned = without_ats(analysis);
        assert_eq!("viessat+V+IV+Imprt+Du1", cleaned);
    }
}
111}