without_ats/lib.rs
//! # without_ats
//!
//! A small library for removing every occurrence of `"@...@"` from a string.
//!
//! This is useful for us to remove the diacritics from analyser output.
7
/// Return an iterator over the non-empty string slices of `s` that are not enclosed
/// in `@...@`.
///
/// The `@` characters are paired up from left to right: the first `@` opens an
/// enclosed region, the next one closes it, and so on. Everything from an opening
/// `@` through its closing `@` is skipped. If the last `@` has no closing partner,
/// the text after it is treated as enclosed and dropped, while the text in front of
/// it is still yielded.
///
/// Every yielded item is a sub-slice of `s`. Empty slices (for instance between two
/// adjacent `@...@` regions) are never yielded, so an empty `s` yields nothing.
///
/// # Examples
///
/// ```
/// use without_ats::without_ats_iter;
///
/// let clean = without_ats_iter("@AAA@Clean@BBB@String@CCC@");
/// assert_eq!(String::from_iter(clean), String::from("CleanString"));
///
/// // A dangling `@` only drops what follows it, never what precedes it.
/// let clean = without_ats_iter("@AAA@Clean@");
/// assert_eq!(String::from_iter(clean), String::from("Clean"));
/// ```
pub fn without_ats_iter(s: &str) -> impl Iterator<Item = &str> {
    // Splitting on '@' produces pieces that alternate between "outside" and "inside"
    // an `@...@` region, always starting outside. The pieces at even positions are
    // therefore exactly the text to keep. This also covers a trailing unpaired '@':
    // the piece in front of it sits at an even position and is kept, and whatever
    // follows it sits at an odd position and is dropped.
    s.split('@')
        .step_by(2)
        // Adjacent regions ("@A@@B@") and a leading/trailing '@' produce empty
        // pieces; callers only ever see slices with content.
        .filter(|piece| !piece.is_empty())
}
77
78/// Return a new `String` from the input `s`, where everything enclosed in `@...@` has
79/// been removed from `s`.
80/// Example:
81///
82/// ```
83/// use without_ats::without_ats;
84///
85/// let clean = without_ats("@AAA@Clean@BBB@String@CCC@");
86/// assert_eq!(clean, String::from("CleanString"));
87/// ```
88pub fn without_ats(s: &str) -> String {
89 String::from_iter(without_ats_iter(s))
90}
91
#[cfg(test)]
mod tests {
    use super::*;

    /// The empty string has nothing to strip and comes back empty.
    #[test]
    fn empty() {
        assert_eq!("", without_ats(""));
    }

    /// A longer input with many adjacent `@...@` regions: only the text outside
    /// of them is expected to remain.
    #[test]
    fn real() {
        let analysis = "@P.12p.add@viessat+V+IV+Imprt+Du1@R.12p.add@@D.CmpOnly.FALSE@@D.CmpPref.TRUE@@D.NeedNoun.ON@@D.SpellRlx.ON@@C.SpellRlx@@D.SpaceCmp.ON@@C.SpaceCmp@";
        let cleaned = without_ats(analysis);
        assert_eq!("viessat+V+IV+Imprt+Du1", cleaned);
    }
}
111}