substr_iterator/
lib.rs

1//! This library is made to iterate over a `&str` by a number of characters without allocating.
2//!
3//! ```rust
4//! use substr_iterator::{Trigram, TrigramIter};
5//!
6//! let mut iter = TrigramIter::from("whatever");
7//! assert_eq!(iter.next(), Some(['w', 'h', 'a']));
8//! let mut iter = TrigramIter::from("今天我吃饭");
9//! assert_eq!(iter.next(), Some(['今', '天', '我']));
10//! ```
11//!
//! It's also possible to use other window sizes.
13//!
14//! ```rust
15//! use substr_iterator::{Substr, SubstrIter};
16//!
17//! let mut iter = SubstrIter::<2>::from("whatever");
18//! assert_eq!(iter.next(), Some(['w', 'h']));
19//! let mut iter = SubstrIter::<2>::from("今天我吃饭");
20//! assert_eq!(iter.next(), Some(['今', '天']));
21//! ```
22//!
//! When the `std` feature is enabled, [SubstrWrapper] makes it possible to display a [Substr] as a [String].
24//!
25//! ```rust
26//! use substr_iterator::{SubstrWrapper, Trigram, TrigramIter};
27//!
28//! let mut iter = TrigramIter::from("whatever");
29//! let item = SubstrWrapper(iter.next().unwrap());
30//! assert_eq!(item.to_string(), "wha");
31//! ```
32//!
//! When the `serde` feature is enabled, [SubstrWrapper] can be serialized and deserialized.
34//!
35//! ```rust
36//! use substr_iterator::{SubstrWrapper, Trigram, TrigramIter};
37//!
38//! let data: Vec<SubstrWrapper<3>> = vec![
39//!     SubstrWrapper(['a', 'b', 'c']),
40//!     SubstrWrapper(['今', '天', '我']),
41//! ];
42//! assert_eq!(
43//!     serde_json::to_string(&data).unwrap(),
44//!     "[\"abc\",\"今天我\"]",
45//! );
46//! ```
47
/// A window of `N` characters, stored inline as a fixed-size array.
pub type Substr<const N: usize> = [char; N];

/// A window of exactly 3 characters; shorthand for `Substr<3>`.
pub type Trigram = Substr<3>;

/// An iterator yielding windows of 3 characters. This is just an alias to [SubstrIter].
pub type TrigramIter<'a> = SubstrIter<'a, 3>;
56
57/// This is an iterator that allows to take a number of characters out of a string
58/// and iterate like a window.
59///
60/// ```rust
61/// let mut iter = substr_iterator::TrigramIter::from("whatever");
62/// ```
63pub struct SubstrIter<'a, const N: usize> {
64    iter: std::str::Chars<'a>,
65    cache: [char; N],
66    index: usize,
67}
68
69impl<'a, const N: usize> From<&'a str> for SubstrIter<'a, N> {
70    fn from(origin: &'a str) -> Self {
71        let mut iter = origin.chars();
72        let mut cache = ['\0'; N];
73        for (idx, v) in (&mut iter).take(N - 1).enumerate() {
74            cache[idx] = v;
75        }
76        Self {
77            iter,
78            cache,
79            index: 0,
80        }
81    }
82}
83
84impl<const N: usize> SubstrIter<'_, N> {
85    fn get(&self, offset: usize) -> char {
86        self.cache[(self.index + offset) % N]
87    }
88
89    fn push(&mut self, value: char) {
90        self.cache[(self.index + N - 1) % N] = value;
91    }
92}
93
94impl<const N: usize> Iterator for SubstrIter<'_, N> {
95    type Item = Substr<N>;
96
97    fn next(&mut self) -> Option<Self::Item> {
98        let value = self.iter.next()?;
99        self.push(value);
100        let res: [char; N] = core::array::from_fn(|i| self.get(i));
101        self.index += 1;
102        Some(res)
103    }
104}
105
106/// Wrapper around [Substr] in order to bring extra capabilities.
107///
108/// ```rust
109/// use substr_iterator::SubstrWrapper;
110/// use std::str::FromStr;
111///
112/// let value: [char; 3] = ['a', 'b', 'c'];
113/// let wrapped = SubstrWrapper(value);
114/// // implements Display
115/// assert_eq!(wrapped.to_string(), "abc");
116///
117/// // parsing &str
118/// let parsed = SubstrWrapper::from_str("abc").unwrap();
119/// assert_eq!(wrapped, parsed);
120/// ```
121///
122/// When the `serde` feature is enabled, [SubstrWrapper] provides a way to [serde::Serialize] and [serde::Deserialize].
123///
124/// ```rust
125/// let value: [char; 3] = ['a', 'b', 'c'];
126/// let wrapped = substr_iterator::SubstrWrapper(value);
127/// assert_eq!(serde_json::to_string(&wrapped).unwrap(), "\"abc\"");
128/// ```
129#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
130pub struct SubstrWrapper<const N: usize>(pub Substr<N>);
131
132impl<const N: usize> SubstrWrapper<N> {
133    /// Extract the [Substr] from the wrapper
134    pub fn inner(self) -> Substr<N> {
135        self.0
136    }
137}
138
139impl<const N: usize> From<Substr<N>> for SubstrWrapper<N> {
140    fn from(value: Substr<N>) -> Self {
141        Self(value)
142    }
143}
144
#[cfg(feature = "std")]
impl<const N: usize> std::fmt::Display for SubstrWrapper<N> {
    /// Writes the wrapped characters one after another, with no separator.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use std::fmt::Write;

        let Self(chars) = self;
        // Stops at the first write error and propagates it.
        chars.iter().try_for_each(|c| f.write_char(*c))
    }
}
156
/// String parsing error for [SubstrWrapper].
///
/// Reported when the parsed string does not contain exactly the expected
/// number of characters.
///
/// ```rust
/// use substr_iterator::SubstrWrapper;
/// use std::str::FromStr;
///
/// let err = SubstrWrapper::<3>::from_str("abcd").unwrap_err();
/// assert_eq!(err.expected, 3);
/// assert_eq!(err.current, 4);
/// assert_eq!(err.to_string(), "invalid length 4, expected 3");
/// ```
#[cfg(feature = "std")]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct SubstrParserError {
    /// The expected number of characters
    pub expected: usize,
    /// The number of characters found in the given `&str`
    pub current: usize,
}

#[cfg(feature = "std")]
impl std::fmt::Display for SubstrParserError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "invalid length {}, expected {}",
            self.current, self.expected
        )
    }
}

// Lets the error be boxed as `Box<dyn Error>` and propagated with `?`.
#[cfg(feature = "std")]
impl std::error::Error for SubstrParserError {}
186
#[cfg(feature = "std")]
impl<const N: usize> std::str::FromStr for SubstrWrapper<N> {
    type Err = SubstrParserError;

    /// Parses a string made of exactly `N` characters.
    ///
    /// # Errors
    ///
    /// Returns a [SubstrParserError] carrying the actual number of characters
    /// when the input is shorter or longer than `N`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let mut remaining = s.chars();
        let mut buffer = [' '; N];
        let mut filled = 0;
        while filled < N {
            match remaining.next() {
                Some(c) => {
                    buffer[filled] = c;
                    filled += 1;
                }
                // Input ran out early: `filled` is its total character count.
                None => {
                    return Err(SubstrParserError {
                        expected: N,
                        current: filled,
                    })
                }
            }
        }
        // Anything left over means the input was longer than `N`.
        match remaining.count() {
            0 => Ok(Self(buffer)),
            extra => Err(SubstrParserError {
                expected: N,
                current: N + extra,
            }),
        }
    }
}
211
#[cfg(feature = "serde")]
impl<const N: usize> serde::Serialize for SubstrWrapper<N> {
    /// Serializes the wrapper as a single string built from its `Display` output.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serde::Serializer::collect_str(serializer, self)
    }
}
221
/// Serde visitor turning a string of `N` characters into a [SubstrWrapper].
#[cfg(feature = "serde")]
struct SubstrVisitor<const N: usize>;

#[cfg(feature = "serde")]
impl<const N: usize> serde::de::Visitor<'_> for SubstrVisitor<N> {
    type Value = SubstrWrapper<N>;

    /// Describes the expected input, used by serde in "invalid type" errors.
    fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.write_fmt(format_args!("a string of {} characters", N))
    }

    /// Parses the string through `FromStr`, turning a length mismatch into a
    /// custom serde error.
    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let parsed = v.parse::<SubstrWrapper<N>>();
        parsed.map_err(E::custom)
    }
}
240
#[cfg(feature = "serde")]
impl<'de, const N: usize> serde::de::Deserialize<'de> for SubstrWrapper<N> {
    /// Deserializes the wrapper from a string, delegating to `SubstrVisitor`.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::de::Deserializer<'de>,
    {
        let visitor = SubstrVisitor::<N>;
        deserializer.deserialize_str(visitor)
    }
}
250
#[cfg(test)]
mod tests {
    use test_case::test_case;

    use crate::*;

    // Compares raw character windows rather than parsed wrappers, so this test
    // builds and runs whatever features are enabled.
    #[test_case("whatever", vec!["wha", "hat", "ate", "tev", "eve", "ver"]; "with simple characters")]
    #[test_case("今天我吃饭", vec!["今天我", "天我吃", "我吃饭"]; "with chinese characters")]
    #[test_case("abc", vec!["abc"]; "with exactly one window")]
    #[test_case("ab", vec![]; "with too few characters")]
    fn should_window(word: &str, expected: Vec<&'static str>) {
        let all: Vec<Vec<char>> = SubstrIter::<'_, 3>::from(word)
            .map(|window| window.to_vec())
            .collect();
        let expected: Vec<Vec<char>> = expected
            .iter()
            .map(|v| v.chars().collect())
            .collect();
        assert_eq!(all, expected);
    }

    #[test_case(vec![['a', 'b', 'c']], vec!["abc"]; "with simple characters")]
    #[test_case(vec![['今','天','我'], ['天','我','吃'], ['我','吃','饭']], vec!["今天我", "天我吃", "我吃饭"]; "with chinese characters")]
    #[cfg(feature = "std")]
    fn should_display(subsets: Vec<[char; 3]>, expected: Vec<&'static str>) {
        let displayed = subsets
            .into_iter()
            .map(|v| SubstrWrapper(v).to_string())
            .collect::<Vec<_>>();
        assert_eq!(displayed, expected);
    }

    // The serde implementations only exist behind the `serde` feature, so the
    // tests exercising them are gated the same way.
    #[cfg(feature = "serde")]
    #[test]
    fn should_serialize() {
        let res: Vec<SubstrWrapper<3>> = vec![
            SubstrWrapper(['a', 'b', 'c']),
            SubstrWrapper(['今', '天', '我']),
        ];
        let json = serde_json::to_string(&res).unwrap();
        assert_eq!(json, "[\"abc\",\"今天我\"]");
        let decoded: Vec<SubstrWrapper<3>> = serde_json::from_str(&json).unwrap();
        assert_eq!(res, decoded);
    }

    #[cfg(feature = "serde")]
    #[test]
    #[should_panic(expected = "invalid length 4, expected 3")]
    fn should_not_deserialize_with_invalid_length() {
        let _: Vec<SubstrWrapper<3>> = serde_json::from_str("[\"abcd\",\"今天我\"]").unwrap();
    }

    #[cfg(feature = "serde")]
    #[test]
    #[should_panic(expected = "invalid type: integer `42`, expected a string of 3 characters")]
    fn should_not_deserialize_with_invalid_type() {
        let _: SubstrWrapper<3> = serde_json::from_str("42").unwrap();
    }
}