text_to_sounds/
scanner.rs

/// A cursor that moves through a text one character at a time.
///
/// The input is split into `char`s once, in [`Scanner::new`], and every lookup afterwards is
/// a plain index into that buffer.
///
/// Inspired by the [Lyn crate](https://crates.io/crates/lyn) and by an
/// [article](https://depth-first.com/articles/2021/12/16/a-beginners-guide-to-parsing-in-rust/).
#[derive(Debug)]
pub struct Scanner {
    /// Index into `characters` of the character that `peek`/`pop` return next.
    cursor: usize,
    /// The scanned text, one entry per `char`.
    characters: Vec<char>,
}

/// Plain space character.
const SPACE_HTML_CHAR: &char = &' ';

/// Non-breaking space character (U+00A0).
const NON_BREAKABLE_SPACE_HTML_CHAR: &char = &'\u{a0}';

/// Character handed out when the scanner has nothing at the requested position.
///
/// This is a simple stand-in for `Option`. Because it is a plain space, callers cannot tell
/// "no character here" apart from a real space in the text.
const DEFAULT_CHAR: &char = SPACE_HTML_CHAR;

/// Punctuation characters that `is_first`/`is_last` treat as boundaries.
const PUNCTUATION_CHARS: [char; 7] = ['.', ',', ';', '!', '?', ':', '-'];

impl Scanner {
    /// Creates a scanner positioned at the first character of `string`.
    pub fn new(string: &str) -> Self {
        Self {
            cursor: 0,
            characters: string.chars().collect(),
        }
    }

    /// Returns the current cursor position. Useful for reporting errors.
    pub fn cursor(&self) -> usize {
        self.cursor
    }

    /// Returns the character under the cursor without advancing.
    ///
    /// Yields `DEFAULT_CHAR` (a space) when the cursor is past the end of the text.
    pub fn peek(&self) -> &char {
        self.char_at(Some(self.cursor))
    }

    /// Returns the character one position after the cursor without advancing.
    ///
    /// Yields `DEFAULT_CHAR` (a space) when there is no such character.
    pub fn peek_next(&self) -> &char {
        self.char_at(self.cursor.checked_add(1))
    }

    /// Returns the character one position before the cursor without moving it.
    ///
    /// Yields `DEFAULT_CHAR` (a space) when the cursor is at the start of the text.
    pub fn peek_prev(&self) -> &char {
        // `checked_sub` gives `None` at position 0, which maps to the default character.
        self.char_at(self.cursor.checked_sub(1))
    }

    /// Returns true if further progress is not possible.
    pub fn is_done(&self) -> bool {
        self.cursor == self.characters.len()
    }

    /// Returns true if the cursor is at the very start of the text, or if the previous
    /// character is a space, a non-breaking space or a punctuation character.
    pub fn is_first(&self) -> bool {
        self.cursor == 0 || Self::is_boundary(self.peek_prev())
    }

    /// Returns true if the cursor is on the final character of the text, or if the following
    /// character is a space, a non-breaking space or a punctuation character.
    ///
    /// When nothing follows the cursor, `peek_next` yields the default space, so the result
    /// is also true for an empty or exhausted scanner.
    pub fn is_last(&self) -> bool {
        self.cursor + 1 == self.characters.len() || Self::is_boundary(self.peek_next())
    }

    /// Returns true if the character after the cursor (see [`Scanner::peek_next`]) is one of
    /// `chars`.
    ///
    /// `chars` is taken by value to keep the existing call sites working.
    pub fn is_next_any(&self, chars: Vec<char>) -> bool {
        chars.contains(self.peek_next())
    }

    /// Returns the character under the cursor and advances the cursor by one.
    ///
    /// When the text is exhausted the cursor stays put and `DEFAULT_CHAR` is returned.
    pub fn pop(&mut self) -> &char {
        let popped = self.characters.get(self.cursor);

        // Only move forward when there really was a character to consume.
        if popped.is_some() {
            self.cursor += 1;
        }

        popped.unwrap_or(DEFAULT_CHAR)
    }

    /// Returns true if the character is a punctuation character.
    fn is_punctuation(c: &char) -> bool {
        PUNCTUATION_CHARS.contains(c)
    }

    /// Returns true if the character is a space, a non-breaking space or punctuation.
    fn is_boundary(c: &char) -> bool {
        c == SPACE_HTML_CHAR || c == NON_BREAKABLE_SPACE_HTML_CHAR || Self::is_punctuation(c)
    }

    /// Looks up the character at `index`, falling back to `DEFAULT_CHAR` when the index is
    /// absent or out of range.
    fn char_at(&self, index: Option<usize>) -> &char {
        index
            .and_then(|position| self.characters.get(position))
            .unwrap_or(DEFAULT_CHAR)
    }
}
109
#[cfg(test)]
mod cursor {
    use super::*;

    /// Builds a scanner over `text` and pops `pops` characters from it.
    fn scanner_after(text: &str, pops: usize) -> Scanner {
        let mut scanner = Scanner::new(text);

        for _ in 0..pops {
            scanner.pop();
        }

        scanner
    }

    /// A scanner over empty input reports position zero.
    #[test]
    fn empty() {
        let position = Scanner::new("").cursor();

        assert_eq!(position, 0);
    }

    /// One pop moves the cursor to position one.
    #[test]
    fn in_progress() {
        let position = scanner_after("abc", 1).cursor();

        assert_eq!(position, 1);
    }

    /// Popping every character leaves the cursor at the text length.
    #[test]
    fn done() {
        let position = scanner_after("abc", 3).cursor();

        assert_eq!(position, 3);
    }
}
141
#[cfg(test)]
mod is_done {
    use super::*;

    /// A scanner over empty input is done immediately.
    #[test]
    fn empty() {
        let scanner = Scanner::new("");

        assert!(scanner.is_done())
    }

    /// After one pop out of three characters the scanner is not done.
    #[test]
    fn not_done() {
        let mut scanner = Scanner::new("abc");

        scanner.pop();

        assert!(!scanner.is_done())
    }

    /// After popping all three characters the scanner is done.
    #[test]
    fn done() {
        let mut scanner = Scanner::new("abc");

        scanner.pop();
        scanner.pop();
        scanner.pop();

        assert!(scanner.is_done())
    }
}
173
#[cfg(test)]
mod peek {
    use super::*;

    /// Peeking into empty input yields the default character.
    #[test]
    fn empty() {
        let scanner = Scanner::new("");
        let current = scanner.peek();

        assert_eq!(current, DEFAULT_CHAR);
    }

    /// After one pop, peek shows the second character.
    #[test]
    fn not_done() {
        let mut scanner = Scanner::new("abc");
        scanner.pop();

        let current = scanner.peek();

        assert_eq!(current, &'b');
    }
}
194
#[cfg(test)]
mod peek_next {
    use super::*;

    /// With empty input there is no following character, so the default is returned.
    #[test]
    fn empty() {
        let scanner = Scanner::new("");
        let following = scanner.peek_next();

        assert_eq!(following, DEFAULT_CHAR);
    }

    /// After one pop the cursor is on 'b', so the following character is 'c'.
    #[test]
    fn not_done() {
        let mut scanner = Scanner::new("abc");
        scanner.pop();

        let following = scanner.peek_next();

        assert_eq!(following, &'c');
    }
}
215
#[cfg(test)]
mod peek_prev {
    use super::*;

    /// With empty input there is no previous character, so the default is returned.
    #[test]
    fn empty() {
        let scanner = Scanner::new("");
        let previous = scanner.peek_prev();

        assert_eq!(previous, DEFAULT_CHAR);
    }

    /// After one pop the previous character is the one that was popped.
    #[test]
    fn not_done() {
        let mut scanner = Scanner::new("abc");
        scanner.pop();

        let previous = scanner.peek_prev();

        assert_eq!(previous, &'a');
    }
}
236
#[cfg(test)]
mod is_first {
    use super::*;

    /// Builds a scanner over `text` with its first character already popped.
    fn after_one_pop(text: &str) -> Scanner {
        let mut scanner = Scanner::new(text);
        scanner.pop();
        scanner
    }

    /// A fresh scanner sits on the first character.
    #[test]
    fn is_first() {
        let at_start = Scanner::new("abc").is_first();

        assert!(at_start);
    }

    /// A character preceded by punctuation counts as first.
    #[test]
    fn is_first_with_punctuation_char() {
        let after_mark = after_one_pop("!abc").is_first();

        assert!(after_mark);
    }

    /// A character preceded by a non-breaking space counts as first.
    #[test]
    fn is_first_with_non_breakable_char() {
        let after_nbsp = after_one_pop("\u{a0}abc").is_first();

        assert!(after_nbsp);
    }

    /// A character preceded by a letter does not count as first.
    #[test]
    fn not_is_first() {
        let after_letter = after_one_pop("abc").is_first();

        assert!(!after_letter);
    }
}
275
#[cfg(test)]
mod is_last {
    use super::*;

    /// Builds a scanner over `text` with its first two characters already popped.
    fn after_two_pops(text: &str) -> Scanner {
        let mut scanner = Scanner::new(text);
        scanner.pop();
        scanner.pop();
        scanner
    }

    /// The cursor on the final character of the text counts as last.
    #[test]
    fn is_last() {
        let on_final_char = after_two_pops("abc").is_last();

        assert!(on_final_char);
    }

    /// A character followed by punctuation counts as last.
    #[test]
    fn is_last_with_punctuation_char() {
        let before_mark = after_two_pops("abc!").is_last();

        assert!(before_mark);
    }

    /// A character followed by a non-breaking space counts as last.
    #[test]
    fn is_last_with_non_breakable_char() {
        let before_nbsp = after_two_pops("abc\u{a0}").is_last();

        assert!(before_nbsp);
    }

    /// A character followed by a letter does not count as last.
    #[test]
    fn not_is_last() {
        let before_letter = Scanner::new("abc").is_last();

        assert!(!before_letter);
    }
}
317
#[cfg(test)]
mod is_next_any {
    use super::*;

    /// The character after the initial 'c' is 'h', so a list containing 'h' matches.
    #[test]
    fn it_should_be_true() {
        let scanner = Scanner::new("cheese");
        let candidates = vec!['h'];

        let matched = scanner.is_next_any(candidates);

        assert!(matched);
    }

    /// The current character 'c' is not the following one, so it must not match.
    #[test]
    fn it_should_be_false() {
        let scanner = Scanner::new("cheese");
        let candidates = vec!['c'];

        let matched = scanner.is_next_any(candidates);

        assert!(!matched);
    }
}
336
#[cfg(test)]
mod pop {
    use super::*;

    /// Popping from empty input yields the default character and leaves the cursor at zero.
    #[test]
    fn empty() {
        let mut scanner = Scanner::new("");

        let popped = *scanner.pop();

        assert_eq!(&popped, DEFAULT_CHAR);
        assert_eq!(scanner.cursor(), 0);
    }

    /// The first pop returns the first character and advances the cursor to one.
    #[test]
    fn not_done() {
        let mut scanner = Scanner::new("abc");

        let popped = *scanner.pop();

        assert_eq!(&popped, &'a');
        assert_eq!(scanner.cursor(), 1);
    }

    /// Popping past the end yields the default character and keeps the cursor at the length.
    #[test]
    fn done() {
        let mut scanner = Scanner::new("abc");
        for _ in 0..3 {
            scanner.pop();
        }

        let popped = *scanner.pop();

        assert_eq!(&popped, DEFAULT_CHAR);
        assert_eq!(scanner.cursor(), 3);
    }
}
369
#[cfg(test)]
mod is_punctuation {
    use super::*;

    /// An exclamation mark is recognised as punctuation.
    #[test]
    fn is_punctuation() {
        let mark = '!';

        assert!(Scanner::is_punctuation(&mark));
    }

    /// A letter is not recognised as punctuation.
    #[test]
    fn is_not_punctuation() {
        let letter = 'k';

        assert!(!Scanner::is_punctuation(&letter));
    }
}