// textrs/lib.rs — space-based tokenizer and token counter.

/// Splits `text` into tokens on single space characters.
///
/// Note: splitting the empty string yields `[""]` (one empty token) —
/// `token_count` relies on this quirk when reporting 0 for empty input.
/// Runs of spaces also produce empty tokens (e.g. `"a  b"` -> `["a", "", "b"]`).
pub fn tokenize(text: &str) -> Vec<&str> {
    // `char` pattern instead of a one-character `&str` pattern
    // (clippy::single_char_pattern); turbofish dropped since the
    // return type already fixes the collection type.
    text.split(' ').collect()
}
4
5pub fn token_count(text: &str) -> usize {
6    let tokens = tokenize(text);
7
8    if tokens == [""] {
9        return 0
10    }
11    tokens.len()
12}
13
#[cfg(test)]
mod tests {
    // Pull the parent module's items in wholesale — the idiomatic form
    // for an inline unit-test module.
    use super::*;

    #[test]
    fn test_tokenizer() {
        assert_eq!(
            tokenize("hello, this is some text"),
            ["hello,", "this", "is", "some", "text"]
        );
    }

    #[test]
    fn test_tokenizer_single_token() {
        assert_eq!(tokenize("hello"), ["hello"]);
    }

    #[test]
    fn test_tokenizer_empty() {
        // Splitting "" yields a single empty token, not an empty Vec.
        assert_eq!(tokenize(""), [""]);
    }

    #[test]
    fn count_simple_text() {
        assert_eq!(token_count("hello, this is some text"), 5);
    }

    #[test]
    fn count_single_token() {
        assert_eq!(token_count("hello"), 1);
    }

    #[test]
    fn count_empty_string() {
        assert_eq!(token_count(""), 0);
    }
}