/// Splits `text` into tokens on single-space boundaries.
///
/// This deliberately uses `str::split` (not `split_whitespace`), so
/// consecutive spaces produce empty tokens and an empty input yields a
/// single empty token (`[""]`) — `token_count` relies on that sentinel.
pub fn tokenize(text: &str) -> Vec<&str> {
    // A `char` pattern is the idiomatic (and cheaper) form for a
    // single-character separator; the return type annotation makes the
    // `collect` target unambiguous, so no turbofish is needed.
    text.split(' ').collect()
}
4
5pub fn token_count(text: &str) -> usize {
6 let tokens = tokenize(text);
7
8 if tokens == [""] {
9 return 0
10 }
11 tokens.len()
12}
13
#[cfg(test)]
mod tests {
    use crate::{token_count, tokenize};

    #[test]
    fn test_tokenizer() {
        assert_eq!(
            tokenize("hello, this is some text"),
            ["hello,", "this", "is", "some", "text"]
        );
    }

    #[test]
    fn test_tokenizer_single_token() {
        assert_eq!(tokenize("hello"), ["hello"]);
    }

    #[test]
    fn test_tokenizer_empty() {
        // Splitting the empty string yields one empty token.
        assert_eq!(tokenize(""), [""]);
    }

    #[test]
    fn count_simple_text() {
        assert_eq!(token_count("hello, this is some text"), 5);
    }

    #[test]
    fn count_single_token() {
        assert_eq!(token_count("hello"), 1);
    }

    #[test]
    fn count_empty_string() {
        // The empty string is special-cased to zero tokens.
        assert_eq!(token_count(""), 0);
    }
}