//! Whitespace tokenization helpers: `tokenize` and `token_count`.
/// Splits `text` into whitespace-separated tokens.
///
/// Uses [`str::split_whitespace`], so any run of Unicode whitespace
/// (spaces, tabs, newlines) acts as a single separator, and an empty or
/// all-whitespace input yields no tokens. The previous `split(" ")`
/// produced spurious empty tokens for consecutive spaces and could not
/// handle tabs or newlines at all.
pub fn tokenize(text: &str) -> Vec<&str> {
    text.split_whitespace().collect()
}

/// Counts the whitespace-separated tokens in `text`.
///
/// Returns 0 for an empty or all-whitespace string. No sentinel check is
/// needed: `tokenize` never emits empty tokens, so the length is the count.
pub fn token_count(text: &str) -> usize {
    tokenize(text).len()
}

#[cfg(test)]
mod tests {
    use crate::{token_count, tokenize};

    #[test]
    fn test_tokenizer() {
        let t = "hello, this is some text";
        assert_eq!(tokenize(t), ["hello,", "this", "is", "some", "text"]);
    }

    #[test]
    fn test_tokenizer_single_token() {
        assert_eq!(tokenize("hello"), ["hello"]);
    }

    #[test]
    fn test_tokenizer_empty() {
        // split_whitespace yields no tokens for an empty input,
        // rather than the old sentinel value [""].
        assert!(tokenize("").is_empty());
    }

    #[test]
    fn test_tokenizer_collapses_whitespace_runs() {
        // Regression test for the split(" ") bug: runs of mixed
        // whitespace must count as a single separator.
        assert_eq!(tokenize("a  b\tc\nd"), ["a", "b", "c", "d"]);
    }

    #[test]
    fn count_simple_text() {
        assert_eq!(token_count("hello, this is some text"), 5);
    }

    #[test]
    fn count_single_token() {
        assert_eq!(token_count("hello"), 1);
    }

    #[test]
    fn count_empty_string() {
        assert_eq!(token_count(""), 0);
    }

    #[test]
    fn count_blank_string() {
        // All-whitespace input has no tokens.
        assert_eq!(token_count("   \t\n"), 0);
    }
}