extern crate alloc;
use alloc::string::String;
use alloc::vec::Vec;
/// Splits `text` into lowercase word tokens.
///
/// A token is a maximal run of characters that are alphanumeric (per
/// [`char::is_alphanumeric`]) or `_`. Every other character acts as a
/// separator and is discarded; runs of separators never produce empty tokens.
///
/// Each character of a token is lowercased individually with
/// [`char::to_lowercase`], so one input character may expand into several
/// output characters.
///
/// Returns the tokens in the order they occur in `text`; the result is empty
/// when `text` contains no token characters.
pub fn simple_lex(text: &str) -> Vec<String> {
    text.split(|ch: char| !(ch.is_alphanumeric() || ch == '_'))
        // Adjacent separators (and leading/trailing ones) yield empty pieces.
        .filter(|word| !word.is_empty())
        // Lowercase per character rather than per string so the mapping is
        // context-free.
        .map(|word| {
            word.chars()
                .flat_map(|ch| ch.to_lowercase())
                .collect::<String>()
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that lexing `input` yields exactly the tokens in `expected`,
    /// in order.
    fn assert_tokens(input: &str, expected: &[&str]) {
        assert_eq!(simple_lex(input), expected);
    }

    /// Mixed-case ASCII words separated by punctuation come out lowercased.
    #[test]
    fn lowercase_ascii_words() {
        assert_tokens(
            "Hello, World! Quick BROWN fox.",
            &["hello", "world", "quick", "brown", "fox"],
        );
    }

    /// An underscore is part of a token rather than a separator.
    #[test]
    fn keeps_underscore_inside_word() {
        assert_tokens("post_title COMMENT_BODY", &["post_title", "comment_body"]);
    }

    /// Leading, trailing and repeated separators never produce empty tokens.
    #[test]
    fn collapses_runs_of_separators() {
        assert_tokens(" --- foo ,, bar . . . baz ", &["foo", "bar", "baz"]);
    }

    /// Inputs without any token characters produce no tokens at all.
    #[test]
    fn empty_input_yields_empty() {
        assert!(simple_lex("").is_empty());
        assert!(simple_lex(",,, ...").is_empty());
    }
}