use crate::lex::token::Token;
use logos::Logos;
/// Lexes `source` into a flat list of `(Token, byte-span)` pairs.
///
/// Tokens that the lexer fails to recognize (i.e. `Err` results from the
/// `logos`-derived lexer) are silently skipped; only successfully lexed
/// tokens appear in the output. The span is the byte range of the token
/// within `source`.
pub fn tokenize(source: &str) -> Vec<(Token, logos::Span)> {
    // `spanned()` pairs each lex result with its span, replacing the manual
    // `lexer.next()` / `lexer.span()` bookkeeping loop.
    Token::lexer(source)
        .spanned()
        .filter_map(|(result, span)| result.ok().map(|token| (token, span)))
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two words separated by a single space lex to text/whitespace/text.
    #[test]
    fn test_tokenizes() {
        let tokens = tokenize("hello world");
        assert_eq!(tokens.len(), 3);
        assert_eq!(tokens[0].0, Token::Text("hello".to_string()));
        assert_eq!(tokens[1].0, Token::Whitespace(1));
        assert_eq!(tokens[2].0, Token::Text("world".to_string()));
    }

    /// An empty source string produces no tokens at all.
    #[test]
    fn test_empty_input() {
        assert_eq!(tokenize(""), vec![]);
    }

    /// A numbered title followed by two indented dash items lexes to the
    /// full expected token stream, in order.
    #[test]
    fn test_complex_tokenization() {
        let input = "1. Session Title\n - Item 1\n - Item 2";
        let tokens = tokenize(input);
        let expected = [
            // "1. Session Title\n"
            Token::Number("1".to_string()),
            Token::Period,
            Token::Whitespace(1),
            Token::Text("Session".to_string()),
            Token::Whitespace(1),
            Token::Text("Title".to_string()),
            Token::BlankLine(Some("\n".to_string())),
            // " - Item 1\n"
            Token::Indentation,
            Token::Dash,
            Token::Whitespace(1),
            Token::Text("Item".to_string()),
            Token::Whitespace(1),
            Token::Number("1".to_string()),
            Token::BlankLine(Some("\n".to_string())),
            // " - Item 2"
            Token::Indentation,
            Token::Dash,
            Token::Whitespace(1),
            Token::Text("Item".to_string()),
            Token::Whitespace(1),
            Token::Number("2".to_string()),
        ];
        for (index, want) in expected.iter().enumerate() {
            assert_eq!(tokens[index].0, *want, "mismatch at token {index}");
        }
    }

    /// Mixed spaces and a tab: spaces, then the tab as indentation, then
    /// the trailing spaces.
    #[test]
    fn test_whitespace_only() {
        let tokens = tokenize("   \t  ");
        assert_eq!(tokens.len(), 3);
        assert_eq!(tokens[0].0, Token::Whitespace(3));
        assert_eq!(tokens[1].0, Token::Indentation);
        assert_eq!(tokens[2].0, Token::Whitespace(2));
    }
}