use crate::Result;
use alloc::vec::Vec;
#[cfg(not(feature = "std"))]
extern crate alloc;
pub mod scanner;
#[cfg(feature = "simd")]
pub mod simd;
pub mod state;
pub mod tokens;
pub use scanner::{CharNavigator, TokenScanner};
pub use state::{IssueCollector, IssueLevel, TokenContext, TokenIssue};
pub use tokens::{DelimiterType, Token, TokenType};
/// Incremental tokenizer for ASS subtitle source text.
///
/// Produces [`Token`]s whose spans borrow directly from the input string,
/// and tracks a [`TokenContext`] state machine (document / section header /
/// field value / style override) that decides how each character is scanned.
#[derive(Debug, Clone)]
pub struct AssTokenizer<'a> {
    /// Full input text; every token span is a slice of this string.
    source: &'a str,
    /// Low-level scanner/navigator positioned within `source`.
    scanner: TokenScanner<'a>,
    /// Current lexical context driving how the next character is tokenized.
    context: TokenContext,
    /// Lexical issues (warnings/errors) collected during tokenization.
    issues: IssueCollector<'a>,
}
impl<'a> AssTokenizer<'a> {
    /// Returns the byte offset just past a leading UTF-8 BOM, or `0` if absent.
    ///
    /// `U+FEFF` encodes to 3 bytes in UTF-8. Shared by [`Self::new`] and
    /// [`Self::reset`] so the skip logic cannot drift apart.
    fn bom_offset(source: &str) -> usize {
        if source.starts_with('\u{FEFF}') {
            '\u{FEFF}'.len_utf8()
        } else {
            0
        }
    }

    /// Creates a tokenizer over `source`, positioned past any leading BOM.
    #[must_use]
    pub fn new(source: &'a str) -> Self {
        Self {
            source,
            scanner: TokenScanner::new(source, Self::bom_offset(source), 1, 1),
            context: TokenContext::Document,
            issues: IssueCollector::new(),
        }
    }

    /// Scans and returns the next token, or `Ok(None)` at end of input.
    ///
    /// Delimiter characters switch the tokenizer context (section header,
    /// field value, style override), which in turn decides how following
    /// text is scanned.
    ///
    /// # Errors
    ///
    /// Returns an error if the underlying scanner fails, or if a scan
    /// consumed no input (which would otherwise loop forever).
    pub fn next_token(&mut self) -> Result<Option<Token<'a>>> {
        if self.context.allows_whitespace_skipping() {
            self.scanner.navigator_mut().skip_whitespace();
        }
        if self.scanner.navigator().is_at_end() {
            return Ok(None);
        }
        // Record the token start so the span and source location can be
        // reconstructed after the scanner has consumed the token.
        let start_pos = self.scanner.navigator().position();
        let start_line = self.scanner.navigator().line();
        let start_column = self.scanner.navigator().column();
        let current_char = self.scanner.navigator_mut().peek_char()?;
        let token_type = match (current_char, self.context) {
            ('[', _) => {
                self.context = TokenContext::SectionHeader;
                self.scanner.scan_section_header()
            }
            (']', TokenContext::SectionHeader) => {
                self.context = TokenContext::Document;
                self.scanner.navigator_mut().advance_char()?;
                Ok(TokenType::SectionClose)
            }
            (':', TokenContext::Document) => {
                self.context = self.context.enter_field_value();
                self.scanner.navigator_mut().advance_char()?;
                Ok(TokenType::Colon)
            }
            ('{', _) => {
                self.context = TokenContext::StyleOverride;
                self.scanner.scan_style_override()
            }
            ('}', TokenContext::StyleOverride) => {
                self.context = TokenContext::Document;
                self.scanner.navigator_mut().advance_char()?;
                Ok(TokenType::OverrideClose)
            }
            (',', _) => {
                self.scanner.navigator_mut().advance_char()?;
                Ok(TokenType::Comma)
            }
            ('\n' | '\r', _) => {
                self.context = self.context.reset_to_document();
                self.scanner.navigator_mut().advance_char()?;
                // Fold CRLF into a single Newline token. The `is_at_end`
                // guard keeps a lone trailing '\r' from peeking past the
                // end of input (peek_char is fallible there, mirroring the
                // guard at the top of this function).
                if current_char == '\r'
                    && !self.scanner.navigator().is_at_end()
                    && self.scanner.navigator_mut().peek_char()? == '\n'
                {
                    self.scanner.navigator_mut().advance_char()?;
                }
                Ok(TokenType::Newline)
            }
            (';', TokenContext::Document) => self.scanner.scan_comment(),
            ('!', TokenContext::Document) => {
                // "!:" introduces a comment; a bare '!' is ordinary text.
                if self.scanner.navigator().peek_next() == Ok(':') {
                    self.scanner.scan_comment()
                } else {
                    self.scanner.scan_text(self.context)
                }
            }
            // Closing delimiters outside their matching context degrade to
            // single-character text tokens instead of erroring.
            ('}', _) => {
                self.scanner.navigator_mut().advance_char()?;
                Ok(TokenType::Text)
            }
            (']', _) => {
                self.scanner.navigator_mut().advance_char()?;
                Ok(TokenType::Text)
            }
            _ => {
                if self.context == TokenContext::FieldValue {
                    self.scanner.scan_field_value()
                } else {
                    self.scanner.scan_text(self.context)
                }
            }
        }?;
        let end_pos = self.scanner.navigator().position();
        let span = &self.source[start_pos..end_pos];
        // Infinite-loop protection: every successful scan must consume input.
        if start_pos == end_pos && !self.scanner.navigator().is_at_end() {
            return Err(crate::utils::CoreError::internal(
                "Tokenizer position not advancing",
            ));
        }
        Ok(Some(Token {
            token_type,
            span,
            line: start_line,
            column: start_column,
        }))
    }

    /// Tokenizes all remaining input into a vector.
    ///
    /// # Errors
    ///
    /// Propagates any scanner error, and bails out once more than 50 tokens
    /// have been produced.
    ///
    /// NOTE(review): 50 is a very low ceiling for real ASS scripts (the
    /// in-file tests rely on it firing) — confirm whether this guard is
    /// intentional or should scale with input length.
    pub fn tokenize_all(&mut self) -> Result<Vec<Token<'a>>> {
        let mut tokens = Vec::new();
        let mut iteration_count = 0;
        while let Some(token) = self.next_token()? {
            tokens.push(token);
            iteration_count += 1;
            if iteration_count > 50 {
                return Err(crate::utils::CoreError::internal(
                    "Too many tokenizer iterations",
                ));
            }
        }
        Ok(tokens)
    }

    /// Issues collected so far while tokenizing.
    #[must_use]
    pub fn issues(&self) -> &[TokenIssue<'a>] {
        self.issues.issues()
    }

    /// Current byte offset into the source.
    #[must_use]
    pub const fn position(&self) -> usize {
        self.scanner.navigator().position()
    }

    /// Current 1-based line number.
    #[must_use]
    pub const fn line(&self) -> usize {
        self.scanner.navigator().line()
    }

    /// Current 1-based column number.
    #[must_use]
    pub const fn column(&self) -> usize {
        self.scanner.navigator().column()
    }

    /// Rewinds to the start of input (just past any BOM) and clears the
    /// context and all collected issues.
    pub fn reset(&mut self) {
        self.scanner = TokenScanner::new(self.source, Self::bom_offset(self.source), 1, 1);
        self.context = TokenContext::Document;
        self.issues.clear();
    }
}
#[cfg(test)]
mod tests;
#[cfg(test)]
mod inline_tests {
    use super::*;
    #[cfg(not(feature = "std"))]
    use alloc::string::ToString;
    #[cfg(not(feature = "std"))]
    use hashbrown::HashSet;
    #[cfg(feature = "std")]
    use std::collections::HashSet;

    // "[Name]" yields exactly SectionHeader + SectionClose.
    #[test]
    fn tokenize_section_header() {
        let mut tokenizer = AssTokenizer::new("[Script Info]");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert_eq!(tokens.len(), 2);
        assert_eq!(tokens[0].token_type, TokenType::SectionHeader);
        assert_eq!(tokens[1].token_type, TokenType::SectionClose);
    }

    // "Key: Value" produces a Colon token in the second position.
    #[test]
    fn tokenize_field_line() {
        let mut tokenizer = AssTokenizer::new("Title: Test Script");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert!(tokens.len() >= 3);
        assert_eq!(tokens[1].token_type, TokenType::Colon);
    }

    // reset() rewinds position and line back to the initial state.
    #[test]
    fn reset_tokenizer() {
        let mut tokenizer = AssTokenizer::new("Test");
        let _ = tokenizer.next_token().unwrap();
        assert!(tokenizer.position() > 0);
        tokenizer.reset();
        assert_eq!(tokenizer.position(), 0);
        assert_eq!(tokenizer.line(), 1);
    }

    // A leading UTF-8 BOM is skipped and does not appear in any token.
    #[test]
    fn tokenize_with_bom() {
        let mut tokenizer = AssTokenizer::new("\u{FEFF}[Script Info]");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert_eq!(tokens.len(), 2);
        assert_eq!(tokens[0].token_type, TokenType::SectionHeader);
    }

    // Style-override braces produce override and/or text tokens.
    #[test]
    fn tokenize_style_override() {
        let mut tokenizer = AssTokenizer::new("{\\b1}text{\\b0}");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert!(tokens.len() >= 2);
        let has_override = tokens.iter().any(|t| {
            matches!(
                t.token_type,
                TokenType::OverrideBlock | TokenType::OverrideOpen | TokenType::OverrideClose
            )
        });
        let has_text = tokens.iter().any(|t| t.token_type == TokenType::Text);
        assert!(
            has_override || has_text,
            "Should have override or text tokens"
        );
    }

    // Commas are emitted as distinct delimiter tokens.
    #[test]
    fn tokenize_comma_delimiter() {
        let mut tokenizer = AssTokenizer::new("field1,field2,field3");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert!(tokens.iter().any(|t| t.token_type == TokenType::Comma));
    }

    // Both LF and CRLF line endings produce Newline tokens.
    #[test]
    fn tokenize_newline_types() {
        let mut tokenizer = AssTokenizer::new("line1\nline2\r\nline3");
        let tokens = tokenizer.tokenize_all().unwrap();
        let newline_count = tokens
            .iter()
            .filter(|t| t.token_type == TokenType::Newline)
            .count();
        assert!(newline_count >= 2);
    }

    // A ';'-prefixed line is a Comment token.
    #[test]
    fn tokenize_comment_semicolon() {
        let mut tokenizer = AssTokenizer::new("; This is a comment");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert!(!tokens.is_empty());
        assert_eq!(tokens[0].token_type, TokenType::Comment);
    }

    // A "!:"-prefixed line is also a Comment token.
    #[test]
    fn tokenize_comment_exclamation() {
        let mut tokenizer = AssTokenizer::new("!: This is a comment");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert!(!tokens.is_empty());
        assert_eq!(tokens[0].token_type, TokenType::Comment);
    }

    // Stray '}' / ']' outside their context degrade to Text tokens.
    #[test]
    fn tokenize_misplaced_delimiters() {
        let mut tokenizer = AssTokenizer::new("text}more]text");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert!(tokens.iter().any(|t| t.token_type == TokenType::Text));
    }

    // The value side of "Key: Value" still produces Text tokens.
    #[test]
    fn tokenize_field_value_context() {
        let mut tokenizer = AssTokenizer::new("Key: Value with spaces");
        let tokens = tokenizer.tokenize_all().unwrap();
        let has_text = tokens
            .iter()
            .any(|t| matches!(t.token_type, TokenType::Text));
        assert!(has_text);
    }

    // '!' not followed by ':' is ordinary text, not a comment.
    #[test]
    fn tokenize_exclamation_without_colon() {
        let mut tokenizer = AssTokenizer::new("!not a comment");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert!(tokens.iter().any(|t| t.token_type == TokenType::Text));
    }

    // Smoke test: tokenize_all terminates on a long uniform input.
    #[test]
    fn tokenize_all_iteration_limit() {
        let repeated_text = "a".repeat(100);
        let mut tokenizer = AssTokenizer::new(&repeated_text);
        let result = tokenizer.tokenize_all();
        // Tautological assertion: only checks that the call returns.
        assert!(result.is_ok() || result.is_err());
    }

    // Position advances after consuming a token; starts at (0, 1, 1).
    #[test]
    fn tokenizer_position_tracking() {
        let mut tokenizer = AssTokenizer::new("Test\nLine 2");
        let initial_pos = tokenizer.position();
        let initial_line = tokenizer.line();
        let initial_col = tokenizer.column();
        assert_eq!(initial_pos, 0);
        assert_eq!(initial_line, 1);
        assert_eq!(initial_col, 1);
        let _ = tokenizer.next_token().unwrap();
        assert!(tokenizer.position() > initial_pos);
    }

    // issues() is accessible after a full tokenize pass.
    #[test]
    fn tokenizer_issues_collection() {
        let mut tokenizer = AssTokenizer::new("test content");
        let _ = tokenizer.tokenize_all().unwrap();
        let _issues = tokenizer.issues();
    }

    // Empty input yields Ok(None) immediately.
    #[test]
    fn tokenize_empty_input() {
        let mut tokenizer = AssTokenizer::new("");
        let result = tokenizer.next_token().unwrap();
        assert!(result.is_none());
    }

    // Whitespace-only input is skipped entirely (Document context
    // allows whitespace skipping) and yields no token.
    #[test]
    fn tokenize_only_whitespace() {
        let mut tokenizer = AssTokenizer::new(" \t ");
        let result = tokenizer.next_token().unwrap();
        assert!(result.is_none());
    }

    // After a successful scan, either input was consumed or we hit the end.
    #[test]
    fn tokenizer_infinite_loop_protection() {
        let mut tokenizer = AssTokenizer::new("test");
        let result = tokenizer.next_token();
        assert!(result.is_ok());
        assert!(tokenizer.position() > 0 || tokenizer.scanner.navigator().is_at_end());
    }

    // tokenize_all either stays within the 50-token cap or reports the
    // iteration-limit error.
    #[test]
    fn tokenizer_iteration_limit_exceeded() {
        let long_content = "a ".repeat(30);
        let mut tokenizer = AssTokenizer::new(&long_content);
        let result = tokenizer.tokenize_all();
        match result {
            Ok(tokens) => assert!(tokens.len() <= 50),
            Err(e) => assert!(e.to_string().contains("Too many tokenizer iterations")),
        }
    }

    // Walks the full context state machine:
    // Document -> SectionHeader -> Document -> FieldValue.
    #[test]
    fn tokenizer_context_transitions_comprehensive() {
        let mut tokenizer = AssTokenizer::new("[Section]:value{override}text\n");
        assert_eq!(tokenizer.context, TokenContext::Document);
        let token1 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token1.token_type, TokenType::SectionHeader);
        assert_eq!(tokenizer.context, TokenContext::SectionHeader);
        let token2 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token2.token_type, TokenType::SectionClose);
        assert_eq!(tokenizer.context, TokenContext::Document);
        let token3 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token3.token_type, TokenType::Colon);
        assert_eq!(tokenizer.context, TokenContext::FieldValue);
        let _remaining_tokens = tokenizer.tokenize_all().unwrap();
    }

    // Leading '}' or ']' in Document context become one-char Text tokens.
    #[test]
    fn tokenizer_delimiter_in_wrong_context() {
        let mut tokenizer = AssTokenizer::new("}text");
        let token = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token.token_type, TokenType::Text);
        assert_eq!(token.span, "}");
        let mut tokenizer2 = AssTokenizer::new("]text");
        let token2 = tokenizer2.next_token().unwrap().unwrap();
        assert_eq!(token2.token_type, TokenType::Text);
        assert_eq!(token2.span, "]");
    }

    // The BOM offset (3 bytes) is preserved across reset().
    #[test]
    fn tokenizer_bom_edge_cases() {
        let mut tokenizer = AssTokenizer::new("\u{FEFF}content");
        assert_eq!(tokenizer.position(), 3);
        let _token = tokenizer.next_token().unwrap();
        tokenizer.reset();
        assert_eq!(tokenizer.position(), 3);
        assert_eq!(tokenizer.line(), 1);
        assert_eq!(tokenizer.column(), 1);
        assert_eq!(tokenizer.context, TokenContext::Document);
    }

    // A CRLF pair collapses into a single Newline token.
    #[test]
    fn tokenizer_carriage_return_line_feed() {
        let mut tokenizer = AssTokenizer::new("line1\r\nline2");
        let token1 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token1.token_type, TokenType::Text);
        let token2 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token2.token_type, TokenType::Newline);
        assert_eq!(tokenizer.context, TokenContext::Document);
        let token3 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token3.token_type, TokenType::Text);
        assert_eq!(token3.span, "line2");
    }

    // "!:" is a comment; bare '!' is text.
    #[test]
    fn tokenizer_exclamation_comment_detection() {
        let mut tokenizer = AssTokenizer::new("!:comment");
        let token = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token.token_type, TokenType::Comment);
        let mut tokenizer2 = AssTokenizer::new("!text");
        let token2 = tokenizer2.next_token().unwrap().unwrap();
        assert_eq!(token2.token_type, TokenType::Text);
    }

    // After the colon, the tokenizer is in FieldValue context.
    #[test]
    fn tokenizer_field_value_context_handling() {
        let mut tokenizer = AssTokenizer::new("key:value with spaces,next");
        let token1 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token1.token_type, TokenType::Text);
        let token2 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token2.token_type, TokenType::Colon);
        assert_eq!(tokenizer.context, TokenContext::FieldValue);
        let token3 = tokenizer.next_token().unwrap().unwrap();
        assert!(matches!(
            token3.token_type,
            TokenType::Text | TokenType::Number | TokenType::HexValue
        ));
    }

    // Line number grows across newline tokens; position is monotonic.
    #[test]
    fn tokenizer_position_line_column_tracking() {
        let mut tokenizer = AssTokenizer::new("first\nsecond\nthird");
        assert_eq!(tokenizer.position(), 0);
        assert_eq!(tokenizer.line(), 1);
        assert_eq!(tokenizer.column(), 1);
        let _token1 = tokenizer.next_token().unwrap().unwrap();
        let pos1 = tokenizer.position();
        let line1 = tokenizer.line();
        let _col1 = tokenizer.column();
        let _token2 = tokenizer.next_token().unwrap().unwrap();
        assert!(tokenizer.line() > line1);
        let _token3 = tokenizer.next_token().unwrap().unwrap();
        assert!(tokenizer.position() > pos1);
    }

    // A mixed input produces several distinct token types.
    #[test]
    fn tokenizer_all_delimiter_types() {
        let mut tokenizer = AssTokenizer::new("[section]:value,field{override}text\n");
        let tokens = tokenizer.tokenize_all().unwrap();
        let types: HashSet<_> = tokens.iter().map(|t| &t.token_type).collect();
        assert!(types.len() > 1);
        assert!(
            types.contains(&TokenType::SectionHeader) || types.contains(&TokenType::SectionOpen)
        );
        assert!(types.contains(&TokenType::Colon));
        assert!(types.contains(&TokenType::Comma));
    }

    // Resetting an empty tokenizer restores the (0, 1, 1) start state.
    #[test]
    fn tokenizer_empty_reset_state() {
        let mut tokenizer = AssTokenizer::new("");
        let result = tokenizer.next_token().unwrap();
        assert!(result.is_none());
        tokenizer.reset();
        assert_eq!(tokenizer.position(), 0);
        assert_eq!(tokenizer.line(), 1);
        assert_eq!(tokenizer.column(), 1);
    }

    // Leading whitespace is skipped before a section header is scanned.
    #[test]
    fn tokenizer_whitespace_handling_contexts() {
        let mut tokenizer = AssTokenizer::new("  [  section  ]  ");
        let token1 = tokenizer.next_token().unwrap().unwrap();
        assert!(matches!(
            token1.token_type,
            TokenType::SectionHeader | TokenType::SectionOpen
        ));
        let _remaining = tokenizer.tokenize_all().unwrap();
    }

    // issues() starts empty and is cleared again by reset().
    #[test]
    fn tokenizer_issue_collection_access() {
        let mut tokenizer = AssTokenizer::new("valid content");
        assert!(tokenizer.issues().is_empty());
        let _tokens = tokenizer.tokenize_all().unwrap();
        let _issues = tokenizer.issues();
        tokenizer.reset();
        assert!(tokenizer.issues().is_empty());
    }

    // position/line/column accessors are usable before and after scanning.
    #[test]
    fn tokenizer_scanner_navigation_access() {
        let mut tokenizer = AssTokenizer::new("test content");
        let initial_pos = tokenizer.position();
        let initial_line = tokenizer.line();
        let initial_col = tokenizer.column();
        assert_eq!(initial_pos, 0);
        assert_eq!(initial_line, 1);
        assert_eq!(initial_col, 1);
        let _token = tokenizer.next_token().unwrap();
        let _new_pos = tokenizer.position();
        let _new_line = tokenizer.line();
        let _new_col = tokenizer.column();
    }

    // Interleaved delimiter characters still yield Text tokens.
    #[test]
    fn tokenizer_mixed_context_characters() {
        let mut tokenizer = AssTokenizer::new("text{override[section]:value}more");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert!(!tokens.is_empty());
        assert!(tokens.iter().any(|t| t.token_type == TokenType::Text));
    }

    // ';' comments are only recognized in Document context (tested here).
    #[test]
    fn tokenizer_semicolon_comment_in_document_context() {
        let mut tokenizer = AssTokenizer::new("; comment in document context");
        let token = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token.token_type, TokenType::Comment);
    }

    // Without a BOM, tokenization starts at byte offset 0.
    #[test]
    fn tokenizer_no_bom_content() {
        let mut tokenizer = AssTokenizer::new("content without BOM");
        assert_eq!(tokenizer.position(), 0);
        let _token = tokenizer.next_token().unwrap();
        assert!(tokenizer.position() > 0);
    }

    // A NUL byte in the input never pushes the position past the source.
    #[test]
    fn tokenizer_infinite_loop_protection_error() {
        let source = "invalid_char\x00";
        let mut tokenizer = AssTokenizer::new(source);
        match tokenizer.next_token() {
            Ok(_) | Err(_) => {
                // Equivalent to `position() <= source.len()`.
                assert!(
                    tokenizer.position() < source.len() || tokenizer.position() == source.len()
                );
            }
        }
    }

    // Position advances strictly between tokens; line/column are 1-based.
    #[test]
    fn tokenizer_position_line_column_advancement() {
        let source = "[Section]\nKey=Value\n! Comment";
        let mut tokenizer = AssTokenizer::new(source);
        let mut last_pos = 0;
        let mut tokens = Vec::new();
        while let Ok(Some(token)) = tokenizer.next_token() {
            let current_pos = tokenizer.position();
            if !tokenizer.scanner.navigator().is_at_end() {
                assert!(current_pos > last_pos, "Position must advance");
            }
            assert!(token.line >= 1);
            assert!(token.column >= 1);
            tokens.push(token);
            last_pos = current_pos;
            // Safety valve so a regression cannot hang the test.
            if tokens.len() > 20 {
                break;
            }
        }
        assert!(!tokens.is_empty());
    }

    // Every span is a slice of the original source (same allocation).
    #[test]
    fn tokenizer_span_creation_and_boundaries() {
        let source = "[Test]\nField=Value123";
        let mut tokenizer = AssTokenizer::new(source);
        while let Ok(Some(token)) = tokenizer.next_token() {
            assert!(
                !token.span.is_empty()
                    || token.token_type == crate::tokenizer::tokens::TokenType::Comment
            );
            assert!(token.span.len() <= source.len());
            // Pointer arithmetic: the span must start inside `source`.
            let start_pos = token.span.as_ptr() as usize - source.as_ptr() as usize;
            assert!(start_pos < source.len());
        }
    }

    // An input with far more than 50 tokens must hit the iteration cap.
    #[test]
    fn tokenizer_iteration_limit_comprehensive() {
        let source = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,1,2,3,4,5,6,7,8,9,0,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z";
        let mut tokenizer = AssTokenizer::new(source);
        let result = tokenizer.tokenize_all();
        if let Ok(tokens) = result {
            assert!(tokens.len() <= 50, "Should respect iteration limit");
        } else {
            // Err is also acceptable: the iteration guard fired.
        }
    }

    // On error, issues should have been collected; on success, tokens exist.
    #[test]
    fn tokenizer_all_error_recovery() {
        let source = "Valid[Section]\n\x00InvalidChar\nKey=Value";
        let mut tokenizer = AssTokenizer::new(source);
        let result = tokenizer.tokenize_all();
        match result {
            Ok(tokens) => {
                assert!(!tokens.is_empty());
            }
            Err(_) => {
                assert!(!tokenizer.issues().is_empty());
            }
        }
    }

    // Empty source: clean start state and an immediate Ok(None).
    #[test]
    fn tokenizer_empty_source_boundaries() {
        let source = "";
        let mut tokenizer = AssTokenizer::new(source);
        assert_eq!(tokenizer.position(), 0);
        assert_eq!(tokenizer.line(), 1);
        assert_eq!(tokenizer.column(), 1);
        let result = tokenizer.next_token();
        assert!(result.is_ok());
        assert!(result.unwrap().is_none());
    }

    // A one-character source is consumed as a single "a" span.
    #[test]
    fn tokenizer_single_character_advancement() {
        let source = "a";
        let mut tokenizer = AssTokenizer::new(source);
        let start_pos = tokenizer.position();
        if let Ok(Some(token)) = tokenizer.next_token() {
            let end_pos = tokenizer.position();
            assert!(end_pos > start_pos);
            assert_eq!(token.span, "a");
        }
    }

    // Multi-byte UTF-8 input: byte positions only ever move forward.
    #[test]
    fn tokenizer_multi_byte_character_advancement() {
        let source = "🎵音楽";
        let mut tokenizer = AssTokenizer::new(source);
        let mut positions = Vec::new();
        positions.push(tokenizer.position());
        while let Ok(Some(_)) = tokenizer.next_token() {
            positions.push(tokenizer.position());
            if positions.len() > 10 {
                break;
            }
        }
        for window in positions.windows(2) {
            if window[1] != window[0] {
                assert!(window[1] > window[0]);
            }
        }
    }

    // All produced tokens carry non-empty spans (Comments excepted).
    #[test]
    fn tokenizer_token_push_verification() {
        let source = "Key1=Value1\nKey2=Value2";
        let mut tokenizer = AssTokenizer::new(source);
        let tokens = tokenizer.tokenize_all().unwrap_or_default();
        assert!(!tokens.is_empty());
        for token in &tokens {
            assert!(
                !token.span.is_empty()
                    || token.token_type == crate::tokenizer::tokens::TokenType::Comment
            );
        }
    }

    // Tokens created in override context have valid locations and spans.
    #[test]
    fn tokenizer_context_based_token_creation() {
        let source = "{\\b1}Bold text{\\b0}";
        let mut tokenizer = AssTokenizer::new(source);
        let mut token_count = 0;
        while let Ok(Some(token)) = tokenizer.next_token() {
            assert!(token.line >= 1);
            assert!(token.column >= 1);
            assert!(!token.span.is_empty());
            token_count += 1;
            // Safety valve so a regression cannot hang the test.
            if token_count > 15 {
                break;
            }
        }
        assert!(token_count > 0);
    }

    // The section-header token records its start location (1, 1).
    #[test]
    fn tokenizer_section_header_start_tracking() {
        let source = "[Script Info]";
        let mut tokenizer = AssTokenizer::new(source);
        let token = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token.line, 1);
        assert_eq!(token.column, 1);
    }

    // ']' in SectionHeader context produces SectionClose.
    #[test]
    fn tokenizer_section_close_bracket() {
        let source = "[Test]";
        let mut tokenizer = AssTokenizer::new(source);
        let _header = tokenizer.next_token().unwrap().unwrap();
        let close = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(
            close.token_type,
            crate::tokenizer::tokens::TokenType::SectionClose
        );
    }

    // ':' in Document context produces Colon.
    #[test]
    fn tokenizer_colon_field_separator() {
        let source = "Key:Value";
        let mut tokenizer = AssTokenizer::new(source);
        let _key = tokenizer.next_token().unwrap().unwrap();
        let colon = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(colon.token_type, crate::tokenizer::tokens::TokenType::Colon);
    }

    // ',' produces Comma regardless of context.
    #[test]
    fn tokenizer_comma_separator() {
        let source = "val1,val2";
        let mut tokenizer = AssTokenizer::new(source);
        let _val1 = tokenizer.next_token().unwrap().unwrap();
        let comma = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(comma.token_type, crate::tokenizer::tokens::TokenType::Comma);
    }

    // '\n' after text produces a Newline token.
    #[test]
    fn tokenizer_newline_handling() {
        let source = "line1\nline2\r\nline3";
        let mut tokenizer = AssTokenizer::new(source);
        let _line1 = tokenizer.next_token().unwrap().unwrap();
        let newline1 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(
            newline1.token_type,
            crate::tokenizer::tokens::TokenType::Newline
        );
    }

    // "{...}" is scanned as an OverrideBlock token.
    #[test]
    fn tokenizer_style_override_tokens() {
        let source = "{\\b1}text{\\b0}";
        let mut tokenizer = AssTokenizer::new(source);
        let override_block = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(
            override_block.token_type,
            crate::tokenizer::tokens::TokenType::OverrideBlock
        );
    }

    // "!:" comment via the fully-qualified TokenType path.
    #[test]
    fn tokenizer_comment_exclamation() {
        let source = "!: This is a comment";
        let mut tokenizer = AssTokenizer::new(source);
        let comment = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(
            comment.token_type,
            crate::tokenizer::tokens::TokenType::Comment
        );
    }

    // ';' comment via the fully-qualified TokenType path.
    #[test]
    fn tokenizer_comment_semicolon() {
        let source = "; This is a comment";
        let mut tokenizer = AssTokenizer::new(source);
        let comment = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(
            comment.token_type,
            crate::tokenizer::tokens::TokenType::Comment
        );
    }

    // In FieldValue context trailing whitespace becomes a Whitespace token
    // (FieldValue does not skip whitespace like Document does).
    #[test]
    fn tokenizer_whitespace_token() {
        let source = "Key: \t ";
        let mut tokenizer = AssTokenizer::new(source);
        let _key = tokenizer.next_token().unwrap().unwrap();
        let _colon = tokenizer.next_token().unwrap().unwrap();
        let whitespace = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(
            whitespace.token_type,
            crate::tokenizer::tokens::TokenType::Whitespace
        );
    }

    // Anything not matching a delimiter arm falls back to Text.
    #[test]
    fn tokenizer_text_fallback() {
        let source = "regular_text_123";
        let mut tokenizer = AssTokenizer::new(source);
        let text = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(text.token_type, crate::tokenizer::tokens::TokenType::Text);
    }

    // Normal input never triggers the "position not advancing" error.
    #[test]
    fn tokenizer_infinite_loop_error_path() {
        let source = "test";
        let mut tokenizer = AssTokenizer::new(source);
        let result = tokenizer.next_token();
        assert!(result.is_ok());
    }

    // Span, line, and column of a single-token source.
    #[test]
    fn tokenizer_span_creation_path() {
        let source = "test";
        let mut tokenizer = AssTokenizer::new(source);
        let token = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token.span, "test");
        assert_eq!(token.line, 1);
        assert_eq!(token.column, 1);
    }

    // End of input is reported as Ok(None), not an error.
    #[test]
    fn tokenizer_end_of_input_handling() {
        let source = "";
        let mut tokenizer = AssTokenizer::new(source);
        let result = tokenizer.next_token().unwrap();
        assert!(result.is_none());
    }

    // tokenize_all succeeds end-to-end on trivially valid input.
    #[test]
    fn tokenizer_all_error_propagation() {
        let source = "valid_content";
        let mut tokenizer = AssTokenizer::new(source);
        let tokens = tokenizer.tokenize_all().unwrap();
        assert!(!tokens.is_empty());
    }

    // A lone '\r' (no following '\n') also counts as a Newline and
    // advances the line counter.
    #[test]
    fn tokenizer_carriage_return_handling() {
        let source = "line1\rline2";
        let mut tokenizer = AssTokenizer::new(source);
        let _line1 = tokenizer.next_token().unwrap().unwrap();
        let newline = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(
            newline.token_type,
            crate::tokenizer::tokens::TokenType::Newline
        );
        let _line2 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(tokenizer.line(), 2);
    }
}