use crate::tokenizer::{AssTokenizer, TokenType};
#[cfg(not(feature = "std"))]
extern crate alloc;
#[cfg(not(feature = "std"))]
use alloc::{format, vec::Vec};
/// Checks that input beginning with a byte-order mark (U+FEFF) still yields a
/// section header as the first token.
#[test]
fn tokenizer_bom_variants() {
    // The BOM sits directly in front of the opening bracket of the header.
    let source = "\u{FEFF}[Script Info]\nTitle: Test";
    let mut lexer = AssTokenizer::new(source);

    // `next_token` is unwrapped twice; either layer being absent fails the test.
    let outcome = lexer.next_token().unwrap();
    let leading = outcome.unwrap();

    assert_eq!(leading.token_type, TokenType::SectionHeader);
}
/// Tokenizes a single line mixing emoji, Japanese and Hebrew text and checks
/// that tokenization succeeds and produces at least one token.
///
/// NOTE(review): despite the test name, the input literal is well-formed
/// UTF-8; this exercises multi-byte and right-to-left text rather than
/// malformed data.
#[test]
fn tokenizer_malformed_unicode() {
    let mixed_scripts = "emoji: 🎭🎬 text: こんにちは bidirectional: עברית";
    let mut lexer = AssTokenizer::new(mixed_scripts);

    let produced = lexer.tokenize_all().unwrap();

    assert!(!produced.is_empty());
}
/// Tokenizes a section header followed by a line that starts with a colon and
/// a line that starts with a stray closing brace, then checks that both a
/// section-header token and a colon token were produced.
#[test]
fn tokenizer_context_state_edge_cases() {
    let source = "[Section]\n:value\n}outside";
    let mut lexer = AssTokenizer::new(source);
    let produced = lexer.tokenize_all().unwrap();

    // Project every token down to its kind so the checks below read uniformly.
    let kinds: Vec<&TokenType> = produced.iter().map(|token| &token.token_type).collect();

    assert!(kinds.iter().any(|kind| **kind == TokenType::SectionHeader));
    assert!(kinds.iter().any(|kind| **kind == TokenType::Colon));
}
/// Tokenizes a `Title:` line whose value is 10,000 repeated `a` characters and
/// checks that the result is non-empty and contains at least one text token.
#[test]
fn tokenizer_very_long_tokens() {
    let filler = "a".repeat(10_000);
    let script = format!("[Section]\nTitle: {filler}");

    let mut lexer = AssTokenizer::new(script.as_str());
    let produced = lexer.tokenize_all().unwrap();

    assert!(!produced.is_empty());

    let has_text = produced
        .iter()
        .map(|token| &token.token_type)
        .any(|kind| *kind == TokenType::Text);
    assert!(has_text);
}
/// Tokenizes input that interleaves nested braces, brackets and a colon, then
/// checks that the result is non-empty and includes at least one
/// override-open or override-close token.
#[test]
fn tokenizer_nested_context_handling() {
    let tangled = "{override{nested}text}[section{mixed}]:value{another}";
    let mut lexer = AssTokenizer::new(tangled);
    let produced = lexer.tokenize_all().unwrap();

    assert!(!produced.is_empty());

    // Either delimiter kind is enough to satisfy the check.
    let has_override_delimiter = produced
        .iter()
        .map(|token| &token.token_type)
        .any(|kind| matches!(kind, TokenType::OverrideOpen | TokenType::OverrideClose));
    assert!(has_override_delimiter);
}