use crate::Result;

#[cfg(not(feature = "std"))]
extern crate alloc;

use alloc::vec::Vec;

pub mod scanner;
#[cfg(feature = "simd")]
pub mod simd;
pub mod state;
pub mod tokens;

pub use scanner::{CharNavigator, TokenScanner};
pub use state::{IssueCollector, IssueLevel, TokenContext, TokenIssue};
pub use tokens::{DelimiterType, Token, TokenType};

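/// Streaming tokenizer for ASS (Advanced SubStation Alpha) subtitle source text.
///
/// Tokens borrow from the input (`Token<'a>`), and the tokenizer tracks a
/// [`TokenContext`] so delimiters such as `[`, `{`, `:` and `,` are interpreted
/// according to where they appear.
///
/// A minimal usage sketch (the import path is hypothetical; adjust to this
/// crate's actual layout):
///
/// ```ignore
/// use ass_core::tokenizer::AssTokenizer; // hypothetical path
///
/// let mut tokenizer = AssTokenizer::new("[Script Info]\nTitle: Example");
/// while let Some(token) = tokenizer.next_token()? {
///     println!("{:?} at {}:{}", token.token_type, token.line, token.column);
/// }
/// ```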
#[derive(Debug, Clone)]
pub struct AssTokenizer<'a> {
    /// Source text being tokenized; tokens borrow from it (zero-copy).
    source: &'a str,
    /// Low-level scanner over the source characters.
    scanner: TokenScanner<'a>,
    /// Current lexical context, which drives delimiter interpretation.
    context: TokenContext,
    /// Issues collected while tokenizing.
    issues: IssueCollector<'a>,
}
impl<'a> AssTokenizer<'a> {
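    /// Creates a tokenizer over `source`, skipping a leading UTF-8 BOM if present.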
    #[must_use]
    pub fn new(source: &'a str) -> Self {
        let initial_position = if source.starts_with('\u{FEFF}') {
            3 // byte length of the UTF-8 BOM
        } else {
            0
        };

        Self {
            source,
            scanner: TokenScanner::new(source, initial_position, 1, 1),
            context: TokenContext::Document,
            issues: IssueCollector::new(),
        }
    }

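    /// Scans and returns the next token, or `Ok(None)` at end of input.
    ///
    /// Whitespace is skipped first when the current context allows it, and the
    /// context is updated as delimiters are consumed.
    ///
    /// # Errors
    ///
    /// Returns an error if scanning fails or if the internal position does not
    /// advance (infinite-loop protection).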
    pub fn next_token(&mut self) -> Result<Option<Token<'a>>> {
        if self.context.allows_whitespace_skipping() {
            self.scanner.navigator_mut().skip_whitespace();
        }

        if self.scanner.navigator().is_at_end() {
            return Ok(None);
        }

        let start_pos = self.scanner.navigator().position();
        let start_line = self.scanner.navigator().line();
        let start_column = self.scanner.navigator().column();

        let current_char = self.scanner.navigator_mut().peek_char()?;

        let token_type = match (current_char, self.context) {
            ('[', _) => {
                self.context = TokenContext::SectionHeader;
                self.scanner.scan_section_header()
            }
            (']', TokenContext::SectionHeader) => {
                self.context = TokenContext::Document;
                self.scanner.navigator_mut().advance_char()?;
                Ok(TokenType::SectionClose)
            }
            (':', TokenContext::Document) => {
                self.context = self.context.enter_field_value();
                self.scanner.navigator_mut().advance_char()?;
                Ok(TokenType::Colon)
            }

            ('{', _) => {
                self.context = TokenContext::StyleOverride;
                self.scanner.scan_style_override()
            }
            ('}', TokenContext::StyleOverride) => {
                self.context = TokenContext::Document;
                self.scanner.navigator_mut().advance_char()?;
                Ok(TokenType::OverrideClose)
            }
            (',', _) => {
                self.scanner.navigator_mut().advance_char()?;
                Ok(TokenType::Comma)
            }
            ('\n' | '\r', _) => {
                self.context = self.context.reset_to_document();
                self.scanner.navigator_mut().advance_char()?;
                // Consume CRLF as a single newline token (guard EOF before peeking).
                if current_char == '\r'
                    && !self.scanner.navigator().is_at_end()
                    && self.scanner.navigator_mut().peek_char()? == '\n'
                {
                    self.scanner.navigator_mut().advance_char()?;
                }
                Ok(TokenType::Newline)
            }
            (';', TokenContext::Document) => self.scanner.scan_comment(),
            ('!', TokenContext::Document) => {
                // `!:` introduces a comment; a bare `!` is ordinary text.
                if self.scanner.navigator().peek_next() == Ok(':') {
                    self.scanner.scan_comment()
                } else {
                    self.scanner.scan_text(self.context)
                }
            }
            // Closing delimiters outside their matching context degrade to text.
            ('}', _) | (']', _) => {
                self.scanner.navigator_mut().advance_char()?;
                Ok(TokenType::Text)
            }
            _ => {
                if self.context == TokenContext::FieldValue {
                    self.scanner.scan_field_value()
                } else {
                    self.scanner.scan_text(self.context)
                }
            }
        }?;

        let end_pos = self.scanner.navigator().position();
        let span = &self.source[start_pos..end_pos];

        // Infinite-loop protection: every token must consume at least one byte.
        if start_pos == end_pos && !self.scanner.navigator().is_at_end() {
            return Err(crate::utils::CoreError::internal(
                "Tokenizer position not advancing",
            ));
        }

        Ok(Some(Token {
            token_type,
            span,
            line: start_line,
            column: start_column,
        }))
    }

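    /// Tokenizes the remaining input into a `Vec` of tokens.
    ///
    /// A minimal sketch of the intended call pattern:
    ///
    /// ```ignore
    /// let mut tokenizer = AssTokenizer::new("Title: Example");
    /// let tokens = tokenizer.tokenize_all()?;
    /// assert!(!tokens.is_empty());
    /// ```
    ///
    /// # Errors
    ///
    /// Returns an error if any [`AssTokenizer::next_token`] call fails or if
    /// the internal iteration guard (currently 50 tokens) trips.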
    pub fn tokenize_all(&mut self) -> Result<Vec<Token<'a>>> {
        let mut tokens = Vec::new();
        let mut iteration_count = 0;
        while let Some(token) = self.next_token()? {
            tokens.push(token);
            iteration_count += 1;
            // Hard cap as loop protection; the tests exercise this limit.
            if iteration_count > 50 {
                return Err(crate::utils::CoreError::internal(
                    "Too many tokenizer iterations",
                ));
            }
        }

        Ok(tokens)
    }

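    /// Returns the issues collected during tokenization so far.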
    #[must_use]
    pub fn issues(&self) -> &[TokenIssue<'a>] {
        self.issues.issues()
    }

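    /// Current byte offset into the source text.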
    #[must_use]
    pub const fn position(&self) -> usize {
        self.scanner.navigator().position()
    }

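    /// Current line number (1-based).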
    #[must_use]
    pub const fn line(&self) -> usize {
        self.scanner.navigator().line()
    }

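    /// Current column number (1-based).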
    #[must_use]
    pub const fn column(&self) -> usize {
        self.scanner.navigator().column()
    }

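    /// Resets the tokenizer to the start of the source (re-skipping any BOM),
    /// restores the `Document` context, and clears collected issues.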
    pub fn reset(&mut self) {
        let initial_position = if self.source.starts_with('\u{FEFF}') {
            3
        } else {
            0
        };
        self.scanner = TokenScanner::new(self.source, initial_position, 1, 1);
        self.context = TokenContext::Document;
        self.issues.clear();
    }
}

#[cfg(test)]
mod tests;

#[cfg(test)]
mod inline_tests {
    use super::*;
    #[cfg(not(feature = "std"))]
    use alloc::string::ToString;
    #[cfg(not(feature = "std"))]
    use hashbrown::HashSet;
    #[cfg(feature = "std")]
    use std::collections::HashSet;

    #[test]
    fn tokenize_section_header() {
        let mut tokenizer = AssTokenizer::new("[Script Info]");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert_eq!(tokens.len(), 2);
        assert_eq!(tokens[0].token_type, TokenType::SectionHeader);
        assert_eq!(tokens[1].token_type, TokenType::SectionClose);
    }

    #[test]
    fn tokenize_field_line() {
        let mut tokenizer = AssTokenizer::new("Title: Test Script");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert!(tokens.len() >= 3);
        assert_eq!(tokens[1].token_type, TokenType::Colon);
    }

    #[test]
    fn reset_tokenizer() {
        let mut tokenizer = AssTokenizer::new("Test");
        let _ = tokenizer.next_token().unwrap();
        assert!(tokenizer.position() > 0);

        tokenizer.reset();
        assert_eq!(tokenizer.position(), 0);
        assert_eq!(tokenizer.line(), 1);
    }

    #[test]
    fn tokenize_with_bom() {
        let mut tokenizer = AssTokenizer::new("\u{FEFF}[Script Info]");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert_eq!(tokens.len(), 2);
        assert_eq!(tokens[0].token_type, TokenType::SectionHeader);
    }

    #[test]
    fn tokenize_style_override() {
        let mut tokenizer = AssTokenizer::new("{\\b1}text{\\b0}");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert!(tokens.len() >= 2);
        let has_override = tokens.iter().any(|t| {
            matches!(
                t.token_type,
                TokenType::OverrideBlock | TokenType::OverrideOpen | TokenType::OverrideClose
            )
        });
        let has_text = tokens.iter().any(|t| t.token_type == TokenType::Text);
        assert!(
            has_override || has_text,
            "Should have override or text tokens"
        );
    }

    #[test]
    fn tokenize_comma_delimiter() {
        let mut tokenizer = AssTokenizer::new("field1,field2,field3");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert!(tokens.iter().any(|t| t.token_type == TokenType::Comma));
    }

    #[test]
    fn tokenize_newline_types() {
        let mut tokenizer = AssTokenizer::new("line1\nline2\r\nline3");
        let tokens = tokenizer.tokenize_all().unwrap();
        let newline_count = tokens
            .iter()
            .filter(|t| t.token_type == TokenType::Newline)
            .count();
        assert!(newline_count >= 2);
    }

    #[test]
    fn tokenize_comment_semicolon() {
        let mut tokenizer = AssTokenizer::new("; This is a comment");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert!(!tokens.is_empty());
        assert_eq!(tokens[0].token_type, TokenType::Comment);
    }

    #[test]
    fn tokenize_comment_exclamation() {
        let mut tokenizer = AssTokenizer::new("!: This is a comment");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert!(!tokens.is_empty());
        assert_eq!(tokens[0].token_type, TokenType::Comment);
    }

    #[test]
    fn tokenize_misplaced_delimiters() {
        let mut tokenizer = AssTokenizer::new("text}more]text");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert!(tokens.iter().any(|t| t.token_type == TokenType::Text));
    }

    #[test]
    fn tokenize_field_value_context() {
        let mut tokenizer = AssTokenizer::new("Key: Value with spaces");
        let tokens = tokenizer.tokenize_all().unwrap();
        let has_text = tokens
            .iter()
            .any(|t| matches!(t.token_type, TokenType::Text));
        assert!(has_text);
    }

    #[test]
    fn tokenize_exclamation_without_colon() {
        let mut tokenizer = AssTokenizer::new("!not a comment");
        let tokens = tokenizer.tokenize_all().unwrap();
        assert!(tokens.iter().any(|t| t.token_type == TokenType::Text));
    }

    #[test]
    fn tokenize_all_iteration_limit() {
        let repeated_text = "a".repeat(100);
        let mut tokenizer = AssTokenizer::new(&repeated_text);
        // Either the input tokenizes within the limit or the guard trips.
        match tokenizer.tokenize_all() {
            Ok(tokens) => assert!(tokens.len() <= 50),
            Err(e) => assert!(e.to_string().contains("Too many tokenizer iterations")),
        }
    }

    #[test]
    fn tokenizer_position_tracking() {
        let mut tokenizer = AssTokenizer::new("Test\nLine 2");

        let initial_pos = tokenizer.position();
        let initial_line = tokenizer.line();
        let initial_col = tokenizer.column();

        assert_eq!(initial_pos, 0);
        assert_eq!(initial_line, 1);
        assert_eq!(initial_col, 1);

        let _ = tokenizer.next_token().unwrap();
        assert!(tokenizer.position() > initial_pos);
    }

    #[test]
    fn tokenizer_issues_collection() {
        let mut tokenizer = AssTokenizer::new("test content");
        let _ = tokenizer.tokenize_all().unwrap();
        let _issues = tokenizer.issues();
    }

    #[test]
    fn tokenize_empty_input() {
        let mut tokenizer = AssTokenizer::new("");
        let result = tokenizer.next_token().unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn tokenize_only_whitespace() {
        let mut tokenizer = AssTokenizer::new(" \t ");
        let result = tokenizer.next_token().unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn tokenizer_infinite_loop_protection() {
        let mut tokenizer = AssTokenizer::new("test");

        let result = tokenizer.next_token();
        assert!(result.is_ok());

        assert!(tokenizer.position() > 0 || tokenizer.scanner.navigator().is_at_end());
    }

    #[test]
    fn tokenizer_iteration_limit_exceeded() {
        let long_content = "a ".repeat(30);
        let mut tokenizer = AssTokenizer::new(&long_content);
        let result = tokenizer.tokenize_all();

        match result {
            Ok(tokens) => assert!(tokens.len() <= 50),
            Err(e) => assert!(e.to_string().contains("Too many tokenizer iterations")),
        }
    }

    #[test]
    fn tokenizer_context_transitions_comprehensive() {
        let mut tokenizer = AssTokenizer::new("[Section]:value{override}text\n");

        assert_eq!(tokenizer.context, TokenContext::Document);

        let token1 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token1.token_type, TokenType::SectionHeader);
        assert_eq!(tokenizer.context, TokenContext::SectionHeader);

        let token2 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token2.token_type, TokenType::SectionClose);
        assert_eq!(tokenizer.context, TokenContext::Document);

        let token3 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token3.token_type, TokenType::Colon);
        assert_eq!(tokenizer.context, TokenContext::FieldValue);

        let _remaining_tokens = tokenizer.tokenize_all().unwrap();
    }

    #[test]
    fn tokenizer_delimiter_in_wrong_context() {
        let mut tokenizer = AssTokenizer::new("}text");
        let token = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token.token_type, TokenType::Text);
        assert_eq!(token.span, "}");

        let mut tokenizer2 = AssTokenizer::new("]text");
        let token2 = tokenizer2.next_token().unwrap().unwrap();
        assert_eq!(token2.token_type, TokenType::Text);
        assert_eq!(token2.span, "]");
    }

    #[test]
    fn tokenizer_bom_edge_cases() {
        let mut tokenizer = AssTokenizer::new("\u{FEFF}content");
        assert_eq!(tokenizer.position(), 3); // BOM skipped

        let _token = tokenizer.next_token().unwrap();
        tokenizer.reset();
        assert_eq!(tokenizer.position(), 3); // reset re-skips the BOM
        assert_eq!(tokenizer.line(), 1);
        assert_eq!(tokenizer.column(), 1);
        assert_eq!(tokenizer.context, TokenContext::Document);
    }

    #[test]
    fn tokenizer_carriage_return_line_feed() {
        let mut tokenizer = AssTokenizer::new("line1\r\nline2");

        let token1 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token1.token_type, TokenType::Text);

        let token2 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token2.token_type, TokenType::Newline);
        assert_eq!(tokenizer.context, TokenContext::Document);

        let token3 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token3.token_type, TokenType::Text);
        assert_eq!(token3.span, "line2");
    }

    #[test]
    fn tokenizer_exclamation_comment_detection() {
        let mut tokenizer = AssTokenizer::new("!:comment");
        let token = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token.token_type, TokenType::Comment);

        let mut tokenizer2 = AssTokenizer::new("!text");
        let token2 = tokenizer2.next_token().unwrap().unwrap();
        assert_eq!(token2.token_type, TokenType::Text);
    }

    #[test]
    fn tokenizer_field_value_context_handling() {
        let mut tokenizer = AssTokenizer::new("key:value with spaces,next");

        let token1 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token1.token_type, TokenType::Text);

        let token2 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token2.token_type, TokenType::Colon);
        assert_eq!(tokenizer.context, TokenContext::FieldValue);

        let token3 = tokenizer.next_token().unwrap().unwrap();
        assert!(matches!(
            token3.token_type,
            TokenType::Text | TokenType::Number | TokenType::HexValue
        ));
    }

    #[test]
    fn tokenizer_position_line_column_tracking() {
        let mut tokenizer = AssTokenizer::new("first\nsecond\nthird");

        assert_eq!(tokenizer.position(), 0);
        assert_eq!(tokenizer.line(), 1);
        assert_eq!(tokenizer.column(), 1);

        let _token1 = tokenizer.next_token().unwrap().unwrap();
        let pos1 = tokenizer.position();
        let line1 = tokenizer.line();
        let _col1 = tokenizer.column();

        let _token2 = tokenizer.next_token().unwrap().unwrap();
        assert!(tokenizer.line() > line1);

        let _token3 = tokenizer.next_token().unwrap().unwrap();
        assert!(tokenizer.position() > pos1);
    }

    #[test]
    fn tokenizer_all_delimiter_types() {
        let mut tokenizer = AssTokenizer::new("[section]:value,field{override}text\n");
        let tokens = tokenizer.tokenize_all().unwrap();

        let types: HashSet<_> = tokens.iter().map(|t| &t.token_type).collect();

        assert!(types.len() > 1);
        assert!(
            types.contains(&TokenType::SectionHeader) || types.contains(&TokenType::SectionOpen)
        );
        assert!(types.contains(&TokenType::Colon));
        assert!(types.contains(&TokenType::Comma));
    }

    #[test]
    fn tokenizer_empty_reset_state() {
        let mut tokenizer = AssTokenizer::new("");

        let result = tokenizer.next_token().unwrap();
        assert!(result.is_none());

        tokenizer.reset();
        assert_eq!(tokenizer.position(), 0);
        assert_eq!(tokenizer.line(), 1);
        assert_eq!(tokenizer.column(), 1);
    }

    #[test]
    fn tokenizer_whitespace_handling_contexts() {
        let mut tokenizer = AssTokenizer::new(" [ section ] ");

        let token1 = tokenizer.next_token().unwrap().unwrap();
        assert!(matches!(
            token1.token_type,
            TokenType::SectionHeader | TokenType::SectionOpen
        ));

        let _remaining = tokenizer.tokenize_all().unwrap();
    }

    #[test]
    fn tokenizer_issue_collection_access() {
        let mut tokenizer = AssTokenizer::new("valid content");

        assert!(tokenizer.issues().is_empty());

        let _tokens = tokenizer.tokenize_all().unwrap();
        let _issues = tokenizer.issues();

        tokenizer.reset();
        assert!(tokenizer.issues().is_empty());
    }

    #[test]
    fn tokenizer_scanner_navigation_access() {
        let mut tokenizer = AssTokenizer::new("test content");

        let initial_pos = tokenizer.position();
        let initial_line = tokenizer.line();
        let initial_col = tokenizer.column();

        assert_eq!(initial_pos, 0);
        assert_eq!(initial_line, 1);
        assert_eq!(initial_col, 1);

        let _token = tokenizer.next_token().unwrap();
        let _new_pos = tokenizer.position();
        let _new_line = tokenizer.line();
        let _new_col = tokenizer.column();
    }

    #[test]
    fn tokenizer_mixed_context_characters() {
        let mut tokenizer = AssTokenizer::new("text{override[section]:value}more");
        let tokens = tokenizer.tokenize_all().unwrap();

        assert!(!tokens.is_empty());

        assert!(tokens.iter().any(|t| t.token_type == TokenType::Text));
    }

    #[test]
    fn tokenizer_semicolon_comment_in_document_context() {
        let mut tokenizer = AssTokenizer::new("; comment in document context");

        let token = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token.token_type, TokenType::Comment);
    }

    #[test]
    fn tokenizer_no_bom_content() {
        let mut tokenizer = AssTokenizer::new("content without BOM");
        assert_eq!(tokenizer.position(), 0);

        let _token = tokenizer.next_token().unwrap();
        assert!(tokenizer.position() > 0);
    }

    #[test]
    fn tokenizer_infinite_loop_protection_error() {
        let source = "invalid_char\x00";
        let mut tokenizer = AssTokenizer::new(source);

        // Success or failure, the position must remain within bounds.
        let _ = tokenizer.next_token();
        assert!(tokenizer.position() <= source.len());
    }

    #[test]
    fn tokenizer_position_line_column_advancement() {
        let source = "[Section]\nKey=Value\n! Comment";
        let mut tokenizer = AssTokenizer::new(source);

        let mut last_pos = 0;
        let mut tokens = Vec::new();

        while let Ok(Some(token)) = tokenizer.next_token() {
            let current_pos = tokenizer.position();
            if !tokenizer.scanner.navigator().is_at_end() {
                assert!(current_pos > last_pos, "Position must advance");
            }

            assert!(token.line >= 1);
            assert!(token.column >= 1);

            tokens.push(token);
            last_pos = current_pos;

            if tokens.len() > 20 {
                break;
            }
        }

        assert!(!tokens.is_empty());
    }

    #[test]
    fn tokenizer_span_creation_and_boundaries() {
        let source = "[Test]\nField=Value123";
        let mut tokenizer = AssTokenizer::new(source);

        while let Ok(Some(token)) = tokenizer.next_token() {
            assert!(
                !token.span.is_empty()
                    || token.token_type == crate::tokenizer::tokens::TokenType::Comment
            );
            assert!(token.span.len() <= source.len());

            let start_pos = token.span.as_ptr() as usize - source.as_ptr() as usize;
            assert!(start_pos < source.len());
        }
    }

    #[test]
    fn tokenizer_iteration_limit_comprehensive() {
        let source = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,1,2,3,4,5,6,7,8,9,0,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z";
        let mut tokenizer = AssTokenizer::new(source);

        let result = tokenizer.tokenize_all();

        if let Ok(tokens) = result {
            assert!(tokens.len() <= 50, "Should respect iteration limit");
        }
    }

    #[test]
    fn tokenizer_all_error_recovery() {
        let source = "Valid[Section]\n\x00InvalidChar\nKey=Value";
        let mut tokenizer = AssTokenizer::new(source);

        let result = tokenizer.tokenize_all();

        match result {
            Ok(tokens) => {
                assert!(!tokens.is_empty());
            }
            Err(_) => {
                assert!(!tokenizer.issues().is_empty());
            }
        }
    }

    #[test]
    fn tokenizer_empty_source_boundaries() {
        let source = "";
        let mut tokenizer = AssTokenizer::new(source);

        assert_eq!(tokenizer.position(), 0);
        assert_eq!(tokenizer.line(), 1);
        assert_eq!(tokenizer.column(), 1);

        let result = tokenizer.next_token();
        assert!(result.is_ok());
        assert!(result.unwrap().is_none());
    }

    #[test]
    fn tokenizer_single_character_advancement() {
        let source = "a";
        let mut tokenizer = AssTokenizer::new(source);

        let start_pos = tokenizer.position();
        if let Ok(Some(token)) = tokenizer.next_token() {
            let end_pos = tokenizer.position();
            assert!(end_pos > start_pos);
            assert_eq!(token.span, "a");
        }
    }

    #[test]
    fn tokenizer_multi_byte_character_advancement() {
        let source = "🎵音楽";
        let mut tokenizer = AssTokenizer::new(source);

        let mut positions = Vec::new();
        positions.push(tokenizer.position());

        while let Ok(Some(_)) = tokenizer.next_token() {
            positions.push(tokenizer.position());
            if positions.len() > 10 {
                break;
            }
        }

        for window in positions.windows(2) {
            assert!(window[1] >= window[0]);
        }
    }

    #[test]
    fn tokenizer_token_push_verification() {
        let source = "Key1=Value1\nKey2=Value2";
        let mut tokenizer = AssTokenizer::new(source);

        let tokens = tokenizer.tokenize_all().unwrap_or_default();

        assert!(!tokens.is_empty());

        for token in &tokens {
            assert!(
                !token.span.is_empty()
                    || token.token_type == crate::tokenizer::tokens::TokenType::Comment
            );
        }
    }

    #[test]
    fn tokenizer_context_based_token_creation() {
        let source = "{\\b1}Bold text{\\b0}";
        let mut tokenizer = AssTokenizer::new(source);

        let mut token_count = 0;
        while let Ok(Some(token)) = tokenizer.next_token() {
            assert!(token.line >= 1);
            assert!(token.column >= 1);
            assert!(!token.span.is_empty());

            token_count += 1;
            if token_count > 15 {
                break;
            }
        }

        assert!(token_count > 0);
    }

    #[test]
    fn tokenizer_section_header_start_tracking() {
        let source = "[Script Info]";
        let mut tokenizer = AssTokenizer::new(source);

        let token = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token.line, 1);
        assert_eq!(token.column, 1);
    }

    #[test]
    fn tokenizer_section_close_bracket() {
        let source = "[Test]";
        let mut tokenizer = AssTokenizer::new(source);

        let _header = tokenizer.next_token().unwrap().unwrap();
        let close = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(
            close.token_type,
            crate::tokenizer::tokens::TokenType::SectionClose
        );
    }

    #[test]
    fn tokenizer_colon_field_separator() {
        let source = "Key:Value";
        let mut tokenizer = AssTokenizer::new(source);

        let _key = tokenizer.next_token().unwrap().unwrap();
        let colon = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(colon.token_type, crate::tokenizer::tokens::TokenType::Colon);
    }

    #[test]
    fn tokenizer_comma_separator() {
        let source = "val1,val2";
        let mut tokenizer = AssTokenizer::new(source);

        let _val1 = tokenizer.next_token().unwrap().unwrap();
        let comma = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(comma.token_type, crate::tokenizer::tokens::TokenType::Comma);
    }

    #[test]
    fn tokenizer_newline_handling() {
        let source = "line1\nline2\r\nline3";
        let mut tokenizer = AssTokenizer::new(source);

        let _line1 = tokenizer.next_token().unwrap().unwrap();
        let newline1 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(
            newline1.token_type,
            crate::tokenizer::tokens::TokenType::Newline
        );
    }

    #[test]
    fn tokenizer_style_override_tokens() {
        let source = "{\\b1}text{\\b0}";
        let mut tokenizer = AssTokenizer::new(source);

        let override_block = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(
            override_block.token_type,
            crate::tokenizer::tokens::TokenType::OverrideBlock
        );
    }

    #[test]
    fn tokenizer_comment_exclamation() {
        let source = "!: This is a comment";
        let mut tokenizer = AssTokenizer::new(source);

        let comment = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(
            comment.token_type,
            crate::tokenizer::tokens::TokenType::Comment
        );
    }

    #[test]
    fn tokenizer_comment_semicolon() {
        let source = "; This is a comment";
        let mut tokenizer = AssTokenizer::new(source);

        let comment = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(
            comment.token_type,
            crate::tokenizer::tokens::TokenType::Comment
        );
    }

    #[test]
    fn tokenizer_whitespace_token() {
        let source = "Key: \t ";
        let mut tokenizer = AssTokenizer::new(source);

        let _key = tokenizer.next_token().unwrap().unwrap();
        let _colon = tokenizer.next_token().unwrap().unwrap();
        let whitespace = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(
            whitespace.token_type,
            crate::tokenizer::tokens::TokenType::Whitespace
        );
    }

    #[test]
    fn tokenizer_text_fallback() {
        let source = "regular_text_123";
        let mut tokenizer = AssTokenizer::new(source);

        let text = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(text.token_type, crate::tokenizer::tokens::TokenType::Text);
    }

    #[test]
    fn tokenizer_infinite_loop_error_path() {
        let source = "test";
        let mut tokenizer = AssTokenizer::new(source);

        let result = tokenizer.next_token();
        assert!(result.is_ok());
    }

    #[test]
    fn tokenizer_span_creation_path() {
        let source = "test";
        let mut tokenizer = AssTokenizer::new(source);

        let token = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(token.span, "test");
        assert_eq!(token.line, 1);
        assert_eq!(token.column, 1);
    }

    #[test]
    fn tokenizer_end_of_input_handling() {
        let source = "";
        let mut tokenizer = AssTokenizer::new(source);

        let result = tokenizer.next_token().unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn tokenizer_all_error_propagation() {
        let source = "valid_content";
        let mut tokenizer = AssTokenizer::new(source);

        let tokens = tokenizer.tokenize_all().unwrap();
        assert!(!tokens.is_empty());
    }

    #[test]
    fn tokenizer_carriage_return_handling() {
        let source = "line1\rline2";
        let mut tokenizer = AssTokenizer::new(source);

        let _line1 = tokenizer.next_token().unwrap().unwrap();
        let newline = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(
            newline.token_type,
            crate::tokenizer::tokens::TokenType::Newline
        );

        let _line2 = tokenizer.next_token().unwrap().unwrap();
        assert_eq!(tokenizer.line(), 2);
    }
}