1use crate::{utils::CoreError, Result};
7use alloc::{format, string::ToString};
8use core::str::Chars;
9
10#[cfg(not(feature = "std"))]
11extern crate alloc;
12#[cfg(feature = "simd")]
13use super::simd;
14use super::{state::TokenContext, tokens::TokenType};
15
16#[derive(Debug, Clone)]
21pub struct CharNavigator<'a> {
22 source: &'a str,
24 position: usize,
26 line: usize,
28 column: usize,
30 chars: Chars<'a>,
32 peek_char: Option<char>,
34 last_char: Option<char>,
36}
37
38impl<'a> CharNavigator<'a> {
39 #[must_use]
41 pub fn new(source: &'a str, position: usize, line: usize, column: usize) -> Self {
42 Self {
43 source,
44 position,
45 line,
46 column,
47 chars: source[position..].chars(),
48 peek_char: None,
49 last_char: None,
50 }
51 }
52
53 #[must_use]
55 pub const fn position(&self) -> usize {
56 self.position
57 }
58
59 #[must_use]
61 pub const fn line(&self) -> usize {
62 self.line
63 }
64
65 #[must_use]
67 pub const fn column(&self) -> usize {
68 self.column
69 }
70
71 pub fn peek_char(&mut self) -> Result<char> {
77 if let Some(ch) = self.peek_char {
78 Ok(ch)
79 } else if self.position < self.source.len() {
80 let ch = self.source[self.position..].chars().next().ok_or_else(|| {
81 CoreError::parse(format!("Invalid UTF-8 at position {}", self.position))
82 })?;
83 self.peek_char = Some(ch);
84 Ok(ch)
85 } else {
86 Err(CoreError::parse("Unexpected end of input".to_string()))
87 }
88 }
89
90 pub fn peek_next(&self) -> Result<char> {
96 let mut chars = self.source[self.position..].chars();
97 chars.next(); chars
99 .next()
100 .ok_or_else(|| CoreError::parse("Unexpected end of input".to_string()))
101 }
102
103 pub fn advance_char(&mut self) -> Result<char> {
109 let ch = self.peek_char()?;
110 self.peek_char = None;
111
112 let _ = self.chars.next();
113 self.position += ch.len_utf8();
114
115 match ch {
116 '\r' => {
117 self.line += 1;
118 self.column = 1;
119 }
120 '\n' => {
121 if self.last_char != Some('\r') {
123 self.line += 1;
124 }
125 self.column = 1;
126 }
127 _ => {
128 self.column += 1;
129 }
130 }
131
132 self.last_char = Some(ch);
133 Ok(ch)
134 }
135
136 pub fn skip_whitespace(&mut self) {
138 while self.position < self.source.len() {
139 if let Ok(ch) = self.peek_char() {
140 if ch.is_whitespace() && ch != '\n' && ch != '\r' {
141 let _ = self.advance_char();
142 } else {
143 break;
144 }
145 } else {
146 break;
147 }
148 }
149 }
150
151 #[must_use]
153 pub const fn is_at_end(&self) -> bool {
154 self.position >= self.source.len()
155 }
156}
157
158#[derive(Debug, Clone)]
160pub struct TokenScanner<'a> {
161 navigator: CharNavigator<'a>,
163 source: &'a str,
165}
166
167impl<'a> TokenScanner<'a> {
168 #[must_use]
170 pub fn new(source: &'a str, position: usize, line: usize, column: usize) -> Self {
171 Self {
172 navigator: CharNavigator::new(source, position, line, column),
173 source,
174 }
175 }
176
177 pub fn navigator_mut(&mut self) -> &mut CharNavigator<'a> {
179 &mut self.navigator
180 }
181
182 #[must_use]
184 pub const fn navigator(&self) -> &CharNavigator<'a> {
185 &self.navigator
186 }
187
188 pub fn scan_section_header(&mut self) -> Result<TokenType> {
194 self.navigator.advance_char()?; while !self.navigator.is_at_end() {
197 let ch = self.navigator.peek_char()?;
198 if ch == ']' {
199 break;
200 }
201 self.navigator.advance_char()?;
202 }
203
204 Ok(TokenType::SectionHeader)
205 }
206
207 pub fn scan_style_override(&mut self) -> Result<TokenType> {
213 self.navigator.advance_char()?; let mut brace_depth = 1;
216 while !self.navigator.is_at_end() && brace_depth > 0 {
217 let ch = self.navigator.peek_char()?;
218 match ch {
219 '{' => brace_depth += 1,
220 '}' => brace_depth -= 1,
221 _ => {}
222 }
223
224 if brace_depth > 0 {
225 self.navigator.advance_char()?;
226 }
227 }
228
229 Ok(TokenType::OverrideBlock)
230 }
231
232 pub fn scan_comment(&mut self) -> Result<TokenType> {
238 while !self.navigator.is_at_end() {
239 let ch = self.navigator.peek_char()?;
240 if ch == '\n' || ch == '\r' {
241 break;
242 }
243 self.navigator.advance_char()?;
244 }
245
246 Ok(TokenType::Comment)
247 }
248
249 pub fn scan_text(&mut self, context: TokenContext) -> Result<TokenType> {
255 let start = self.navigator.position();
256
257 #[cfg(feature = "simd")]
259 {
260 let use_simd = !matches!(context, TokenContext::FieldValue);
262
263 if use_simd {
264 if let Some(delimiter_pos) = self.scan_delimiters_simd(start) {
265 self.navigator.position = delimiter_pos;
266 } else {
267 self.navigator.position = self.source.len();
268 }
269 self.navigator.chars = self.source[self.navigator.position..].chars();
270 self.navigator.peek_char = None;
271 }
272 }
273
274 #[cfg(not(feature = "simd"))]
276 let use_scalar = true;
277 #[cfg(feature = "simd")]
278 let use_scalar = matches!(context, TokenContext::FieldValue);
279
280 if use_scalar {
281 while !self.navigator.is_at_end() {
282 let ch = self.navigator.peek_char()?;
283
284 let is_delimiter = match context {
286 TokenContext::FieldValue => {
287 matches!(ch, ',' | '{' | '}' | '[' | ']' | '\n' | '\r')
289 }
290 _ => {
291 matches!(ch, ',' | ':' | '{' | '}' | '[' | ']' | '\n' | '\r')
293 || (ch == ';' && context == TokenContext::Document)
294 }
295 };
296
297 if is_delimiter {
298 break;
299 }
300
301 self.navigator.advance_char()?;
302 }
303 }
304
305 let span = &self.source[start..self.navigator.position()];
306
307 if context == TokenContext::SectionHeader {
308 Ok(TokenType::SectionName)
309 } else if Self::is_hex_value(span) {
310 Ok(TokenType::HexValue)
311 } else if !span.is_empty()
312 && span
313 .chars()
314 .all(|c| c.is_ascii_digit() || c == '.' || c == '-')
315 {
316 Ok(TokenType::Number)
317 } else if !span.is_empty() && span.chars().all(char::is_whitespace) {
318 Ok(TokenType::Whitespace)
319 } else {
320 Ok(TokenType::Text)
321 }
322 }
323
324 fn is_hex_value(span: &str) -> bool {
326 if let Some(after_prefix) = span.strip_prefix("&H") {
328 let hex_part = after_prefix
329 .strip_suffix('&')
330 .map_or(after_prefix, |stripped| stripped);
331
332 if !hex_part.is_empty()
333 && hex_part.len() % 2 == 0
334 && hex_part.len() <= 8
335 && hex_part.chars().all(|c| c.is_ascii_hexdigit())
336 {
337 #[cfg(feature = "simd")]
338 {
339 return TokenScanner::parse_hex_simd(hex_part).is_some();
340 }
341 #[cfg(not(feature = "simd"))]
342 {
343 return true;
344 }
345 }
346 }
347
348 false
352 }
353
354 #[cfg(feature = "simd")]
356 fn scan_delimiters_simd(&self, start: usize) -> Option<usize> {
357 simd::scan_delimiters(&self.source[start..]).map(|offset| start + offset)
358 }
359
360 #[cfg(feature = "simd")]
362 fn parse_hex_simd(hex_str: &str) -> Option<u32> {
363 simd::parse_hex_u32(hex_str)
364 }
365
366 pub fn scan_field_value(&mut self) -> Result<TokenType> {
375 let start = self.navigator.position();
376
377 while !self.navigator.is_at_end() {
378 let ch = self.navigator.peek_char()?;
379
380 if ch == ',' || ch == '\n' || ch == '\r' || ch == '{' || ch == '[' {
382 break;
383 }
384
385 self.navigator.advance_char()?;
386 }
387
388 let span = &self.source[start..self.navigator.position()];
389
390 if !span.is_empty()
391 && span
392 .chars()
393 .all(|c| c.is_ascii_digit() || c == '.' || c == '-' || c == ':')
394 {
395 Ok(TokenType::Number)
396 } else if !span.is_empty() && span.chars().all(char::is_whitespace) {
397 Ok(TokenType::Whitespace)
398 } else {
399 Ok(TokenType::Text)
400 }
401 }
402}
403
404#[cfg(test)]
405mod tests {
406 use super::*;
407 #[cfg(not(feature = "std"))]
408 use alloc::vec;
409
410 #[test]
411 fn char_navigator_new() {
412 let source = "test content";
413 let nav = CharNavigator::new(source, 5, 2, 3);
414 assert_eq!(nav.position(), 5);
415 assert_eq!(nav.line(), 2);
416 assert_eq!(nav.column(), 3);
417 }
418
419 #[test]
420 fn char_navigator_peek_char() {
421 let source = "hello";
422 let mut nav = CharNavigator::new(source, 0, 1, 1);
423 assert_eq!(nav.peek_char().unwrap(), 'h');
424 assert_eq!(nav.peek_char().unwrap(), 'h'); assert_eq!(nav.position(), 0);
426 }
427
428 #[test]
429 fn char_navigator_peek_next() {
430 let source = "hello";
431 let nav = CharNavigator::new(source, 0, 1, 1);
432 assert_eq!(nav.peek_next().unwrap(), 'e');
433 }
434
435 #[test]
436 fn char_navigator_advance_char() {
437 let source = "hello";
438 let mut nav = CharNavigator::new(source, 0, 1, 1);
439 assert_eq!(nav.advance_char().unwrap(), 'h');
440 assert_eq!(nav.position(), 1);
441 assert_eq!(nav.column(), 2);
442 }
443
444 #[test]
445 fn char_navigator_advance_newline() {
446 let source = "line1\nline2";
447 let mut nav = CharNavigator::new(source, 0, 1, 1);
448 for _ in 0..5 {
450 nav.advance_char().unwrap();
451 }
452 assert_eq!(nav.advance_char().unwrap(), '\n');
453 assert_eq!(nav.line(), 2);
454 assert_eq!(nav.column(), 1);
455 }
456
457 #[test]
458 fn char_navigator_advance_carriage_return() {
459 let source = "line1\rline2";
460 let mut nav = CharNavigator::new(source, 0, 1, 1);
461 for _ in 0..5 {
463 nav.advance_char().unwrap();
464 }
465 assert_eq!(nav.advance_char().unwrap(), '\r');
466 assert_eq!(nav.line(), 2);
467 assert_eq!(nav.column(), 1);
468 }
469
470 #[test]
471 fn char_navigator_advance_crlf() {
472 let source = "line1\r\nline2";
473 let mut nav = CharNavigator::new(source, 0, 1, 1);
474 for _ in 0..5 {
476 nav.advance_char().unwrap();
477 }
478 assert_eq!(nav.advance_char().unwrap(), '\r');
479 assert_eq!(nav.line(), 2);
480 assert_eq!(nav.advance_char().unwrap(), '\n');
482 assert_eq!(nav.line(), 2); assert_eq!(nav.column(), 1);
484 }
485
486 #[test]
487 fn char_navigator_skip_whitespace() {
488 let source = " \t hello";
489 let mut nav = CharNavigator::new(source, 0, 1, 1);
490 nav.skip_whitespace();
491 assert_eq!(nav.peek_char().unwrap(), 'h');
492 }
493
494 #[test]
495 fn char_navigator_skip_whitespace_preserves_newlines() {
496 let source = " \n hello";
497 let mut nav = CharNavigator::new(source, 0, 1, 1);
498 nav.skip_whitespace();
499 assert_eq!(nav.peek_char().unwrap(), '\n');
500 }
501
502 #[test]
503 fn char_navigator_is_at_end() {
504 let source = "hi";
505 let nav = CharNavigator::new(source, 2, 1, 1);
506 assert!(nav.is_at_end());
507
508 let nav2 = CharNavigator::new(source, 0, 1, 1);
509 assert!(!nav2.is_at_end());
510 }
511
512 #[test]
513 fn char_navigator_peek_char_at_end() {
514 let source = "hi";
515 let mut nav = CharNavigator::new(source, 2, 1, 1);
516 assert!(nav.peek_char().is_err());
517 }
518
519 #[test]
520 fn char_navigator_peek_next_at_end() {
521 let source = "h";
522 let nav = CharNavigator::new(source, 0, 1, 1);
523 assert!(nav.peek_next().is_err());
524 }
525
526 #[test]
527 fn token_scanner_new() {
528 let source = "test content";
529 let scanner = TokenScanner::new(source, 5, 2, 3);
530 assert_eq!(scanner.navigator().position(), 5);
531 assert_eq!(scanner.navigator().line(), 2);
532 assert_eq!(scanner.navigator().column(), 3);
533 }
534
535 #[test]
536 fn token_scanner_scan_section_header() {
537 let source = "[Script Info]";
538 let mut scanner = TokenScanner::new(source, 0, 1, 1);
539 let token_type = scanner.scan_section_header().unwrap();
540 assert_eq!(token_type, TokenType::SectionHeader);
541 }
542
543 #[test]
544 fn token_scanner_scan_style_override() {
545 let source = "{\\b1\\i1}";
546 let mut scanner = TokenScanner::new(source, 0, 1, 1);
547 let token_type = scanner.scan_style_override().unwrap();
548 assert_eq!(token_type, TokenType::OverrideBlock);
549 }
550
551 #[test]
552 fn token_scanner_scan_style_override_nested() {
553 let source = "{\\b1{\\i1}\\b0}";
554 let mut scanner = TokenScanner::new(source, 0, 1, 1);
555 let token_type = scanner.scan_style_override().unwrap();
556 assert_eq!(token_type, TokenType::OverrideBlock);
557 }
558
559 #[test]
560 fn token_scanner_scan_comment() {
561 let source = "; This is a comment\nNext line";
562 let mut scanner = TokenScanner::new(source, 0, 1, 1);
563 let token_type = scanner.scan_comment().unwrap();
564 assert_eq!(token_type, TokenType::Comment);
565 }
566
567 #[test]
568 fn token_scanner_scan_text_basic() {
569 let source = "Hello World,";
570 let mut scanner = TokenScanner::new(source, 0, 1, 1);
571 let token_type = scanner.scan_text(TokenContext::Document).unwrap();
572 assert_eq!(token_type, TokenType::Text);
573 }
574
575 #[test]
576 fn token_scanner_scan_text_number() {
577 let source = "123.45,";
578 let mut scanner = TokenScanner::new(source, 0, 1, 1);
579 let token_type = scanner.scan_text(TokenContext::Document).unwrap();
580 assert_eq!(token_type, TokenType::Number);
581 }
582
583 #[test]
584 fn token_scanner_scan_text_hex_value() {
585 let source = "&HABCDEF&,";
586 let mut scanner = TokenScanner::new(source, 0, 1, 1);
587 let token_type = scanner.scan_text(TokenContext::Document).unwrap();
588 assert_eq!(token_type, TokenType::HexValue);
589 }
590
591 #[test]
592 fn token_scanner_scan_text_section_name() {
593 let source = "Script Info]";
594 let mut scanner = TokenScanner::new(source, 0, 1, 1);
595 let token_type = scanner.scan_text(TokenContext::SectionHeader).unwrap();
596 assert_eq!(token_type, TokenType::SectionName);
597 }
598
599 #[test]
600 fn token_scanner_scan_text_field_value_context() {
601 let source = "0:01:23.45,";
602 let mut scanner = TokenScanner::new(source, 0, 1, 1);
603 let token_type = scanner.scan_text(TokenContext::FieldValue).unwrap();
604 assert_eq!(token_type, TokenType::Text);
605 }
606
607 #[test]
608 fn token_scanner_scan_field_value() {
609 let source = "Some field value,";
610 let mut scanner = TokenScanner::new(source, 0, 1, 1);
611 let token_type = scanner.scan_field_value().unwrap();
612 assert_eq!(token_type, TokenType::Text);
613 }
614
615 #[test]
616 fn token_scanner_scan_field_value_number() {
617 let source = "0:01:23.45,";
618 let mut scanner = TokenScanner::new(source, 0, 1, 1);
619 let token_type = scanner.scan_field_value().unwrap();
620 assert_eq!(token_type, TokenType::Number);
621 }
622
623 #[test]
624 fn token_scanner_is_hex_value_simple() {
625 assert!(!TokenScanner::is_hex_value("ABCD"));
627 assert!(!TokenScanner::is_hex_value("1234"));
628
629 assert!(TokenScanner::is_hex_value("&HABCD&"));
631 assert!(TokenScanner::is_hex_value("&H1234&"));
632 assert!(!TokenScanner::is_hex_value("&HABCDE&")); assert!(!TokenScanner::is_hex_value("&HGHIJ&")); assert!(!TokenScanner::is_hex_value("")); }
636
637 #[test]
638 fn token_scanner_is_hex_value_with_prefix() {
639 assert!(TokenScanner::is_hex_value("&HFF00FF&"));
640 assert!(TokenScanner::is_hex_value("&HFF00FF"));
641 assert!(!TokenScanner::is_hex_value("&H&")); assert!(!TokenScanner::is_hex_value("&HGHIJ&")); }
644
645 #[test]
646 fn token_scanner_is_hex_value_max_length() {
647 assert!(!TokenScanner::is_hex_value("ABCDEF")); assert!(!TokenScanner::is_hex_value("00FF00FF")); assert!(!TokenScanner::is_hex_value("1234567890")); assert!(!TokenScanner::is_hex_value(&"A".repeat(100))); assert!(TokenScanner::is_hex_value("&H00FF00FF&")); assert!(TokenScanner::is_hex_value("&HABCD&")); assert!(!TokenScanner::is_hex_value("&H1234567890&")); assert!(!TokenScanner::is_hex_value("00")); assert!(!TokenScanner::is_hex_value("123abc")); }
663
664 #[test]
665 fn token_scanner_hex_value_trailing_ampersand_variants() {
666 assert!(TokenScanner::is_hex_value("&H00FFFFFF&"));
668 assert!(TokenScanner::is_hex_value("&HFF0000&"));
669 assert!(TokenScanner::is_hex_value("&H80FF00FF&"));
670
671 assert!(TokenScanner::is_hex_value("&H00FFFFFF"));
673 assert!(TokenScanner::is_hex_value("&HFF0000"));
674 assert!(TokenScanner::is_hex_value("&H80FF00FF"));
675
676 assert!(TokenScanner::is_hex_value("&H00&"));
678 assert!(TokenScanner::is_hex_value("&H00"));
679 assert!(!TokenScanner::is_hex_value("&H&")); assert!(!TokenScanner::is_hex_value("&H")); }
682
683 #[test]
684 fn token_scanner_scan_text_hex_value_ampersand_variants() {
685 let source1 = "&H00FFFFFF&";
687 let mut scanner1 = TokenScanner::new(source1, 0, 1, 1);
688 let token_type1 = scanner1.scan_text(TokenContext::Document).unwrap();
689 assert_eq!(token_type1, TokenType::HexValue);
690 assert_eq!(scanner1.navigator().position(), source1.len());
691
692 let source2 = "&H00FFFFFF";
694 let mut scanner2 = TokenScanner::new(source2, 0, 1, 1);
695 let token_type2 = scanner2.scan_text(TokenContext::Document).unwrap();
696 assert_eq!(token_type2, TokenType::HexValue);
697 assert_eq!(scanner2.navigator().position(), source2.len());
698
699 let source3 = "&HFF00&";
701 let mut scanner3 = TokenScanner::new(source3, 0, 1, 1);
702 let token_type3 = scanner3.scan_text(TokenContext::Document).unwrap();
703 assert_eq!(token_type3, TokenType::HexValue);
704
705 let source4 = "&HFF00";
706 let mut scanner4 = TokenScanner::new(source4, 0, 1, 1);
707 let token_type4 = scanner4.scan_text(TokenContext::Document).unwrap();
708 assert_eq!(token_type4, TokenType::HexValue);
709 }
710
711 #[test]
712 fn token_scanner_delimiter_context_field_value() {
713 let source = "Title: My Script";
714 let mut scanner = TokenScanner::new(source, 7, 1, 8); let token_type = scanner.scan_text(TokenContext::FieldValue).unwrap();
716 assert_eq!(token_type, TokenType::Text);
717 }
719
720 #[test]
721 fn token_scanner_delimiter_context_document() {
722 let source = "Field:Value";
723 let mut scanner = TokenScanner::new(source, 0, 1, 1);
724 let token_type = scanner.scan_text(TokenContext::Document).unwrap();
725 assert_eq!(token_type, TokenType::Text);
726 assert_eq!(scanner.navigator().position(), 5);
728 }
729
730 #[test]
731 fn token_scanner_various_delimiters() {
732 let test_cases = vec![
733 (",", TokenContext::Document),
734 ("{", TokenContext::Document),
735 ("}", TokenContext::Document),
736 ("[", TokenContext::Document),
737 ("]", TokenContext::Document),
738 ("\n", TokenContext::Document),
739 ("\r", TokenContext::Document),
740 ];
741
742 for (delimiter, context) in test_cases {
743 let source = format!("text{delimiter}more");
744 let mut scanner = TokenScanner::new(&source, 0, 1, 1);
745 let _token_type = scanner.scan_text(context).unwrap();
746 assert_eq!(scanner.navigator().position(), 4); }
748 }
749
750 #[test]
751 fn token_scanner_navigator_mut() {
752 let source = "test";
753 let mut scanner = TokenScanner::new(source, 0, 1, 1);
754 {
755 let nav_mut = scanner.navigator_mut();
756 nav_mut.advance_char().unwrap();
757 }
758 assert_eq!(scanner.navigator().position(), 1);
759 }
760
761 #[test]
762 fn char_navigator_utf8_handling() {
763 let source = "café";
764 let mut nav = CharNavigator::new(source, 0, 1, 1);
765 assert_eq!(nav.advance_char().unwrap(), 'c');
766 assert_eq!(nav.advance_char().unwrap(), 'a');
767 assert_eq!(nav.advance_char().unwrap(), 'f');
768 assert_eq!(nav.advance_char().unwrap(), 'é');
769 assert_eq!(nav.position(), 5); }
771
772 #[test]
773 fn token_scanner_empty_section_header() {
774 let source = "[]";
775 let mut scanner = TokenScanner::new(source, 0, 1, 1);
776 let token_type = scanner.scan_section_header().unwrap();
777 assert_eq!(token_type, TokenType::SectionHeader);
778 }
779
780 #[test]
781 fn token_scanner_unclosed_section_header() {
782 let source = "[Script Info";
783 let mut scanner = TokenScanner::new(source, 0, 1, 1);
784 let token_type = scanner.scan_section_header().unwrap();
785 assert_eq!(token_type, TokenType::SectionHeader);
786 }
787
788 #[test]
789 fn token_scanner_empty_style_override() {
790 let source = "{}";
791 let mut scanner = TokenScanner::new(source, 0, 1, 1);
792 let token_type = scanner.scan_style_override().unwrap();
793 assert_eq!(token_type, TokenType::OverrideBlock);
794 }
795
796 #[test]
797 fn token_scanner_unclosed_style_override() {
798 let source = "{\\b1\\i1";
799 let mut scanner = TokenScanner::new(source, 0, 1, 1);
800 let token_type = scanner.scan_style_override().unwrap();
801 assert_eq!(token_type, TokenType::OverrideBlock);
802 }
803
804 #[test]
805 fn token_scanner_comment_at_end() {
806 let source = "; Comment at end";
807 let mut scanner = TokenScanner::new(source, 0, 1, 1);
808 let token_type = scanner.scan_comment().unwrap();
809 assert_eq!(token_type, TokenType::Comment);
810 }
811
812 #[test]
813 fn char_navigator_advance_char_error_handling() {
814 let mut nav = CharNavigator::new("", 0, 1, 1);
815 assert!(nav.advance_char().is_err());
816 assert!(nav.peek_char().is_err());
817 assert!(nav.peek_next().is_err());
818 }
819
820 #[test]
821 fn char_navigator_peek_operations_edge_cases() {
822 let source = "a";
823 let mut nav = CharNavigator::new(source, 0, 1, 1);
824
825 assert_eq!(nav.peek_char().unwrap(), 'a');
827 assert_eq!(nav.peek_char().unwrap(), 'a'); assert!(nav.peek_next().is_err()); nav.advance_char().unwrap();
833 assert!(nav.peek_char().is_err()); assert!(nav.peek_next().is_err()); }
836
837 #[test]
838 fn char_navigator_line_column_tracking_complex() {
839 let source = "line1\r\nline2\rline3\nline4";
840 let mut nav = CharNavigator::new(source, 0, 1, 1);
841
842 for _ in "line1".chars() {
844 nav.advance_char().unwrap();
845 }
846 assert_eq!(nav.line(), 1);
847 assert_eq!(nav.column(), 6);
848
849 nav.advance_char().unwrap(); assert_eq!(nav.line(), 2);
851 assert_eq!(nav.column(), 1);
852
853 nav.advance_char().unwrap(); assert_eq!(nav.line(), 2);
855 assert_eq!(nav.column(), 1);
856
857 for _ in "line2".chars() {
859 nav.advance_char().unwrap();
860 }
861 nav.advance_char().unwrap(); assert_eq!(nav.line(), 3);
863 assert_eq!(nav.column(), 1);
864
865 for _ in "line3".chars() {
867 nav.advance_char().unwrap();
868 }
869 nav.advance_char().unwrap(); assert_eq!(nav.line(), 4);
871 assert_eq!(nav.column(), 1);
872 }
873
874 #[test]
875 fn char_navigator_skip_whitespace_variations() {
876 let source = " \t\u{00A0}\u{2000} text"; let mut nav = CharNavigator::new(source, 0, 1, 1);
878 nav.skip_whitespace();
879 assert_eq!(nav.peek_char().unwrap(), 't');
880
881 let source2 = " \n text";
883 let mut nav2 = CharNavigator::new(source2, 0, 1, 1);
884 nav2.skip_whitespace();
885 assert_eq!(nav2.peek_char().unwrap(), '\n');
886 }
887
888 #[test]
889 fn token_scanner_scan_text_field_value_context_edge_cases() {
890 let source = "0:00:30.50";
892 let mut scanner = TokenScanner::new(source, 0, 1, 1);
893 let token_type = scanner.scan_text(TokenContext::FieldValue).unwrap();
894 assert_eq!(token_type, TokenType::Text);
895 assert_eq!(scanner.navigator().position(), source.len());
896
897 let source2 = "text,next";
899 let mut scanner2 = TokenScanner::new(source2, 0, 1, 1);
900 let token_type2 = scanner2.scan_text(TokenContext::FieldValue).unwrap();
901 assert_eq!(token_type2, TokenType::Text);
902 assert_eq!(scanner2.navigator().position(), 4); }
904
905 #[test]
906 fn token_scanner_scan_text_number_detection() {
907 let source1 = "123.45";
909 let mut scanner1 = TokenScanner::new(source1, 0, 1, 1);
910 let token_type1 = scanner1.scan_text(TokenContext::Document).unwrap();
911 assert_eq!(token_type1, TokenType::Number);
912
913 let source2 = "-123.45";
915 let mut scanner2 = TokenScanner::new(source2, 0, 1, 1);
916 let token_type2 = scanner2.scan_text(TokenContext::Document).unwrap();
917 assert_eq!(token_type2, TokenType::Number);
918
919 let source3 = "123";
921 let mut scanner3 = TokenScanner::new(source3, 0, 1, 1);
922 let token_type3 = scanner3.scan_text(TokenContext::Document).unwrap();
923 assert_eq!(token_type3, TokenType::Number);
924
925 let source4 = "123abc";
927 let mut scanner4 = TokenScanner::new(source4, 0, 1, 1);
928 let token_type4 = scanner4.scan_text(TokenContext::Document).unwrap();
929 assert_eq!(token_type4, TokenType::Text);
930 }
931
932 #[test]
933 fn token_scanner_scan_field_value_comprehensive() {
934 let source1 = "0:01:30.50,next";
936 let mut scanner1 = TokenScanner::new(source1, 0, 1, 1);
937 let token_type1 = scanner1.scan_field_value().unwrap();
938 assert_eq!(token_type1, TokenType::Number);
939 assert_eq!(scanner1.navigator().position(), 10); let source2 = "Some text,next";
943 let mut scanner2 = TokenScanner::new(source2, 0, 1, 1);
944 let token_type2 = scanner2.scan_field_value().unwrap();
945 assert_eq!(token_type2, TokenType::Text);
946 assert_eq!(scanner2.navigator().position(), 9); let source3 = "text\nmore";
950 let mut scanner3 = TokenScanner::new(source3, 0, 1, 1);
951 let token_type3 = scanner3.scan_field_value().unwrap();
952 assert_eq!(token_type3, TokenType::Text);
953 assert_eq!(scanner3.navigator().position(), 4); let source4 = "text{override}";
956 let mut scanner4 = TokenScanner::new(source4, 0, 1, 1);
957 let token_type4 = scanner4.scan_field_value().unwrap();
958 assert_eq!(token_type4, TokenType::Text);
959 assert_eq!(scanner4.navigator().position(), 4); let source5 = "text[section]";
962 let mut scanner5 = TokenScanner::new(source5, 0, 1, 1);
963 let token_type5 = scanner5.scan_field_value().unwrap();
964 assert_eq!(token_type5, TokenType::Text);
965 assert_eq!(scanner5.navigator().position(), 4); }
967
968 #[test]
969 fn token_scanner_section_header_variations() {
970 let source1 = "[ Script Info ]";
972 let mut scanner1 = TokenScanner::new(source1, 1, 1, 2); let token_type1 = scanner1.scan_section_header().unwrap();
974 assert_eq!(token_type1, TokenType::SectionHeader);
975
976 let source2 = "[V4+ Styles]";
978 let mut scanner2 = TokenScanner::new(source2, 1, 1, 2); let token_type2 = scanner2.scan_section_header().unwrap();
980 assert_eq!(token_type2, TokenType::SectionHeader);
981
982 let source3 = "[Script Info\nNext line";
984 let mut scanner3 = TokenScanner::new(source3, 1, 1, 2); let token_type3 = scanner3.scan_section_header().unwrap();
986 assert_eq!(token_type3, TokenType::SectionHeader);
987 }
988
989 #[test]
990 fn token_scanner_style_override_complex() {
991 let source1 = "{\\b1{nested}\\i1}";
993 let mut scanner1 = TokenScanner::new(source1, 1, 1, 2); let token_type1 = scanner1.scan_style_override().unwrap();
995 assert_eq!(token_type1, TokenType::OverrideBlock);
996
997 let source2 = "{ }";
999 let mut scanner2 = TokenScanner::new(source2, 1, 1, 2); let token_type2 = scanner2.scan_style_override().unwrap();
1001 assert_eq!(token_type2, TokenType::OverrideBlock);
1002
1003 let source3 = "{\\b1\\i1\n";
1005 let mut scanner3 = TokenScanner::new(source3, 1, 1, 2); let token_type3 = scanner3.scan_style_override().unwrap();
1007 assert_eq!(token_type3, TokenType::OverrideBlock);
1008 }
1009
1010 #[test]
1011 fn token_scanner_comment_variations() {
1012 let source1 = "!: This is a comment";
1014 let mut scanner1 = TokenScanner::new(source1, 0, 1, 1);
1015 let token_type1 = scanner1.scan_comment().unwrap();
1016 assert_eq!(token_type1, TokenType::Comment);
1017
1018 let source2 = "; Comment";
1020 let mut scanner2 = TokenScanner::new(source2, 0, 1, 1);
1021 let token_type2 = scanner2.scan_comment().unwrap();
1022 assert_eq!(token_type2, TokenType::Comment);
1023 assert_eq!(scanner2.navigator().position(), source2.len());
1024
1025 let source3 = ";\n";
1027 let mut scanner3 = TokenScanner::new(source3, 0, 1, 1);
1028 let token_type3 = scanner3.scan_comment().unwrap();
1029 assert_eq!(token_type3, TokenType::Comment);
1030 }
1031
1032 #[test]
1033 fn token_scanner_unicode_handling() {
1034 let source1 = "中文测试";
1036 let mut scanner1 = TokenScanner::new(source1, 0, 1, 1);
1037 let token_type1 = scanner1.scan_text(TokenContext::Document).unwrap();
1038 assert_eq!(token_type1, TokenType::Text);
1039 assert_eq!(scanner1.navigator().position(), source1.len());
1040
1041 let source2 = "[スクリプト情報]";
1043 let mut scanner2 = TokenScanner::new(source2, 1, 1, 2); let token_type2 = scanner2.scan_section_header().unwrap();
1045 assert_eq!(token_type2, TokenType::SectionHeader);
1046
1047 let source3 = "🎭🎬🎪";
1049 let mut scanner3 = TokenScanner::new(source3, 0, 1, 1);
1050 let token_type3 = scanner3.scan_text(TokenContext::Document).unwrap();
1051 assert_eq!(token_type3, TokenType::Text);
1052 assert_eq!(scanner3.navigator().position(), source3.len());
1053 }
1054
1055 #[test]
1056 fn token_scanner_empty_content_handling() {
1057 let source1 = ",next";
1059 let mut scanner1 = TokenScanner::new(source1, 0, 1, 1);
1060 let token_type1 = scanner1.scan_text(TokenContext::Document).unwrap();
1061 assert_eq!(token_type1, TokenType::Text);
1062 assert_eq!(scanner1.navigator().position(), 0); let source2 = "text";
1066 let mut scanner2 = TokenScanner::new(source2, 4, 1, 5); let token_type2 = scanner2.scan_text(TokenContext::Document).unwrap();
1068 assert_eq!(token_type2, TokenType::Text);
1069 }
1070
1071 #[test]
1072 fn char_navigator_boundary_conditions() {
1073 let long_line = "a".repeat(10000);
1075 let mut nav = CharNavigator::new(&long_line, 0, 1, 1);
1076 for i in 1..=10000 {
1077 nav.advance_char().unwrap();
1078 assert_eq!(nav.column(), i + 1);
1079 }
1080 assert!(nav.is_at_end());
1081
1082 let many_lines = "a\n".repeat(1000);
1084 let mut nav2 = CharNavigator::new(&many_lines, 0, 1, 1);
1085 for i in 1..=1000 {
1086 nav2.advance_char().unwrap(); nav2.advance_char().unwrap(); assert_eq!(nav2.line(), i + 1);
1089 assert_eq!(nav2.column(), 1);
1090 }
1091 }
1092
1093 #[test]
1094 fn token_scanner_simd_fallback_coverage() {
1095 let source = "field:value,next";
1097 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1098 let token_type = scanner.scan_text(TokenContext::FieldValue).unwrap();
1099 assert_eq!(token_type, TokenType::Text);
1100 assert_eq!(scanner.navigator().position(), 11);
1102 }
1103
1104 #[test]
1105 fn char_navigator_error_recovery() {
1106 let source = "a";
1108 let mut nav = CharNavigator::new(source, 0, 1, 1);
1109
1110 nav.advance_char().unwrap();
1112 assert!(nav.is_at_end());
1113
1114 assert!(nav.advance_char().is_err());
1116 assert!(nav.peek_char().is_err());
1117 }
1118
1119 #[test]
1120 fn char_navigator_peek_char_caching() {
1121 let source = "hello";
1122 let mut nav = CharNavigator::new(source, 0, 1, 1);
1123
1124 assert_eq!(nav.peek_char().unwrap(), 'h');
1126
1127 assert_eq!(nav.peek_char().unwrap(), 'h');
1129 assert_eq!(nav.peek_char().unwrap(), 'h');
1130
1131 nav.advance_char().unwrap();
1133 assert_eq!(nav.peek_char().unwrap(), 'e');
1134 }
1135
1136 #[test]
1137 fn char_navigator_last_char_tracking() {
1138 let source = "a\r\nb";
1139 let mut nav = CharNavigator::new(source, 0, 1, 1);
1140
1141 nav.advance_char().unwrap(); nav.advance_char().unwrap(); assert_eq!(nav.line(), 2);
1144
1145 nav.advance_char().unwrap(); assert_eq!(nav.line(), 2);
1147 assert_eq!(nav.column(), 1);
1148 }
1149
1150 #[test]
1151 fn token_scanner_hex_value_edge_cases() {
1152 assert!(!TokenScanner::is_hex_value("FF"));
1154 assert!(!TokenScanner::is_hex_value("00"));
1155 assert!(!TokenScanner::is_hex_value("ABCDEF"));
1156 assert!(!TokenScanner::is_hex_value("123456"));
1157
1158 assert!(TokenScanner::is_hex_value("&HFF&"));
1160 assert!(TokenScanner::is_hex_value("&HFF"));
1161 assert!(TokenScanner::is_hex_value("&H00FF00FF&"));
1162 assert!(TokenScanner::is_hex_value("&H00FF00FF"));
1163
1164 assert!(!TokenScanner::is_hex_value("F")); assert!(!TokenScanner::is_hex_value("GG")); assert!(!TokenScanner::is_hex_value("&H&")); assert!(!TokenScanner::is_hex_value("&HG&")); assert!(!TokenScanner::is_hex_value("")); }
1171
1172 #[test]
1173 fn scan_text_classification_verification() {
1174 let source1 = "0:00:30.50";
1178 let mut scanner1 = TokenScanner::new(source1, 0, 1, 1);
1179 let token_type1 = scanner1.scan_text(TokenContext::FieldValue).unwrap();
1180 assert_eq!(token_type1, TokenType::Text);
1181
1182 let source2 = "123abc";
1184 let mut scanner2 = TokenScanner::new(source2, 0, 1, 1);
1185 let token_type2 = scanner2.scan_text(TokenContext::Document).unwrap();
1186 assert_eq!(token_type2, TokenType::Text);
1187
1188 let source3 = "&H00FF00&";
1190 let mut scanner3 = TokenScanner::new(source3, 0, 1, 1);
1191 let token_type3 = scanner3.scan_text(TokenContext::Document).unwrap();
1192 assert_eq!(token_type3, TokenType::HexValue);
1193 }
1194
1195 #[test]
1196 fn token_scanner_delimiter_combinations() {
1197 let source = "text:{}[],more";
1199 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1200 let token_type = scanner.scan_text(TokenContext::Document).unwrap();
1201 assert_eq!(token_type, TokenType::Text);
1202 assert_eq!(scanner.navigator().position(), 4); }
1204
1205 #[test]
1206 fn token_scanner_field_value_delimiter_handling() {
1207 let source = "0:00:30.50";
1209 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1210 let token_type = scanner.scan_text(TokenContext::FieldValue).unwrap();
1211 assert_eq!(token_type, TokenType::Text); assert_eq!(scanner.navigator().position(), source.len()); }
1214
1215 #[test]
1216 fn token_scanner_semicolon_context_sensitivity() {
1217 let source = "text;comment";
1219
1220 let mut scanner1 = TokenScanner::new(source, 0, 1, 1);
1222 let token_type1 = scanner1.scan_text(TokenContext::Document).unwrap();
1223 assert_eq!(token_type1, TokenType::Text);
1224 assert!(scanner1.navigator().position() >= 4);
1226
1227 let mut scanner2 = TokenScanner::new(source, 0, 1, 1);
1229 let token_type2 = scanner2.scan_text(TokenContext::FieldValue).unwrap();
1230 assert_eq!(token_type2, TokenType::Text);
1231 assert_eq!(scanner2.navigator().position(), source.len());
1232 }
1233
1234 #[test]
1235 fn token_scanner_number_detection_edge_cases() {
1236 let test_cases = vec![
1238 ("123", true),
1239 ("123.45", true),
1240 ("-123", true),
1241 ("-123.45", true),
1242 ("123.", true),
1243 (".45", true),
1244 ("-.45", true),
1245 ("123abc", false), ("", false), (".", true), ("-", true), ("--123", true), ("12.34.56", true), ];
1252
1253 for (input, expected_is_number) in test_cases {
1254 let source = format!("{input},");
1255 let mut scanner = TokenScanner::new(&source, 0, 1, 1);
1256 let token_type = scanner.scan_text(TokenContext::Document).unwrap();
1257
1258 if expected_is_number && !input.is_empty() {
1259 assert_eq!(token_type, TokenType::Number, "Failed for input: {input}");
1260 } else {
1261 assert_ne!(token_type, TokenType::Number, "Failed for input: {input}");
1262 }
1263 }
1264 }
1265
1266 #[test]
1267 fn token_scanner_style_override_brace_depth() {
1268 let source = "{{{{}}}}";
1270 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1271 let token_type = scanner.scan_style_override().unwrap();
1272 assert_eq!(token_type, TokenType::OverrideBlock);
1273 assert_eq!(scanner.navigator().position(), 7); }
1275
1276 #[test]
1277 fn token_scanner_style_override_unbalanced() {
1278 let source = "{{{}}"; let mut scanner = TokenScanner::new(source, 0, 1, 1);
1281 let token_type = scanner.scan_style_override().unwrap();
1282 assert_eq!(token_type, TokenType::OverrideBlock);
1283 assert_eq!(scanner.navigator().position(), source.len());
1285 }
1286
1287 #[test]
1288 fn char_navigator_whitespace_at_end() {
1289 let source = "text ";
1290 let mut nav = CharNavigator::new(source, 4, 1, 5); nav.skip_whitespace();
1292 assert!(nav.is_at_end());
1293 }
1294
1295 #[test]
1296 fn char_navigator_mixed_newlines() {
1297 let source = "\r\n\n\r";
1298 let mut nav = CharNavigator::new(source, 0, 1, 1);
1299
1300 nav.advance_char().unwrap();
1302 assert_eq!(nav.line(), 2);
1303
1304 nav.advance_char().unwrap();
1306 assert_eq!(nav.line(), 2);
1307
1308 nav.advance_char().unwrap();
1310 assert_eq!(nav.line(), 3);
1311
1312 nav.advance_char().unwrap();
1314 assert_eq!(nav.line(), 4);
1315 }
1316
1317 #[test]
1318 fn token_scanner_empty_span_handling() {
1319 let source = ",";
1321 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1322 let token_type = scanner.scan_text(TokenContext::Document).unwrap();
1323 assert_eq!(token_type, TokenType::Text);
1324 assert_eq!(scanner.navigator().position(), 0); }
1326
1327 #[test]
1328 fn token_scanner_field_value_time_format() {
1329 let source = "1:23:45.67";
1331 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1332 let token_type = scanner.scan_field_value().unwrap();
1333 assert_eq!(token_type, TokenType::Number);
1334 assert_eq!(scanner.navigator().position(), source.len());
1335 }
1336
1337 #[test]
1338 fn char_navigator_position_consistency() {
1339 let source = "café🎭";
1340 let mut nav = CharNavigator::new(source, 0, 1, 1);
1341
1342 let start_pos = nav.position();
1343 nav.advance_char().unwrap(); assert_eq!(nav.position(), start_pos + 1);
1345
1346 nav.advance_char().unwrap(); assert_eq!(nav.position(), start_pos + 2);
1348
1349 nav.advance_char().unwrap(); assert_eq!(nav.position(), start_pos + 3);
1351
1352 nav.advance_char().unwrap(); assert_eq!(nav.position(), start_pos + 5);
1354
1355 nav.advance_char().unwrap(); assert_eq!(nav.position(), start_pos + 9);
1357 }
1358
1359 #[test]
1360 fn token_scanner_all_contexts_coverage() {
1361 let contexts = vec![
1363 TokenContext::Document,
1364 TokenContext::SectionHeader,
1365 TokenContext::FieldValue,
1366 TokenContext::StyleOverride,
1367 ];
1368
1369 for context in contexts {
1370 let source = "test:value,more";
1371 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1372 let token_type = scanner.scan_text(context).unwrap();
1373
1374 match context {
1376 TokenContext::SectionHeader => assert_eq!(token_type, TokenType::SectionName),
1377 _ => assert!(matches!(
1378 token_type,
1379 TokenType::Text | TokenType::Number | TokenType::HexValue
1380 )),
1381 }
1382 }
1383 }
1384
1385 #[test]
1386 fn char_navigator_column_reset_on_newlines() {
1387 let source = "long line text\nshort\n";
1388 let mut nav = CharNavigator::new(source, 0, 1, 1);
1389
1390 for _ in 0..14 {
1392 nav.advance_char().unwrap();
1393 }
1394 assert_eq!(nav.column(), 15);
1395
1396 nav.advance_char().unwrap(); assert_eq!(nav.line(), 2);
1399 assert_eq!(nav.column(), 1);
1400
1401 for _ in 0..5 {
1403 nav.advance_char().unwrap();
1404 }
1405 assert_eq!(nav.column(), 6);
1406
1407 nav.advance_char().unwrap(); assert_eq!(nav.line(), 3);
1410 assert_eq!(nav.column(), 1);
1411 }
1412
1413 #[test]
1414 fn char_navigator_utf8_error_handling() {
1415 let source = "valid\x7F\x7E";
1417 let mut nav = CharNavigator::new(source, 0, 1, 1);
1418
1419 assert!(nav.advance_char().is_ok());
1421 assert!(nav.advance_char().is_ok());
1422 assert!(nav.advance_char().is_ok());
1423 assert!(nav.advance_char().is_ok());
1424 assert!(nav.advance_char().is_ok());
1425
1426 let result = nav.advance_char();
1428 match result {
1429 Ok(_) | Err(_) => {
1430 assert!(nav.position() > 0);
1432 }
1433 }
1434 }
1435
1436 #[test]
1437 fn char_navigator_peek_char_caching_coverage() {
1438 let source = "abc";
1439 let mut nav = CharNavigator::new(source, 0, 1, 1);
1440
1441 let first_peek = nav.peek_char();
1443 assert_eq!(first_peek, Ok('a'));
1444
1445 let second_peek = nav.peek_char();
1447 assert_eq!(second_peek, Ok('a'));
1448
1449 assert!(nav.advance_char().is_ok());
1451
1452 let third_peek = nav.peek_char();
1454 assert_eq!(third_peek, Ok('b'));
1455 }
1456
1457 #[test]
1458 fn char_navigator_last_char_tracking_coverage() {
1459 let source = "xy\nz";
1460 let mut nav = CharNavigator::new(source, 0, 1, 1);
1461
1462 nav.advance_char().unwrap(); assert_eq!(nav.last_char, Some('x'));
1465
1466 nav.advance_char().unwrap(); assert_eq!(nav.last_char, Some('y'));
1468
1469 nav.advance_char().unwrap(); assert_eq!(nav.last_char, Some('\n'));
1471 assert_eq!(nav.line(), 2);
1472
1473 nav.advance_char().unwrap(); assert_eq!(nav.last_char, Some('z'));
1475 }
1476
1477 #[test]
1478 fn token_scanner_hex_value_comprehensive_coverage() {
1479 assert!(TokenScanner::is_hex_value("&H1234&"));
1481 assert!(TokenScanner::is_hex_value("&HFFFF&"));
1482 assert!(TokenScanner::is_hex_value("&H00&"));
1483
1484 assert!(TokenScanner::is_hex_value("&H1234"));
1486 assert!(TokenScanner::is_hex_value("&HABCD"));
1487
1488 assert!(!TokenScanner::is_hex_value("&H&"));
1490 assert!(!TokenScanner::is_hex_value("&H"));
1491
1492 assert!(!TokenScanner::is_hex_value("&H123&"));
1494 assert!(!TokenScanner::is_hex_value("&H0&"));
1495
1496 assert!(!TokenScanner::is_hex_value("&H123456789ABCDEF&")); assert!(!TokenScanner::is_hex_value("&HZ123&"));
1501 assert!(!TokenScanner::is_hex_value("&H12G4&"));
1502 }
1503
1504 #[test]
1505 fn token_scanner_delimiter_context_comprehensive() {
1506 let source = ",{[}]:;\n\r";
1507 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1508
1509 let result = scanner.scan_field_value();
1511 assert!(result.is_ok());
1512
1513 let source2 = ";comment";
1515 let scanner2 = TokenScanner::new(source2, 0, 1, 1);
1516 let nav_pos = scanner2.navigator().position();
1517
1518 assert_eq!(nav_pos, 0);
1520 }
1521
1522 #[test]
1523 fn token_scanner_scan_text_number_classification() {
1524 let source = "123.45";
1525 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1526
1527 let result = scanner.scan_text(crate::tokenizer::state::TokenContext::Document);
1528 assert!(result.is_ok());
1529
1530 let token_type = result.unwrap();
1531 assert_eq!(token_type, crate::tokenizer::tokens::TokenType::Number);
1532 }
1533
1534 #[test]
1535 fn token_scanner_section_header_boundary_coverage() {
1536 let source = "[Section]";
1537 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1538
1539 let result = scanner.scan_section_header();
1541 assert!(result.is_ok());
1542 assert_eq!(
1543 result.unwrap(),
1544 crate::tokenizer::tokens::TokenType::SectionHeader
1545 );
1546 }
1547
1548 #[test]
1549 fn token_scanner_style_override_brace_matching() {
1550 let source = "{\\b1}text{\\b0}";
1551 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1552
1553 let result = scanner.scan_style_override();
1555 assert!(result.is_ok());
1556 assert_eq!(
1557 result.unwrap(),
1558 crate::tokenizer::tokens::TokenType::OverrideBlock
1559 );
1560 }
1561
1562 #[test]
1563 fn token_scanner_simd_fallback_forced_coverage() {
1564 let source = "test,delimiter:content";
1565 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1566
1567 let result = scanner.scan_field_value();
1569 assert!(result.is_ok());
1570 }
1571
1572 #[test]
1573 fn char_navigator_advance_char_utf8_length_tracking() {
1574 let source = "a🎵b";
1575 let mut nav = CharNavigator::new(source, 0, 1, 1);
1576
1577 nav.advance_char().unwrap();
1579 assert_eq!(nav.position(), 1);
1580
1581 nav.advance_char().unwrap();
1583 assert_eq!(nav.position(), 5);
1584
1585 nav.advance_char().unwrap();
1587 assert_eq!(nav.position(), 6);
1588 }
1589
1590 #[test]
1591 fn token_scanner_empty_span_edge_cases() {
1592 let source = "";
1593 let scanner = TokenScanner::new(source, 0, 1, 1);
1594
1595 let nav_result = scanner.navigator();
1597 assert!(nav_result.is_at_end());
1598
1599 assert_eq!(nav_result.position(), 0);
1601 assert_eq!(nav_result.line(), 1);
1602 assert_eq!(nav_result.column(), 1);
1603 }
1604
1605 #[test]
1606 fn char_navigator_peek_operations_at_boundaries() {
1607 let source = "a";
1608 let mut nav = CharNavigator::new(source, 0, 1, 1);
1609
1610 assert_eq!(nav.peek_char().unwrap(), 'a');
1612
1613 assert!(nav.peek_next().is_err());
1615
1616 nav.advance_char().unwrap();
1618 assert!(nav.is_at_end());
1619
1620 assert!(nav.peek_char().is_err());
1622 assert!(nav.peek_next().is_err());
1623 }
1624
1625 #[test]
1626 fn token_scanner_all_delimiter_combinations_coverage() {
1627 let delimiters = [',', ':', '{', '}', '[', ']', '\n', '\r'];
1628
1629 for &delimiter in &delimiters {
1630 let source = format!("text{delimiter}more");
1631 let mut scanner = TokenScanner::new(&source, 0, 1, 1);
1632
1633 let result = scanner.scan_field_value();
1635 assert!(result.is_ok());
1636 }
1637 }
1638
1639 #[test]
1640 fn char_navigator_newline_variations_comprehensive() {
1641 let sources = [
1643 "line1\nline2", "line1\rline2", "line1\r\nline2", ];
1647
1648 for source in &sources {
1649 let mut nav = CharNavigator::new(source, 0, 1, 1);
1650
1651 while let Ok(ch) = nav.advance_char() {
1653 if ch == '\n' || ch == '\r' {
1654 break;
1655 }
1656 }
1657
1658 if !nav.is_at_end() {
1660 nav.advance_char().ok(); assert!(nav.line() >= 2);
1662 }
1663 }
1664 }
1665
1666 #[test]
1667 fn char_navigator_carriage_return_line_increment() {
1668 let source = "text\rmore";
1670 let mut nav = CharNavigator::new(source, 0, 1, 1);
1671
1672 for _ in 0..4 {
1674 nav.advance_char().unwrap();
1675 }
1676
1677 let ch = nav.advance_char().unwrap();
1679 assert_eq!(ch, '\r');
1680 assert_eq!(nav.line(), 2);
1681 assert_eq!(nav.column(), 1);
1682 }
1683
1684 #[test]
1685 fn char_navigator_newline_line_increment() {
1686 let source = "text\nmore";
1688 let mut nav = CharNavigator::new(source, 0, 1, 1);
1689
1690 for _ in 0..4 {
1691 nav.advance_char().unwrap();
1692 }
1693
1694 let ch = nav.advance_char().unwrap();
1695 assert_eq!(ch, '\n');
1696 assert_eq!(nav.line(), 2);
1697 assert_eq!(nav.column(), 1);
1698 }
1699
1700 #[test]
1701 fn char_navigator_column_increment_default() {
1702 let source = "abc";
1704 let mut nav = CharNavigator::new(source, 0, 1, 1);
1705
1706 nav.advance_char().unwrap(); assert_eq!(nav.column(), 2);
1708
1709 nav.advance_char().unwrap(); assert_eq!(nav.column(), 3);
1711
1712 nav.advance_char().unwrap(); assert_eq!(nav.column(), 4);
1714 }
1715
1716 #[test]
1717 fn char_navigator_skip_whitespace_loop() {
1718 let source = " \t\n text";
1720 let mut nav = CharNavigator::new(source, 0, 1, 1);
1721
1722 nav.skip_whitespace();
1723 assert_eq!(nav.position(), 4); }
1725
1726 #[test]
1727 fn token_scanner_section_header_closing_bracket() {
1728 let source = "[Test]";
1730 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1731
1732 let result = scanner.scan_section_header();
1733 assert!(result.is_ok());
1734 assert_eq!(
1735 result.unwrap(),
1736 crate::tokenizer::tokens::TokenType::SectionHeader
1737 );
1738 }
1739
1740 #[test]
1741 fn token_scanner_style_override_closing_brace() {
1742 let source = "{\\b1}";
1744 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1745
1746 let result = scanner.scan_style_override();
1747 assert!(result.is_ok());
1748 assert_eq!(
1749 result.unwrap(),
1750 crate::tokenizer::tokens::TokenType::OverrideBlock
1751 );
1752 }
1753
1754 #[test]
1755 fn token_scanner_comment_scanning() {
1756 let source = "! This is a comment";
1758 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1759
1760 let result = scanner.scan_comment();
1761 assert!(result.is_ok());
1762 assert_eq!(
1763 result.unwrap(),
1764 crate::tokenizer::tokens::TokenType::Comment
1765 );
1766 }
1767
1768 #[test]
1769 fn token_scanner_scan_text_hex_detection() {
1770 let source = "&H1234&";
1772 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1773
1774 let result = scanner.scan_text(crate::tokenizer::state::TokenContext::Document);
1775 assert!(result.is_ok());
1776 assert_eq!(
1777 result.unwrap(),
1778 crate::tokenizer::tokens::TokenType::HexValue
1779 );
1780 }
1781
1782 #[test]
1783 fn token_scanner_scan_text_number_detection_targeted() {
1784 let source = "123.45";
1786 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1787
1788 let result = scanner.scan_text(crate::tokenizer::state::TokenContext::Document);
1789 assert!(result.is_ok());
1790 assert_eq!(result.unwrap(), crate::tokenizer::tokens::TokenType::Number);
1791 }
1792
1793 #[test]
1794 fn token_scanner_scan_text_section_name_context() {
1795 let source = "Script Info";
1797 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1798
1799 let result = scanner.scan_text(crate::tokenizer::state::TokenContext::SectionHeader);
1800 assert!(result.is_ok());
1801 assert_eq!(
1802 result.unwrap(),
1803 crate::tokenizer::tokens::TokenType::SectionName
1804 );
1805 }
1806
1807 #[test]
1808 fn token_scanner_scan_text_field_value_context_targeted() {
1809 let source = "value_text";
1811 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1812
1813 let result = scanner.scan_text(crate::tokenizer::state::TokenContext::FieldValue);
1814 assert!(result.is_ok());
1815 }
1817
1818 #[test]
1819 fn token_scanner_scan_text_default_case() {
1820 let source = "regular_text";
1822 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1823
1824 let result = scanner.scan_text(crate::tokenizer::state::TokenContext::Document);
1825 assert!(result.is_ok());
1826 assert_eq!(result.unwrap(), crate::tokenizer::tokens::TokenType::Text);
1827 }
1828
1829 #[test]
1830 fn token_scanner_is_hex_value_ampersand_suffix() {
1831 assert!(TokenScanner::is_hex_value("&H1234&"));
1833 assert!(TokenScanner::is_hex_value("&HABCD&"));
1834 assert!(!TokenScanner::is_hex_value("&H&")); }
1836
1837 #[test]
1838 fn token_scanner_is_hex_value_no_ampersand() {
1839 assert!(TokenScanner::is_hex_value("&H1234"));
1841 assert!(TokenScanner::is_hex_value("&HABCD"));
1842 }
1843
1844 #[test]
1845 fn token_scanner_scan_field_value_basic() {
1846 let source = "field_value,next";
1848 let mut scanner = TokenScanner::new(source, 0, 1, 1);
1849
1850 let result = scanner.scan_field_value();
1851 assert!(result.is_ok());
1852 }
1853
1854 #[test]
1855 fn char_navigator_peek_char_error_path() {
1856 let source = "a";
1858 let mut nav = CharNavigator::new(source, 0, 1, 1);
1859
1860 nav.advance_char().unwrap();
1862 assert!(nav.is_at_end());
1863
1864 let result = nav.peek_char();
1866 assert!(result.is_err());
1867 }
1868
1869 #[test]
1870 fn char_navigator_peek_next_error_path() {
1871 let source = "a";
1873 let nav = CharNavigator::new(source, 0, 1, 1);
1874
1875 let result = nav.peek_next();
1877 assert!(result.is_err());
1878 }
1879}