ass_core/tokenizer/scanner/
text_scanner.rs1use super::token_scanner::TokenScanner;
7use crate::tokenizer::{state::TokenContext, tokens::TokenType};
8use crate::Result;
9
10#[cfg(feature = "simd")]
11use crate::tokenizer::simd;
12
13impl TokenScanner<'_> {
14 pub fn scan_text(&mut self, context: TokenContext) -> Result<TokenType> {
20 let start = self.navigator.position();
21
22 #[cfg(feature = "simd")]
24 {
25 let use_simd = !matches!(context, TokenContext::FieldValue);
27
28 if use_simd {
29 if let Some(delimiter_pos) = self.scan_delimiters_simd(start) {
30 self.navigator.position = delimiter_pos;
31 } else {
32 self.navigator.position = self.source.len();
33 }
34 self.navigator.chars = self.source[self.navigator.position..].chars();
35 self.navigator.peek_char = None;
36 }
37 }
38
39 #[cfg(not(feature = "simd"))]
41 let use_scalar = true;
42 #[cfg(feature = "simd")]
43 let use_scalar = matches!(context, TokenContext::FieldValue);
44
45 if use_scalar {
46 while !self.navigator.is_at_end() {
47 let ch = self.navigator.peek_char()?;
48
49 let is_delimiter = match context {
51 TokenContext::FieldValue => {
52 matches!(ch, ',' | '{' | '}' | '[' | ']' | '\n' | '\r')
54 }
55 _ => {
56 matches!(ch, ',' | ':' | '{' | '}' | '[' | ']' | '\n' | '\r')
58 || (ch == ';' && context == TokenContext::Document)
59 }
60 };
61
62 if is_delimiter {
63 break;
64 }
65
66 self.navigator.advance_char()?;
67 }
68 }
69
70 let span = &self.source[start..self.navigator.position()];
71
72 if context == TokenContext::SectionHeader {
73 Ok(TokenType::SectionName)
74 } else if Self::is_hex_value(span) {
75 Ok(TokenType::HexValue)
76 } else if !span.is_empty()
77 && span
78 .chars()
79 .all(|c| c.is_ascii_digit() || c == '.' || c == '-')
80 {
81 Ok(TokenType::Number)
82 } else if !span.is_empty() && span.chars().all(char::is_whitespace) {
83 Ok(TokenType::Whitespace)
84 } else {
85 Ok(TokenType::Text)
86 }
87 }
88
89 pub(super) fn is_hex_value(span: &str) -> bool {
91 if let Some(after_prefix) = span.strip_prefix("&H") {
93 let hex_part = after_prefix
94 .strip_suffix('&')
95 .map_or(after_prefix, |stripped| stripped);
96
97 if !hex_part.is_empty()
98 && hex_part.len() % 2 == 0
99 && hex_part.len() <= 8
100 && hex_part.chars().all(|c| c.is_ascii_hexdigit())
101 {
102 #[cfg(feature = "simd")]
103 {
104 return TokenScanner::parse_hex_simd(hex_part).is_some();
105 }
106 #[cfg(not(feature = "simd"))]
107 {
108 return true;
109 }
110 }
111 }
112
113 false
117 }
118
119 #[cfg(feature = "simd")]
121 fn scan_delimiters_simd(&self, start: usize) -> Option<usize> {
122 simd::scan_delimiters(&self.source[start..]).map(|offset| start + offset)
123 }
124
125 #[cfg(feature = "simd")]
127 fn parse_hex_simd(hex_str: &str) -> Option<u32> {
128 simd::parse_hex_u32(hex_str)
129 }
130
131 pub fn scan_field_value(&mut self) -> Result<TokenType> {
140 let start = self.navigator.position();
141
142 while !self.navigator.is_at_end() {
143 let ch = self.navigator.peek_char()?;
144
145 if ch == ',' || ch == '\n' || ch == '\r' || ch == '{' || ch == '[' {
147 break;
148 }
149
150 self.navigator.advance_char()?;
151 }
152
153 let span = &self.source[start..self.navigator.position()];
154
155 if !span.is_empty()
156 && span
157 .chars()
158 .all(|c| c.is_ascii_digit() || c == '.' || c == '-' || c == ':')
159 {
160 Ok(TokenType::Number)
161 } else if !span.is_empty() && span.chars().all(char::is_whitespace) {
162 Ok(TokenType::Whitespace)
163 } else {
164 Ok(TokenType::Text)
165 }
166 }
167}