1use super::Version;
4#[cfg(feature = "python-bindings")]
5use super::VersionToken;
6use ahash::AHashMap;
7use once_cell::sync::Lazy;
8use rez_next_common::RezCoreError;
9use smallvec::SmallVec;
10use std::sync::RwLock;
11
/// Process-wide pool of strings seen by `StateMachineParser::intern_string`.
///
/// Maps an owned key to a `&'static str` with the same contents. The map is
/// created lazily on first access and guarded by an `RwLock`. Values are
/// produced with `Box::leak` in `intern_string`, so entries are never freed.
static STRING_INTERN_POOL: Lazy<RwLock<AHashMap<String, &'static str>>> =
    Lazy::new(|| RwLock::new(AHashMap::new()));
15
/// A single lexical element of a version string.
///
/// Equality is total for every payload type, so the enum derives `Eq` in
/// addition to `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenType {
    /// A token consisting only of ASCII digits whose value fits in a `u64`.
    Numeric(u64),
    /// Any other token, including all-digit tokens too large for a `u64`.
    Alphanumeric(String),
    /// A separator character. `StateMachineParser::parse_tokens` does not
    /// emit this variant; it returns separators in a separate list.
    Separator(char),
}
23
/// States of the tokenizer's state machine.
///
/// The enum is field-less, so equality is total and `Eq` is derived alongside
/// `PartialEq`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ParseState {
    /// No character has been consumed yet.
    Start,
    /// Characters are being accumulated into the current token.
    InToken,
    /// A separator was just consumed; a token character must follow.
    InSeparator,
    /// Terminal state. `parse_tokens` never transitions into it; it only
    /// handles it by leaving the loop.
    End,
}
32
/// Configuration for the state-machine based version tokenizer.
///
/// Derives `Debug` and `Clone` so a parser can be logged and duplicated; all
/// fields are plain values.
#[derive(Debug, Clone)]
pub struct StateMachineParser {
    /// Whether alphanumeric tokens are passed through the global intern pool.
    use_interning: bool,
    /// Maximum number of tokens a version may contain.
    max_tokens: usize,
    /// Maximum number of purely numeric tokens a version may contain.
    max_numeric_tokens: usize,
}
42
43impl StateMachineParser {
44 pub fn new() -> Self {
46 Self {
47 use_interning: true,
48 max_tokens: 10,
49 max_numeric_tokens: 5,
50 }
51 }
52
53 pub fn with_config(use_interning: bool, max_tokens: usize, max_numeric_tokens: usize) -> Self {
55 Self {
56 use_interning,
57 max_tokens,
58 max_numeric_tokens,
59 }
60 }
61
62 fn intern_string(&self, s: String) -> String {
64 if !self.use_interning || s.len() > 64 {
65 return s;
66 }
67
68 {
70 let pool = STRING_INTERN_POOL.read().unwrap();
71 if let Some(&interned) = pool.get(&s) {
72 return interned.to_string();
73 }
74 }
75
76 {
78 let mut pool = STRING_INTERN_POOL.write().unwrap();
79 if let Some(&interned) = pool.get(&s) {
81 return interned.to_string();
82 }
83
84 if pool.len() < 10000 {
86 let leaked: &'static str = Box::leak(s.clone().into_boxed_str());
87 pool.insert(s.clone(), leaked);
88 return leaked.to_string();
89 }
90 }
91
92 s
93 }
94
95 #[inline(always)]
97 fn is_valid_separator(c: char) -> bool {
98 matches!(c, '.' | '-' | '_' | '+')
99 }
100
101 #[inline(always)]
103 fn is_token_char(c: char) -> bool {
104 c.is_ascii_alphanumeric() || c == '_'
105 }
106
107 pub fn parse_tokens(
109 &self,
110 input: &str,
111 ) -> Result<(SmallVec<[TokenType; 8]>, SmallVec<[char; 7]>), RezCoreError> {
112 if input.is_empty() {
113 return Ok((SmallVec::new(), SmallVec::new()));
114 }
115
116 let mut tokens = SmallVec::new();
117 let mut separators = SmallVec::new();
118 let mut state = ParseState::Start;
119 let mut current_token = String::new();
120 let mut numeric_count = 0;
121
122 let chars: SmallVec<[char; 64]> = input.chars().collect();
123 let mut i = 0;
124
125 while i < chars.len() {
126 let c = chars[i];
127
128 match state {
129 ParseState::Start => {
130 if Self::is_token_char(c) {
131 current_token.push(c);
132 state = ParseState::InToken;
133 } else if Self::is_valid_separator(c) {
134 return Err(RezCoreError::VersionParse(format!(
135 "Version cannot start with separator '{}'",
136 c
137 )));
138 } else {
139 return Err(RezCoreError::VersionParse(format!(
140 "Invalid character '{}' at start of version",
141 c
142 )));
143 }
144 }
145
146 ParseState::InToken => {
147 if Self::is_token_char(c) {
148 current_token.push(c);
149 } else if Self::is_valid_separator(c) {
150 self.finalize_token(&mut current_token, &mut tokens, &mut numeric_count)?;
152 separators.push(c);
153 state = ParseState::InSeparator;
154 } else {
155 return Err(RezCoreError::VersionParse(format!(
156 "Invalid character '{}' in token",
157 c
158 )));
159 }
160 }
161
162 ParseState::InSeparator => {
163 if Self::is_token_char(c) {
164 current_token.push(c);
165 state = ParseState::InToken;
166 } else {
167 return Err(RezCoreError::VersionParse(format!(
168 "Expected token character after separator, found '{}'",
169 c
170 )));
171 }
172 }
173
174 ParseState::End => break,
175 }
176
177 i += 1;
178 }
179
180 if state == ParseState::InToken && !current_token.is_empty() {
182 self.finalize_token(&mut current_token, &mut tokens, &mut numeric_count)?;
183 } else if state == ParseState::InSeparator {
184 return Err(RezCoreError::VersionParse(
185 "Version cannot end with separator".to_string(),
186 ));
187 }
188
189 if tokens.len() > self.max_tokens {
191 return Err(RezCoreError::VersionParse(format!(
192 "Too many tokens: {} (max: {})",
193 tokens.len(),
194 self.max_tokens
195 )));
196 }
197
198 if numeric_count > self.max_numeric_tokens {
199 return Err(RezCoreError::VersionParse(format!(
200 "Too many numeric tokens: {} (max: {})",
201 numeric_count, self.max_numeric_tokens
202 )));
203 }
204
205 Ok((tokens, separators))
206 }
207
208 fn finalize_token(
210 &self,
211 current_token: &mut String,
212 tokens: &mut SmallVec<[TokenType; 8]>,
213 numeric_count: &mut usize,
214 ) -> Result<(), RezCoreError> {
215 if current_token.is_empty() {
216 return Err(RezCoreError::VersionParse("Empty token found".to_string()));
217 }
218
219 if current_token.starts_with('_') || current_token.ends_with('_') {
221 return Err(RezCoreError::VersionParse(format!(
222 "Invalid token format: '{}'",
223 current_token
224 )));
225 }
226
227 if current_token == "not" || current_token == "version" {
229 return Err(RezCoreError::VersionParse(format!(
230 "Invalid version token: '{}'",
231 current_token
232 )));
233 }
234
235 if current_token.chars().all(|c| c.is_alphabetic()) && current_token.len() > 10 {
237 return Err(RezCoreError::VersionParse(format!(
238 "Invalid version token: '{}'",
239 current_token
240 )));
241 }
242
243 if current_token.chars().all(|c| c.is_ascii_digit()) {
245 if let Ok(num) = current_token.parse::<u64>() {
246 tokens.push(TokenType::Numeric(num));
247 *numeric_count += 1;
248 } else {
249 let interned = self.intern_string(current_token.clone());
251 tokens.push(TokenType::Alphanumeric(interned));
252 }
253 } else {
254 let interned = self.intern_string(current_token.clone());
256 tokens.push(TokenType::Alphanumeric(interned));
257 }
258
259 current_token.clear();
260 Ok(())
261 }
262}
263
/// Version parser facade that owns a [`StateMachineParser`].
pub struct VersionParser {
    /// Tokenizer that `parse_tokens` (compiled with the `python-bindings`
    /// feature) delegates to.
    inner: StateMachineParser,
}
268
269impl VersionParser {
270 pub fn new() -> Self {
272 Self {
273 inner: StateMachineParser::new(),
274 }
275 }
276
277 #[cfg(feature = "python-bindings")]
279 pub fn parse_tokens(
280 &self,
281 input: &str,
282 ) -> Result<(Vec<VersionToken>, Vec<char>), RezCoreError> {
283 let (_tokens, separators) = self.inner.parse_tokens(input)?;
284
285 let legacy_tokens = Vec::new();
287 let legacy_separators: Vec<char> = separators.into_iter().collect();
288
289 Ok((legacy_tokens, legacy_separators))
292 }
293
294 pub fn parse_version(&self, input: &str) -> Result<Version, RezCoreError> {
296 Version::parse(input)
299 }
300}
301
impl Default for VersionParser {
    /// Equivalent to [`VersionParser::new`].
    fn default() -> Self {
        Self::new()
    }
}
307
impl Default for StateMachineParser {
    /// Equivalent to [`StateMachineParser::new`].
    fn default() -> Self {
        Self::new()
    }
}
313
#[cfg(test)]
mod tests {
    use super::*;

    /// Both parsers can be constructed and the defaults are as documented.
    #[test]
    fn test_parser_creation() {
        let _parser = VersionParser::new();
        let state_machine_parser = StateMachineParser::new();

        assert!(state_machine_parser.use_interning);
        assert_eq!(state_machine_parser.max_tokens, 10);
        assert_eq!(state_machine_parser.max_numeric_tokens, 5);
        assert!(state_machine_parser.parse_tokens("1.0").is_ok());
    }

    /// Empty input and a plain dotted numeric version.
    #[test]
    fn test_state_machine_parser_basic() {
        let parser = StateMachineParser::new();

        let (tokens, separators) = parser.parse_tokens("").unwrap();
        assert!(tokens.is_empty());
        assert!(separators.is_empty());

        let (tokens, separators) = parser.parse_tokens("1.2.3").unwrap();
        assert_eq!(tokens.len(), 3);
        assert_eq!(separators.len(), 2);

        match &tokens[0] {
            TokenType::Numeric(n) => assert_eq!(*n, 1),
            _ => panic!("Expected numeric token"),
        }

        assert_eq!(separators[0], '.');
        assert_eq!(separators[1], '.');
    }

    /// A version mixing numeric and alphanumeric tokens.
    #[test]
    fn test_state_machine_parser_alphanumeric() {
        let parser = StateMachineParser::new();

        let (tokens, separators) = parser.parse_tokens("1.2.3-alpha1").unwrap();
        assert_eq!(tokens.len(), 4);
        assert_eq!(separators.len(), 3);

        match &tokens[0] {
            TokenType::Numeric(n) => assert_eq!(*n, 1),
            _ => panic!("Expected numeric token"),
        }

        match &tokens[3] {
            TokenType::Alphanumeric(s) => assert_eq!(s, "alpha1"),
            _ => panic!("Expected alphanumeric token"),
        }
    }

    /// Malformed inputs are rejected.
    #[test]
    fn test_state_machine_parser_errors() {
        let parser = StateMachineParser::new();

        // Leading separator.
        assert!(parser.parse_tokens(".1.2.3").is_err());

        // Trailing separator.
        assert!(parser.parse_tokens("1.2.3.").is_err());

        // Character that is neither a token character nor a separator.
        assert!(parser.parse_tokens("1.2.3@").is_err());

        // Tokens may not start or end with an underscore.
        assert!(parser.parse_tokens("_invalid").is_err());
        assert!(parser.parse_tokens("invalid_").is_err());
    }

    /// Interned tokens keep their contents across repeated parses.
    #[test]
    fn test_string_interning() {
        let parser = StateMachineParser::with_config(true, 10, 5);

        let (tokens1, _) = parser.parse_tokens("1.0.0-alpha").unwrap();
        let (tokens2, _) = parser.parse_tokens("1.0.0-alpha").unwrap();

        // Fail loudly if the last token is not alphanumeric instead of
        // silently skipping the comparison.
        match (&tokens1[3], &tokens2[3]) {
            (TokenType::Alphanumeric(s1), TokenType::Alphanumeric(s2)) => {
                assert_eq!(s1.as_str(), "alpha");
                assert_eq!(s1, s2);
            }
            other => panic!("Expected alphanumeric tokens, got {:?}", other),
        }
    }

    /// Token-count limits are enforced, and inputs at the limit still parse.
    #[test]
    fn test_performance_limits() {
        let parser = StateMachineParser::new();

        let too_many_tokens = (0..15).map(|i| i.to_string()).collect::<Vec<_>>().join(".");
        assert!(parser.parse_tokens(&too_many_tokens).is_err());

        let too_many_numeric = (0..10).map(|i| i.to_string()).collect::<Vec<_>>().join(".");
        assert!(parser.parse_tokens(&too_many_numeric).is_err());

        // Exactly `max_numeric_tokens` numeric tokens is still accepted.
        let at_numeric_limit = (0..5).map(|i| i.to_string()).collect::<Vec<_>>().join(".");
        assert!(parser.parse_tokens(&at_numeric_limit).is_ok());
    }

    /// Character classes used by the state machine.
    #[test]
    fn test_character_classification() {
        assert!(StateMachineParser::is_valid_separator('.'));
        assert!(StateMachineParser::is_valid_separator('-'));
        assert!(StateMachineParser::is_valid_separator('_'));
        assert!(StateMachineParser::is_valid_separator('+'));
        assert!(!StateMachineParser::is_valid_separator('@'));

        assert!(StateMachineParser::is_token_char('a'));
        assert!(StateMachineParser::is_token_char('1'));
        assert!(StateMachineParser::is_token_char('_'));
        assert!(!StateMachineParser::is_token_char('.'));
    }
}