1use crate::utils::CoreError;
19use wide::u8x16;
20
21#[cfg(not(feature = "std"))]
22extern crate alloc;
23#[cfg(not(feature = "std"))]
24#[cfg(feature = "simd")]
25use wide::u8x16;
26
27#[must_use]
50pub fn scan_delimiters(text: &str) -> Option<usize> {
51 let bytes = text.as_bytes();
52
53 #[cfg(feature = "simd")]
54 {
55 if bytes.len() >= 16 {
56 return scan_delimiters_simd_impl(bytes);
57 }
58 }
59
60 scan_delimiters_scalar(bytes)
61}
62
63#[cfg(feature = "simd")]
65fn scan_delimiters_simd_impl(bytes: &[u8]) -> Option<usize> {
66 let delim_colon = u8x16::splat(b':');
67 let delim_comma = u8x16::splat(b',');
68 let delim_open_brace = u8x16::splat(b'{');
69 let delim_close_brace = u8x16::splat(b'}');
70 let delim_open_bracket = u8x16::splat(b'[');
71 let delim_close_bracket = u8x16::splat(b']');
72 let delim_newline = u8x16::splat(b'\n');
73 let delim_carriage = u8x16::splat(b'\r');
74
75 let chunks = bytes.chunks_exact(16);
76 let remainder = chunks.remainder();
77
78 for (chunk_idx, chunk) in chunks.enumerate() {
79 let chunk_array: [u8; 16] = chunk.try_into().unwrap();
80 let simd_chunk = u8x16::from(chunk_array);
81
82 let mask = simd_chunk.cmp_eq(delim_colon)
83 | simd_chunk.cmp_eq(delim_comma)
84 | simd_chunk.cmp_eq(delim_open_brace)
85 | simd_chunk.cmp_eq(delim_close_brace)
86 | simd_chunk.cmp_eq(delim_open_bracket)
87 | simd_chunk.cmp_eq(delim_close_bracket)
88 | simd_chunk.cmp_eq(delim_newline)
89 | simd_chunk.cmp_eq(delim_carriage);
90
91 let mask_bits = mask.move_mask();
92 if mask_bits != 0 {
93 let first_match = mask_bits.trailing_zeros() as usize;
94 return Some(chunk_idx * 16 + first_match);
95 }
96 }
97
98 if !remainder.is_empty() {
99 let remainder_offset = bytes.len() - remainder.len();
100 if let Some(pos) = scan_delimiters_scalar(remainder) {
101 return Some(remainder_offset + pos);
102 }
103 }
104
105 None
106}
107
108fn scan_delimiters_scalar(bytes: &[u8]) -> Option<usize> {
110 for (i, &byte) in bytes.iter().enumerate() {
111 match byte {
112 b':' | b',' | b'{' | b'}' | b'[' | b']' | b'\n' | b'\r' => return Some(i),
113 _ => {}
114 }
115 }
116 None
117}
118
119#[must_use]
141pub fn parse_hex_u32(hex_str: &str) -> Option<u32> {
142 if hex_str.is_empty() || hex_str.len() > 8 {
143 return None;
144 }
145
146 #[cfg(feature = "simd")]
147 {
148 if hex_str.len() >= 4 {
149 return parse_hex_simd_impl(hex_str);
150 }
151 }
152
153 parse_hex_scalar(hex_str)
154}
155
156#[cfg(feature = "simd")]
158fn parse_hex_simd_impl(hex_str: &str) -> Option<u32> {
159 let bytes = hex_str.as_bytes();
160
161 if bytes.len() <= 8 {
163 return parse_hex_scalar_direct(bytes);
164 }
165
166 let chunks = bytes.chunks_exact(16);
168 let remainder = chunks.remainder();
169
170 for chunk in chunks {
171 let chunk_array: [u8; 16] = chunk.try_into().unwrap();
172 let simd_chunk = u8x16::from(chunk_array);
173
174 if !validate_hex_chars_simd(simd_chunk) {
175 return None;
176 }
177 }
178
179 for &byte in remainder {
180 if !byte.is_ascii_hexdigit() {
181 return None;
182 }
183 }
184
185 parse_hex_scalar(hex_str)
186}
187
188#[cfg(feature = "simd")]
190fn validate_hex_chars_simd(simd_chunk: u8x16) -> bool {
191 let mut valid_mask = u8x16::splat(0);
192
193 for digit in b'0'..=b'9' {
195 valid_mask |= simd_chunk.cmp_eq(u8x16::splat(digit));
196 }
197
198 for hex_char in b'A'..=b'F' {
200 valid_mask |= simd_chunk.cmp_eq(u8x16::splat(hex_char));
201 }
202
203 for hex_char in b'a'..=b'f' {
205 valid_mask |= simd_chunk.cmp_eq(u8x16::splat(hex_char));
206 }
207
208 valid_mask.move_mask() == 0xFFFF
209}
210
211#[cfg(feature = "simd")]
213fn parse_hex_scalar_direct(bytes: &[u8]) -> Option<u32> {
214 if bytes.is_empty() || bytes.len() > 8 {
215 return None;
216 }
217
218 let mut result: u32 = 0;
219
220 for &byte in bytes {
221 let digit_value = match byte {
222 b'0'..=b'9' => byte - b'0',
223 b'A'..=b'F' => byte - b'A' + 10,
224 b'a'..=b'f' => byte - b'a' + 10,
225 _ => return None,
226 };
227
228 result = result
229 .checked_mul(16)?
230 .checked_add(u32::from(digit_value))?;
231 }
232
233 Some(result)
234}
235
236fn parse_hex_scalar(hex_str: &str) -> Option<u32> {
238 u32::from_str_radix(hex_str, 16).ok()
239}
240
241pub fn validate_utf8_batch(bytes: &[u8]) -> Result<(), CoreError> {
251 #[cfg(feature = "simd")]
252 {
253 if bytes.len() >= 16 {
254 return validate_utf8_simd_impl(bytes);
255 }
256 }
257
258 validate_utf8_scalar(bytes)
259}
260
261#[cfg(feature = "simd")]
263fn validate_utf8_simd_impl(bytes: &[u8]) -> Result<(), CoreError> {
264 let chunks = bytes.chunks_exact(16);
265 let remainder = chunks.remainder();
266
267 for chunk in chunks {
268 let chunk_array: [u8; 16] = chunk.try_into().unwrap();
269 let simd_chunk = u8x16::from(chunk_array);
270 let ascii_mask = u8x16::splat(0x80);
271
272 let has_non_ascii = (simd_chunk & ascii_mask).move_mask();
273 if has_non_ascii != 0 {
274 return validate_utf8_scalar(bytes);
275 }
276 }
277
278 validate_utf8_scalar(remainder)
279}
280
281fn validate_utf8_scalar(bytes: &[u8]) -> Result<(), CoreError> {
283 core::str::from_utf8(bytes)
284 .map(|_| ())
285 .map_err(|e| CoreError::utf8_error(e.valid_up_to(), format!("{e}")))
286}
287
288#[cfg(test)]
289mod tests {
290 use super::*;
291 #[cfg(not(feature = "std"))]
292 use alloc::{format, vec};
293
294 #[test]
295 fn scan_delimiters_finds_colon() {
296 let text = "key: value";
297 assert_eq!(scan_delimiters(text), Some(3));
298 }
299
300 #[test]
301 fn scan_delimiters_finds_comma() {
302 let text = "value1, value2";
303 assert_eq!(scan_delimiters(text), Some(6));
304 }
305
306 #[test]
307 fn scan_delimiters_finds_brace() {
308 let text = "text{override}";
309 assert_eq!(scan_delimiters(text), Some(4));
310 }
311
312 #[test]
313 fn scan_delimiters_no_match() {
314 let text = "plain text";
315 assert_eq!(scan_delimiters(text), None);
316 }
317
318 #[test]
319 fn scan_delimiters_long_text() {
320 let text = format!("{}:value", "a".repeat(50));
321 assert_eq!(scan_delimiters(&text), Some(50));
322 }
323
324 #[test]
325 fn parse_hex_valid() {
326 assert_eq!(parse_hex_u32("FF"), Some(0xFF));
327 assert_eq!(parse_hex_u32("00FF00FF"), Some(0x00FF_00FF));
328 assert_eq!(parse_hex_u32("12345678"), Some(0x1234_5678));
329 assert_eq!(parse_hex_u32("abcdef"), Some(0x00ab_cdef));
330 assert_eq!(parse_hex_u32("ABCDEF"), Some(0x00AB_CDEF));
331 }
332
333 #[test]
334 fn parse_hex_invalid() {
335 assert_eq!(parse_hex_u32("GG"), None);
336 assert_eq!(parse_hex_u32("123456789"), None); assert_eq!(parse_hex_u32(""), None);
338 assert_eq!(parse_hex_u32("XYZ"), None);
339 }
340
341 #[test]
342 fn validate_utf8_valid() {
343 assert!(validate_utf8_batch(b"Hello, World!").is_ok());
344 assert!(validate_utf8_batch("Hello, 世界! 🎵".as_bytes()).is_ok());
345 assert!(validate_utf8_batch("a".repeat(50).as_bytes()).is_ok());
346 }
347
348 #[test]
349 fn validate_utf8_invalid() {
350 assert!(validate_utf8_batch(&[0xFF, 0xFE]).is_err());
351 }
352
353 #[test]
354 fn scan_delimiters_all_delimiter_types() {
355 let delimiters = vec![
357 ("text:more", 4, ':'),
358 ("text,more", 4, ','),
359 ("text{more", 4, '{'),
360 ("text}more", 4, '}'),
361 ("text[more", 4, '['),
362 ("text]more", 4, ']'),
363 ("text\nmore", 4, '\n'),
364 ("text\rmore", 4, '\r'),
365 ];
366
367 for (text, expected_pos, _delimiter) in delimiters {
368 assert_eq!(scan_delimiters(text), Some(expected_pos));
369 }
370 }
371
372 #[test]
373 fn scan_delimiters_empty_input() {
374 assert_eq!(scan_delimiters(""), None);
375 }
376
377 #[test]
378 fn scan_delimiters_single_char() {
379 assert_eq!(scan_delimiters(":"), Some(0));
380 assert_eq!(scan_delimiters("a"), None);
381 }
382
383 #[test]
384 fn scan_delimiters_at_beginning() {
385 assert_eq!(scan_delimiters(":text"), Some(0));
386 assert_eq!(scan_delimiters(",text"), Some(0));
387 assert_eq!(scan_delimiters("{text"), Some(0));
388 }
389
390 #[test]
391 fn scan_delimiters_at_end() {
392 assert_eq!(scan_delimiters("text:"), Some(4));
393 assert_eq!(scan_delimiters("text,"), Some(4));
394 assert_eq!(scan_delimiters("text}"), Some(4));
395 }
396
397 #[test]
398 fn scan_delimiters_multiple_delimiters() {
399 assert_eq!(scan_delimiters("a:b,c{d"), Some(1));
401 assert_eq!(scan_delimiters("text,more:values"), Some(4));
402 }
403
404 #[test]
405 fn scan_delimiters_exactly_16_bytes() {
406 let text = "abcdefghijklmno:"; assert_eq!(scan_delimiters(text), Some(15));
409 }
410
411 #[test]
412 fn scan_delimiters_less_than_16_bytes() {
413 let text = "short:text"; assert_eq!(scan_delimiters(text), Some(5));
416 }
417
418 #[test]
419 fn scan_delimiters_much_longer_than_16_bytes() {
420 let prefix = "a".repeat(32);
422 let text = format!("{prefix}:value");
423 assert_eq!(scan_delimiters(&text), Some(32));
424 }
425
426 #[test]
427 fn scan_delimiters_unicode_text() {
428 let text = "café🎭:value";
429 let colon_pos = text.find(':').unwrap();
430 assert_eq!(scan_delimiters(text), Some(colon_pos));
431 }
432
433 #[test]
434 fn parse_hex_edge_cases() {
435 assert_eq!(parse_hex_u32("0"), Some(0));
437 assert_eq!(parse_hex_u32("F"), Some(15));
438 assert_eq!(parse_hex_u32("FFFFFFFF"), Some(0xFFFF_FFFF));
439
440 assert_eq!(parse_hex_u32("aBcDeF"), Some(0x00ab_cdef));
442 assert_eq!(parse_hex_u32("AbCdEf"), Some(0x00ab_cdef));
443
444 assert_eq!(parse_hex_u32("00000001"), Some(1));
446 assert_eq!(parse_hex_u32("0000FF00"), Some(0xFF00));
447 }
448
449 #[test]
450 fn parse_hex_invalid_length() {
451 assert_eq!(parse_hex_u32("123456789"), None);
453 assert_eq!(parse_hex_u32("FFFFFFFFF"), None);
454 }
455
456 #[test]
457 fn parse_hex_invalid_characters() {
458 assert_eq!(parse_hex_u32("GHIJ"), None);
459 assert_eq!(parse_hex_u32("123G"), None);
460 assert_eq!(parse_hex_u32("12 34"), None); assert_eq!(parse_hex_u32("12-34"), None); assert_eq!(parse_hex_u32("FF\n"), None); }
464
465 #[test]
466 fn parse_hex_overflow_handling() {
467 assert_eq!(parse_hex_u32("FFFFFFFF"), Some(u32::MAX));
469 }
470
471 #[test]
472 fn validate_utf8_empty_input() {
473 assert!(validate_utf8_batch(&[]).is_ok());
474 }
475
476 #[test]
477 fn validate_utf8_ascii_only() {
478 let ascii_text = "Hello, World! 123 @#$%";
479 assert!(validate_utf8_batch(ascii_text.as_bytes()).is_ok());
480 }
481
482 #[test]
483 fn validate_utf8_exactly_16_bytes() {
484 let text = "1234567890123456"; assert!(validate_utf8_batch(text.as_bytes()).is_ok());
486 }
487
488 #[test]
489 fn validate_utf8_less_than_16_bytes() {
490 let text = "short"; assert!(validate_utf8_batch(text.as_bytes()).is_ok());
492 }
493
494 #[test]
495 fn validate_utf8_much_longer() {
496 let text = "a".repeat(100);
497 assert!(validate_utf8_batch(text.as_bytes()).is_ok());
498 }
499
500 #[test]
501 fn validate_utf8_mixed_unicode() {
502 let text = "ASCII中文🎵عربي";
503 assert!(validate_utf8_batch(text.as_bytes()).is_ok());
504 }
505
506 #[test]
507 fn validate_utf8_invalid_sequences() {
508 assert!(validate_utf8_batch(&[0xC0, 0x80]).is_err()); assert!(validate_utf8_batch(&[0xED, 0xA0, 0x80]).is_err()); assert!(validate_utf8_batch(&[0xF4, 0x90, 0x80, 0x80]).is_err()); }
513
514 #[test]
515 fn validate_utf8_incomplete_sequences() {
516 assert!(validate_utf8_batch(&[0xC2]).is_err()); assert!(validate_utf8_batch(&[0xE0, 0x80]).is_err()); assert!(validate_utf8_batch(&[0xF0, 0x90, 0x80]).is_err()); }
521
522 #[test]
523 fn scan_delimiters_scalar_fallback() {
524 let short_texts = vec![
526 "a:b", "test,val", "x{y}z", ];
530
531 for text in short_texts {
532 let result = scan_delimiters(text);
533 assert!(result.is_some());
534 }
535 }
536
537 #[test]
538 fn parse_hex_scalar_fallback() {
539 assert_eq!(parse_hex_u32("A"), Some(10));
541 assert_eq!(parse_hex_u32("FF"), Some(255));
542 assert_eq!(parse_hex_u32("123"), Some(0x123));
543 }
544
545 #[test]
546 fn scan_delimiters_boundary_at_chunk_edge() {
547 let text = format!("{}:", "a".repeat(15)); assert_eq!(scan_delimiters(&text), Some(15));
550
551 let text2 = format!("{}:", "a".repeat(16)); assert_eq!(scan_delimiters(&text2), Some(16));
554 }
555
556 #[test]
557 fn validate_utf8_non_ascii_in_chunks() {
558 let text = format!("{}café", "a".repeat(12)); assert!(validate_utf8_batch(text.as_bytes()).is_ok());
561 }
562
563 #[test]
564 fn parse_hex_case_sensitivity() {
565 assert_eq!(parse_hex_u32("abcdef"), parse_hex_u32("ABCDEF"));
567 assert_eq!(parse_hex_u32("deadbeef"), parse_hex_u32("DEADBEEF"));
568 }
569
570 #[test]
571 fn scan_delimiters_no_false_positives() {
572 let text = "abcdefghijklmnopqrstuvwxyz"; assert_eq!(scan_delimiters(text), None);
575
576 let text2 = "0123456789"; assert_eq!(scan_delimiters(text2), None);
578 }
579
580 #[test]
581 fn validate_utf8_chunk_remainder_handling() {
582 let text = format!("{}café", "a".repeat(17)); assert!(validate_utf8_batch(text.as_bytes()).is_ok());
585
586 let text2 = format!("{}🎵", "a".repeat(18)); assert!(validate_utf8_batch(text2.as_bytes()).is_ok());
588 }
589
590 #[test]
591 fn parse_hex_maximum_value() {
592 assert_eq!(parse_hex_u32("FFFFFFFF"), Some(u32::MAX));
594 assert_eq!(parse_hex_u32("ffffffff"), Some(u32::MAX));
595 }
596
597 #[test]
598 fn scan_delimiters_all_positions() {
599 for i in 0..10 {
601 let mut chars: Vec<char> = "abcdefghij".chars().collect();
602 chars[i] = ':';
603 let text: String = chars.iter().collect();
604 assert_eq!(scan_delimiters(&text), Some(i));
605 }
606 }
607}