#[cfg(all(
target_arch = "aarch64",
not(feature = "broadword-yaml"),
not(feature = "scalar-yaml")
))]
mod neon;
#[cfg(all(target_arch = "x86_64", not(feature = "scalar-yaml")))]
mod x86;
#[cfg(all(
not(feature = "scalar-yaml"),
any(
all(target_arch = "aarch64", feature = "broadword-yaml"),
not(any(target_arch = "aarch64", target_arch = "x86_64"))
)
))]
mod broadword;
#[cfg(all(target_arch = "x86_64", not(feature = "scalar-yaml")))]
pub use x86::YamlCharClass;
#[cfg(all(
not(feature = "scalar-yaml"),
any(
all(target_arch = "aarch64", feature = "broadword-yaml"),
not(any(target_arch = "aarch64", target_arch = "x86_64"))
)
))]
pub use broadword::{YamlCharClass16, YamlCharClassBroadword};
#[cfg(all(
target_arch = "aarch64",
not(feature = "broadword-yaml"),
not(feature = "scalar-yaml")
))]
pub use neon::YamlCharClass16;
/// Searches `input[start..end]` for a double quote (`"`) or backslash (`\`).
///
/// The backend is chosen at compile time: the scalar loop when the
/// `scalar-yaml` feature is on, otherwise NEON on aarch64 (unless
/// `broadword-yaml` is on), the x86 module on x86_64, and the broadword
/// module on aarch64 with `broadword-yaml` or on any other architecture.
///
/// Returns `None` when `start >= end` or `start >= input.len()`. `end` is
/// clamped to `input.len()` before dispatch. The scalar backend reports the
/// match as an offset counted from `start`, not as an absolute index.
/// NOTE(review): the SIMD/broadword backends are assumed to follow the same
/// convention — confirm against those modules.
#[inline]
pub fn find_quote_or_escape(input: &[u8], start: usize, end: usize) -> Option<usize> {
// Reject empty or out-of-range windows before touching any backend.
if start >= end || start >= input.len() {
return None;
}
// Clamp so backends never see an end past the slice.
let end = end.min(input.len());
// Exactly one of the cfg-gated blocks below survives compilation and becomes
// the function's tail expression.
#[cfg(feature = "scalar-yaml")]
{
find_quote_or_escape_scalar(input, start, end)
}
#[cfg(all(
target_arch = "aarch64",
not(feature = "broadword-yaml"),
not(feature = "scalar-yaml")
))]
{
neon::find_quote_or_escape_neon(input, start, end)
}
#[cfg(all(target_arch = "x86_64", not(feature = "scalar-yaml")))]
{
x86::find_quote_or_escape_x86(input, start, end)
}
#[cfg(all(
not(feature = "scalar-yaml"),
any(
all(target_arch = "aarch64", feature = "broadword-yaml"),
not(any(target_arch = "aarch64", target_arch = "x86_64"))
)
))]
{
broadword::find_quote_or_escape_broadword(input, start, end)
}
}
/// Searches `input[start..end]` for a single quote (`'`).
///
/// Backend selection mirrors the other dispatchers in this module: scalar
/// under `scalar-yaml`, NEON on aarch64 without `broadword-yaml`, the x86
/// module on x86_64, and broadword on aarch64 with `broadword-yaml` or on any
/// other architecture.
///
/// Returns `None` when `start >= end` or `start >= input.len()`. `end` is
/// clamped to `input.len()`. The scalar backend reports the match as an
/// offset counted from `start`.
/// NOTE(review): the SIMD/broadword backends are assumed to use the same
/// convention — confirm against those modules.
#[inline]
pub fn find_single_quote(input: &[u8], start: usize, end: usize) -> Option<usize> {
// Reject empty or out-of-range windows before touching any backend.
if start >= end || start >= input.len() {
return None;
}
// Clamp so backends never see an end past the slice.
let end = end.min(input.len());
// Exactly one of the cfg-gated blocks below survives compilation.
#[cfg(feature = "scalar-yaml")]
{
find_single_quote_scalar(input, start, end)
}
#[cfg(all(
target_arch = "aarch64",
not(feature = "broadword-yaml"),
not(feature = "scalar-yaml")
))]
{
neon::find_single_quote_neon(input, start, end)
}
#[cfg(all(target_arch = "x86_64", not(feature = "scalar-yaml")))]
{
x86::find_single_quote_x86(input, start, end)
}
#[cfg(all(
not(feature = "scalar-yaml"),
any(
all(target_arch = "aarch64", feature = "broadword-yaml"),
not(any(target_arch = "aarch64", target_arch = "x86_64"))
)
))]
{
broadword::find_single_quote_broadword(input, start, end)
}
}
/// Counts the run of space bytes (`b' '`) that begins at `start`.
///
/// Returns `0` when `start >= input.len()`. Otherwise the call is forwarded
/// to the backend selected at compile time: scalar under `scalar-yaml`, NEON
/// on aarch64 without `broadword-yaml`, the x86 module on x86_64, and
/// broadword on aarch64 with `broadword-yaml` or on any other architecture.
/// The scalar backend counts only `b' '`; tabs and other whitespace end the
/// run.
#[inline]
pub fn count_leading_spaces(input: &[u8], start: usize) -> usize {
// Nothing to count past the end of the slice.
if start >= input.len() {
return 0;
}
// Exactly one of the cfg-gated blocks below survives compilation.
#[cfg(feature = "scalar-yaml")]
{
count_leading_spaces_scalar(input, start)
}
#[cfg(all(
target_arch = "aarch64",
not(feature = "broadword-yaml"),
not(feature = "scalar-yaml")
))]
{
neon::count_leading_spaces_neon(input, start)
}
#[cfg(all(target_arch = "x86_64", not(feature = "scalar-yaml")))]
{
x86::count_leading_spaces_x86(input, start)
}
#[cfg(all(
not(feature = "scalar-yaml"),
any(
all(target_arch = "aarch64", feature = "broadword-yaml"),
not(any(target_arch = "aarch64", target_arch = "x86_64"))
)
))]
{
broadword::count_leading_spaces_broadword(input, start)
}
}
/// Forwards to `x86::classify_yaml_chars` for the bytes of `input` at `offset`.
///
/// Only compiled on x86_64 when the `scalar-yaml` feature is off.
/// NOTE(review): the chunk width and the condition under which `None` is
/// returned are defined by the `x86` module — confirm there.
#[inline]
#[cfg(all(target_arch = "x86_64", not(feature = "scalar-yaml")))]
pub fn classify_yaml_chars(input: &[u8], offset: usize) -> Option<YamlCharClass> {
x86::classify_yaml_chars(input, offset)
}
/// Forwards to the NEON or broadword `classify_yaml_chars_16` implementation.
///
/// Compiled on every non-x86_64 target when `scalar-yaml` is off. aarch64
/// without `broadword-yaml` uses the `neon` module; aarch64 with
/// `broadword-yaml`, and every other architecture, uses `broadword`.
/// NOTE(review): the `_16` suffix suggests a 16-byte chunk, and `None`
/// presumably signals too few bytes remaining — confirm in the backends.
#[inline]
#[cfg(all(
not(feature = "scalar-yaml"),
any(target_arch = "aarch64", not(target_arch = "x86_64"))
))]
#[allow(dead_code)]
pub fn classify_yaml_chars_16(input: &[u8], offset: usize) -> Option<YamlCharClass16> {
// Exactly one of the two blocks below survives compilation.
#[cfg(all(target_arch = "aarch64", not(feature = "broadword-yaml")))]
{
neon::classify_yaml_chars_16(input, offset)
}
#[cfg(any(
all(target_arch = "aarch64", feature = "broadword-yaml"),
not(any(target_arch = "aarch64", target_arch = "x86_64"))
))]
{
broadword::classify_yaml_chars_16(input, offset)
}
}
/// Forwards to `broadword::classify_yaml_chars_broadword` for `input` at `offset`.
///
/// Only compiled when the broadword backend is active: `scalar-yaml` off and
/// either aarch64 with `broadword-yaml` or a target that is neither aarch64
/// nor x86_64.
/// NOTE(review): the `_8` suffix suggests an 8-byte word per call — confirm
/// in the `broadword` module.
#[inline]
#[cfg(all(
not(feature = "scalar-yaml"),
any(
all(target_arch = "aarch64", feature = "broadword-yaml"),
not(any(target_arch = "aarch64", target_arch = "x86_64"))
)
))]
#[allow(dead_code)]
pub fn classify_yaml_chars_8(input: &[u8], offset: usize) -> Option<YamlCharClassBroadword> {
broadword::classify_yaml_chars_broadword(input, offset)
}
/// Searches `input` for a line feed (`\n`) at or after `start`.
///
/// Returns `None` when `start >= input.len()`. Otherwise the call is
/// forwarded to the backend selected at compile time. The scalar backend
/// reports the match as an offset counted from `start`.
/// NOTE(review): the SIMD/broadword backends are assumed to use the same
/// convention — confirm against those modules.
#[inline]
#[allow(dead_code)]
pub fn find_newline(input: &[u8], start: usize) -> Option<usize> {
// Nothing to search past the end of the slice.
if start >= input.len() {
return None;
}
// Exactly one of the cfg-gated blocks below survives compilation.
#[cfg(feature = "scalar-yaml")]
{
find_newline_scalar(input, start)
}
#[cfg(all(target_arch = "x86_64", not(feature = "scalar-yaml")))]
{
x86::find_newline_x86(input, start)
}
#[cfg(all(
target_arch = "aarch64",
not(feature = "broadword-yaml"),
not(feature = "scalar-yaml")
))]
{
// NOTE(review): the NEON path calls a function named `find_newline_broadword`
// inside the `neon` module — confirm this naming is intentional.
neon::find_newline_broadword(input, start)
}
#[cfg(all(
not(feature = "scalar-yaml"),
any(
all(target_arch = "aarch64", feature = "broadword-yaml"),
not(any(target_arch = "aarch64", target_arch = "x86_64"))
)
))]
{
broadword::find_newline_broadword(input, start)
}
}
/// Locates where a block scalar that requires `min_indent` spaces of
/// indentation ends, scanning `input` from `start`.
///
/// x86_64 and aarch64 (without `broadword-yaml`) use their SIMD modules when
/// `scalar-yaml` is off. Every other configuration, including the broadword
/// ones, uses the scalar implementation.
///
/// The scalar implementation returns the index of the first line start whose
/// indentation is below `min_indent` and whose first non-space byte is not a
/// line break, or `input.len()` if no such line exists. It never returns
/// `None`.
#[inline]
pub fn find_block_scalar_end(input: &[u8], start: usize, min_indent: usize) -> Option<usize> {
#[cfg(all(target_arch = "x86_64", not(feature = "scalar-yaml")))]
{
x86::find_block_scalar_end(input, start, min_indent)
}
#[cfg(all(
target_arch = "aarch64",
not(feature = "broadword-yaml"),
not(feature = "scalar-yaml")
))]
{
// The NEON routine's result is wrapped in `Some` to fit this signature.
Some(neon::find_block_scalar_end_neon(input, start, min_indent))
}
// Fallback: the exact complement of the two SIMD predicates above.
#[cfg(not(any(
all(target_arch = "x86_64", not(feature = "scalar-yaml")),
all(
target_arch = "aarch64",
not(feature = "broadword-yaml"),
not(feature = "scalar-yaml")
)
)))]
{
find_block_scalar_end_scalar(input, start, min_indent)
}
}
/// Scans an anchor name beginning at `start` and returns the index at which
/// it ends.
///
/// x86_64 and aarch64 (without `broadword-yaml`) use their SIMD modules when
/// `scalar-yaml` is off. Every other configuration uses the scalar
/// implementation.
///
/// The scalar implementation stops at a space, tab, line break, one of
/// `[ ] { } ,`, or a `:` that is immediately followed by a space, tab or line
/// break. It returns `input.len()` if none of these occurs.
#[inline]
pub fn parse_anchor_name(input: &[u8], start: usize) -> usize {
#[cfg(all(target_arch = "x86_64", not(feature = "scalar-yaml")))]
{
x86::parse_anchor_name(input, start)
}
#[cfg(all(
target_arch = "aarch64",
not(feature = "broadword-yaml"),
not(feature = "scalar-yaml")
))]
{
neon::parse_anchor_name_neon(input, start)
}
// Fallback: the exact complement of the two SIMD predicates above.
#[cfg(not(any(
all(target_arch = "x86_64", not(feature = "scalar-yaml")),
all(
target_arch = "aarch64",
not(feature = "broadword-yaml"),
not(feature = "scalar-yaml")
)
)))]
{
parse_anchor_name_scalar(input, start)
}
}
/// Returns the absolute index of the first byte at or after `start` that
/// cannot be copied verbatim into a JSON string.
///
/// In the scalar implementation those bytes are `"`, `\` and any control
/// byte below `0x20`. When no such byte exists, or when
/// `start >= bytes.len()`, the function returns `bytes.len()`.
///
/// Backend selection happens at compile time:
/// * aarch64 without `broadword-yaml` and without `scalar-yaml`: NEON;
/// * x86_64 without `scalar-yaml`: the x86 module, whose result is an
///   optional offset relative to `start` and is converted to an absolute
///   index here;
/// * every other configuration: the scalar loop.
#[inline(always)]
pub fn find_json_escape(bytes: &[u8], start: usize) -> usize {
    if start >= bytes.len() {
        return bytes.len();
    }
    #[cfg(all(
        target_arch = "aarch64",
        not(feature = "broadword-yaml"),
        not(feature = "scalar-yaml")
    ))]
    {
        neon::find_json_escape_neon(bytes, start)
    }
    #[cfg(all(target_arch = "x86_64", not(feature = "scalar-yaml")))]
    {
        x86::find_json_escape_x86(bytes, start).map_or(bytes.len(), |offset| start + offset)
    }
    // The fallback must be the exact complement of the two SIMD predicates.
    // A narrower predicate leaves aarch64 + `broadword-yaml` with no branch
    // at all, so the function would have no tail expression and fail to
    // compile in that configuration.
    #[cfg(not(any(
        all(target_arch = "x86_64", not(feature = "scalar-yaml")),
        all(
            target_arch = "aarch64",
            not(feature = "broadword-yaml"),
            not(feature = "scalar-yaml")
        )
    )))]
    {
        find_json_escape_scalar(bytes, start)
    }
}
/// Portable fallback for `find_json_escape`.
///
/// Walks `bytes[start..]` and returns the absolute index of the first `"`,
/// `\` or control byte (`< 0x20`); returns `bytes.len()` when there is none.
/// Panics if `start > bytes.len()`, because the slice is taken unchecked.
#[inline(always)]
#[allow(dead_code)]
fn find_json_escape_scalar(bytes: &[u8], start: usize) -> usize {
    bytes[start..]
        .iter()
        .position(|&byte| matches!(byte, b'"' | b'\\') || byte < 0x20)
        .map_or(bytes.len(), |offset| start + offset)
}
/// Portable fallback for `parse_anchor_name`.
///
/// Advances from `start` until a terminating byte is met and returns that
/// index (or `input.len()` if the name runs to the end). Terminators are
/// space, tab, CR, LF, the flow indicators `[ ] { } ,`, and a `:` whose next
/// byte is space, tab, CR or LF. A `:` at the very end of the input, or one
/// followed by any other byte, is part of the name. If `start` is already
/// past the end, `start` is returned unchanged.
#[allow(dead_code)]
fn parse_anchor_name_scalar(input: &[u8], start: usize) -> usize {
    let is_blank = |byte: u8| matches!(byte, b' ' | b'\t' | b'\n' | b'\r');

    let mut cursor = start;
    while let Some(&current) = input.get(cursor) {
        let terminates = match current {
            b'[' | b']' | b'{' | b'}' | b',' => true,
            // A colon only ends the name when whitespace follows it.
            b':' => matches!(input.get(cursor + 1), Some(&next) if is_blank(next)),
            other => is_blank(other),
        };
        if terminates {
            break;
        }
        cursor += 1;
    }
    cursor
}
/// Portable fallback for `find_block_scalar_end`.
///
/// Visits every `\n` at or after `start`. For the line that follows, it
/// counts leading spaces; if that line has a first non-space byte that is
/// neither `\n` nor `\r` and the count is below `min_indent`, the index of
/// the line's first byte is returned. A newline that is the last byte of the
/// input, or running out of newlines, yields `input.len()`. The result is
/// always `Some`.
#[allow(dead_code)]
fn find_block_scalar_end_scalar(input: &[u8], start: usize, min_indent: usize) -> Option<usize> {
    let total = input.len();
    // A `start` beyond the end leaves nothing to scan.
    let tail = input.get(start..).unwrap_or(&[]);

    let newline_offsets = tail
        .iter()
        .enumerate()
        .filter(|&(_, &byte)| byte == b'\n')
        .map(|(offset, _)| offset);

    for offset in newline_offsets {
        let line_start = start + offset + 1;
        if line_start >= total {
            return Some(total);
        }

        let line = &input[line_start..];
        let indent = line.iter().take_while(|&&byte| byte == b' ').count();

        // Blank lines and lines made only of spaces never end the scalar.
        let dedented = match line.get(indent) {
            Some(&first) => first != b'\n' && first != b'\r' && indent < min_indent,
            None => false,
        };
        if dedented {
            return Some(line_start);
        }
    }

    Some(total)
}
/// Portable fallback for `find_newline`.
///
/// Returns the distance from `start` to the first `\n` in `input[start..]`,
/// or `None` if there is no line feed. The value is relative to `start`, not
/// an absolute index. Panics if `start > input.len()`.
#[allow(dead_code)]
fn find_newline_scalar(input: &[u8], start: usize) -> Option<usize> {
    let tail = &input[start..];
    tail.iter().position(|&byte| byte == b'\n')
}
/// Portable fallback for `find_quote_or_escape`.
///
/// Returns the distance from `start` to the first `"` or `\` inside
/// `input[start..end]`, or `None` if neither occurs. The value is relative to
/// `start`. Panics if the range is out of bounds or inverted.
#[allow(dead_code)]
fn find_quote_or_escape_scalar(input: &[u8], start: usize, end: usize) -> Option<usize> {
    let window = &input[start..end];
    window
        .iter()
        .position(|&byte| matches!(byte, b'"' | b'\\'))
}
/// Portable fallback for `find_single_quote`.
///
/// Returns the distance from `start` to the first `'` inside
/// `input[start..end]`, or `None` if there is none. The value is relative to
/// `start`. Panics if the range is out of bounds or inverted.
#[allow(dead_code)]
fn find_single_quote_scalar(input: &[u8], start: usize, end: usize) -> Option<usize> {
    let window = &input[start..end];
    window.iter().copied().position(|byte| byte == b'\'')
}
/// Portable fallback for `count_leading_spaces`.
///
/// Returns how many consecutive `b' '` bytes start `input[start..]`. Any
/// other byte, including a tab, ends the run. Panics if
/// `start > input.len()`.
#[allow(dead_code)]
fn count_leading_spaces_scalar(input: &[u8], start: usize) -> usize {
    let tail = &input[start..];
    // The index of the first non-space byte is the run length; if every
    // byte is a space, the run covers the whole tail.
    tail.iter()
        .position(|&byte| byte != b' ')
        .unwrap_or(tail.len())
}
/// Tests for the dispatching entry points and their scalar references.
///
/// The quote/escape and single-quote searches report offsets relative to
/// `start`; `find_json_escape` reports absolute indices.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_find_quote_or_escape_basic() {
        let input = b"hello\"world";
        assert_eq!(find_quote_or_escape(input, 0, input.len()), Some(5));
    }

    #[test]
    fn test_find_quote_or_escape_backslash() {
        let input = b"hello\\world";
        assert_eq!(find_quote_or_escape(input, 0, input.len()), Some(5));
    }

    #[test]
    fn test_find_quote_or_escape_none() {
        let input = b"hello world";
        assert_eq!(find_quote_or_escape(input, 0, input.len()), None);
    }

    #[test]
    fn test_find_quote_or_escape_start_offset() {
        let input = b"ab\"cd\"ef";
        // The second quote sits at absolute index 5, i.e. offset 2 from start 3.
        assert_eq!(find_quote_or_escape(input, 3, input.len()), Some(2));
    }

    #[test]
    fn test_find_quote_or_escape_long_string() {
        let mut input = vec![b'a'; 100];
        input[50] = b'"';
        assert_eq!(find_quote_or_escape(&input, 0, input.len()), Some(50));
    }

    #[test]
    fn test_find_single_quote_basic() {
        let input = b"hello'world";
        assert_eq!(find_single_quote(input, 0, input.len()), Some(5));
    }

    #[test]
    fn test_find_single_quote_none() {
        let input = b"hello world";
        assert_eq!(find_single_quote(input, 0, input.len()), None);
    }

    #[test]
    fn test_find_single_quote_long_string() {
        let mut input = vec![b'a'; 100];
        input[75] = b'\'';
        assert_eq!(find_single_quote(&input, 0, input.len()), Some(75));
    }

    #[test]
    fn test_empty_range() {
        let input = b"hello";
        assert_eq!(find_quote_or_escape(input, 5, 5), None);
        assert_eq!(find_quote_or_escape(input, 10, 5), None);
    }

    #[test]
    fn test_find_at_boundary() {
        // Match placed exactly on a 16-byte boundary.
        let mut input = vec![b'a'; 32];
        input[16] = b'"';
        assert_eq!(find_quote_or_escape(&input, 0, input.len()), Some(16));
    }

    #[test]
    fn test_find_json_escape_basic() {
        let input = b"hello\"world";
        assert_eq!(find_json_escape(input, 0), 5);
    }

    #[test]
    fn test_find_json_escape_backslash() {
        let input = b"hello\\world";
        assert_eq!(find_json_escape(input, 0), 5);
    }

    #[test]
    fn test_find_json_escape_control() {
        let input = b"hello\nworld";
        assert_eq!(find_json_escape(input, 0), 5);
    }

    #[test]
    fn test_find_json_escape_none() {
        let input = b"hello world";
        assert_eq!(find_json_escape(input, 0), input.len());
    }

    #[test]
    fn test_find_json_escape_long() {
        let mut input = vec![b'a'; 100];
        input[50] = b'"';
        assert_eq!(find_json_escape(&input, 0), 50);
    }

    #[test]
    fn test_find_json_escape_simd_matches_scalar() {
        let test_cases: &[&[u8]] = &[
            b"",
            b"\"",
            b"\\",
            b"\n",
            b"\t",
            b"no special chars",
            b"quote\"here",
            b"newline\nhere",
            &[b'x'; 100],
        ];
        for &input in test_cases {
            let scalar = find_json_escape_scalar(input, 0);
            let simd = find_json_escape(input, 0);
            assert_eq!(
                scalar,
                simd,
                "JSON escape mismatch for {:?}",
                String::from_utf8_lossy(input)
            );
        }
    }

    #[test]
    fn test_simd_matches_scalar() {
        let test_cases: &[&[u8]] = &[
            b"",
            b"\"",
            b"\\",
            b"'",
            b"no special chars here",
            b"quote at end\"",
            b"\"quote at start",
            b"has\\backslash",
            b"has both \" and \\ chars",
            &[b'x'; 100],
        ];
        for &input in test_cases {
            let scalar_dq = find_quote_or_escape_scalar(input, 0, input.len());
            let simd_dq = find_quote_or_escape(input, 0, input.len());
            assert_eq!(
                scalar_dq,
                simd_dq,
                "double-quote mismatch for {:?}",
                String::from_utf8_lossy(input)
            );
            let scalar_sq = find_single_quote_scalar(input, 0, input.len());
            let simd_sq = find_single_quote(input, 0, input.len());
            assert_eq!(
                scalar_sq,
                simd_sq,
                "single-quote mismatch for {:?}",
                String::from_utf8_lossy(input)
            );
        }
    }

    #[test]
    fn test_count_leading_spaces_basic() {
        // Each literal carries exactly as many leading spaces as asserted.
        assert_eq!(count_leading_spaces(b"  hello", 0), 2);
        assert_eq!(count_leading_spaces(b"    world", 0), 4);
        assert_eq!(count_leading_spaces(b"no spaces", 0), 0);
        assert_eq!(count_leading_spaces(b"", 0), 0);
    }

    #[test]
    fn test_count_leading_spaces_offset() {
        // Two spaces follow "xx"; three spaces follow "key:".
        assert_eq!(count_leading_spaces(b"xx  hello", 2), 2);
        assert_eq!(count_leading_spaces(b"key:   value", 4), 3);
    }

    #[test]
    fn test_count_leading_spaces_all_spaces() {
        let spaces = vec![b' '; 100];
        assert_eq!(count_leading_spaces(&spaces, 0), 100);
    }

    #[test]
    fn test_count_leading_spaces_long() {
        let mut input = vec![b' '; 50];
        input.extend_from_slice(b"content");
        assert_eq!(count_leading_spaces(&input, 0), 50);
    }

    #[test]
    fn test_count_leading_spaces_at_boundary() {
        let mut input = vec![b' '; 16];
        input.push(b'x');
        assert_eq!(count_leading_spaces(&input, 0), 16);
        let mut input32 = vec![b' '; 32];
        input32.push(b'x');
        assert_eq!(count_leading_spaces(&input32, 0), 32);
    }

    #[test]
    fn test_count_leading_spaces_simd_matches_scalar() {
        // Runs that end exactly on a 16- or 32-byte boundary, followed by a
        // non-space byte. Built programmatically so the lengths are exact.
        let mut run16_then_x = [b' '; 17];
        run16_then_x[16] = b'x';
        let mut run32_then_x = [b' '; 33];
        run32_then_x[32] = b'x';

        let test_cases: &[&[u8]] = &[
            b"",
            &[b' '; 1],
            &[b' '; 2],
            &[b' '; 4],
            &[b' '; 8],
            &[b' '; 15],
            &[b' '; 16],
            &[b' '; 17],
            b"no spaces",
            b" one",
            b"  two",
            &run16_then_x,
            &run32_then_x,
        ];
        for &input in test_cases {
            let scalar = count_leading_spaces_scalar(input, 0);
            let simd = count_leading_spaces(input, 0);
            assert_eq!(
                scalar,
                simd,
                "leading spaces mismatch for {:?}",
                String::from_utf8_lossy(input)
            );
        }
    }
}