/// Replicates `b` into all eight byte lanes of a `u64`.
#[inline(always)]
const fn broadcast_byte(b: u8) -> u64 {
    // Every lane holds the same value, so the byte order used to assemble the word is irrelevant.
    u64::from_ne_bytes([b; 8])
}
/// `0x01` repeated in every byte lane.
const LO_BYTES: u64 = 0x0101010101010101u64;
/// `0x80` repeated in every byte lane; used as the per-lane flag bit.
const HI_BYTES: u64 = 0x8080808080808080u64;
/// Flags every zero byte of `x`.
///
/// The result has `0x80` in each lane whose byte in `x` is zero and `0x00` in every other lane,
/// so it is non-zero exactly when `x` contains a zero byte, and it can be used as a per-byte
/// match mask.
///
/// The shorter `(x - LO) & !x & HI` form is deliberately avoided: its subtraction borrows across
/// lanes, so a `0x01` byte directly above a zero byte would be flagged as well.
#[inline(always)]
const fn has_zero_byte(x: u64) -> u64 {
    // Forcing the top bit of each lane on before subtracting 1 keeps every borrow inside its own
    // lane (and the subtraction can never underflow). Afterwards the top bit of a lane is still
    // set only if that lane had one of its low seven bits set.
    let low_bits_present = (x | HI_BYTES) - LO_BYTES;
    // OR-ing `x` back in adds the lanes whose own top bit was set; the lanes whose flag is still
    // clear are therefore exactly the zero bytes.
    !(x | low_bits_present) & HI_BYTES
}
/// Looks for bytes of `x` equal to `target`.
///
/// XOR-ing `x` with `target` replicated into every lane turns each matching byte into zero; the
/// return value is whatever `has_zero_byte` reports for that XOR-ed word. It is non-zero when a
/// match exists, and its lowest set bit lies in the lane of the first match.
#[inline(always)]
const fn find_byte(x: u64, target: u8) -> u64 {
    let pattern = broadcast_byte(target);
    let zero_where_equal = x ^ pattern;
    has_zero_byte(zero_where_equal)
}
/// Compresses per-lane `0x80` flags into an 8-bit mask.
///
/// For a word whose only set bits are the top bits of its bytes, bit `i` of the result is set
/// exactly when byte `i` (counting from the least significant byte) carried the flag.
#[inline(always)]
const fn extract_mask_u64(x: u64) -> u8 {
    // Multiplying by this constant lands the flag of lane `i` on bit `56 + i` of the product.
    const GATHER: u64 = 0x0102040810204080u64;
    // Move each lane's flag from bit 7 down to bit 0 of its lane.
    let lane_flags = x >> 7;
    let gathered = lane_flags.wrapping_mul(GATHER);
    // The collected flags sit in the most significant byte.
    (gathered >> 56) as u8
}
/// Per-character match masks for one 8-byte chunk.
///
/// As filled in by `classify_yaml_chars_broadword`, bit `i` of a field refers to byte `i` of the
/// chunk.
#[derive(Debug, Clone, Copy, Default)]
#[allow(dead_code)]
pub struct YamlCharClassBroadword {
    /// Positions of `\n`.
    pub newlines: u8,
    /// Positions of `:`.
    pub colons: u8,
    /// Positions of `-`.
    pub hyphens: u8,
    /// Positions of the ASCII space.
    pub spaces: u8,
    /// Positions of `"`.
    pub quotes_double: u8,
    /// Positions of `'`.
    pub quotes_single: u8,
    /// Positions of `\`.
    pub backslashes: u8,
    /// Positions of `#`.
    pub hash: u8,
}

#[allow(dead_code)]
impl YamlCharClassBroadword {
    /// Returns `true` when at least one of the eight masks has a bit set.
    #[inline(always)]
    pub fn has_any(&self) -> bool {
        let masks = [
            self.newlines,
            self.colons,
            self.hyphens,
            self.spaces,
            self.quotes_double,
            self.quotes_single,
            self.backslashes,
            self.hash,
        ];
        masks.iter().any(|&mask| mask != 0)
    }

    /// Returns the union of the newline, colon, space and hash masks.
    #[inline(always)]
    pub fn value_terminators(&self) -> u8 {
        [self.newlines, self.colons, self.spaces, self.hash]
            .iter()
            .fold(0, |acc, &mask| acc | mask)
    }
}
/// Classifies the eight bytes of `input` starting at `offset`.
///
/// Returns `None` when fewer than eight bytes are available at `offset`, including when
/// `offset + 8` is not representable as a `usize`. Otherwise each field of the result holds one
/// bit per byte of the chunk (bit `i` for `input[offset + i]`), produced by `find_byte` and
/// compressed by `extract_mask_u64`.
///
/// NOTE(review): the accuracy of the individual bits is inherited from `find_byte`; confirm that
/// it flags exact matches only. A borrow-based zero-byte test can also flag a byte equal to
/// `target ^ 0x01` that directly follows a real match.
#[inline]
pub fn classify_yaml_chars_broadword(
    input: &[u8],
    offset: usize,
) -> Option<YamlCharClassBroadword> {
    // `checked_add` plus `get` rejects out-of-range requests without overflowing or panicking.
    let end = offset.checked_add(8)?;
    let window = input.get(offset..end)?;
    // The window is exactly eight bytes long, so this conversion cannot fail.
    let chunk = u64::from_le_bytes(window.try_into().ok()?);

    let mask_of = |target: u8| extract_mask_u64(find_byte(chunk, target));

    Some(YamlCharClassBroadword {
        newlines: mask_of(b'\n'),
        colons: mask_of(b':'),
        hyphens: mask_of(b'-'),
        spaces: mask_of(b' '),
        quotes_double: mask_of(b'"'),
        quotes_single: mask_of(b'\''),
        backslashes: mask_of(b'\\'),
        hash: mask_of(b'#'),
    })
}
/// Per-character match masks for one 16-byte window.
///
/// As filled in by `classify_yaml_chars_16`, bit `i` of a field refers to byte `i` of the window.
#[derive(Debug, Clone, Copy, Default)]
#[allow(dead_code)]
pub struct YamlCharClass16 {
    /// Positions of `\n`.
    pub newlines: u16,
    /// Positions of `:`.
    pub colons: u16,
    /// Positions of `-`.
    pub hyphens: u16,
    /// Positions of the ASCII space.
    pub spaces: u16,
    /// Positions of `"`.
    pub quotes_double: u16,
    /// Positions of `'`.
    pub quotes_single: u16,
    /// Positions of `\`.
    pub backslashes: u16,
    /// Positions of `#`.
    pub hash: u16,
}

impl YamlCharClass16 {
    /// Returns the union of the newline, colon, space and hash masks.
    #[inline(always)]
    pub fn value_terminators(&self) -> u16 {
        [self.newlines, self.colons, self.spaces, self.hash]
            .iter()
            .fold(0, |acc, &mask| acc | mask)
    }
}
/// Classifies the sixteen bytes of `input` starting at `offset`.
///
/// Returns `None` when fewer than sixteen bytes are available at `offset`, including when
/// `offset + 16` is not representable as a `usize`. Otherwise each field of the result holds one
/// bit per byte of the window: bits 0-7 come from the first eight bytes and bits 8-15 from the
/// second eight.
///
/// NOTE(review): the accuracy of the individual bits is inherited from `find_byte`; confirm that
/// it flags exact matches only. A borrow-based zero-byte test can also flag a byte equal to
/// `target ^ 0x01` that directly follows a real match.
#[inline]
pub fn classify_yaml_chars_16(input: &[u8], offset: usize) -> Option<YamlCharClass16> {
    /// Builds the 16-bit mask for `target` from the two 8-byte halves.
    #[inline(always)]
    fn classify_both(c0: u64, c1: u64, target: u8) -> u16 {
        let m0 = u16::from(extract_mask_u64(find_byte(c0, target)));
        let m1 = u16::from(extract_mask_u64(find_byte(c1, target)));
        m0 | (m1 << 8)
    }

    // `checked_add` plus `get` rejects out-of-range requests without overflowing or panicking.
    let end = offset.checked_add(16)?;
    let window = input.get(offset..end)?;
    let (first_half, second_half) = window.split_at(8);
    // Both halves are exactly eight bytes long, so these conversions cannot fail.
    let chunk0 = u64::from_le_bytes(first_half.try_into().ok()?);
    let chunk1 = u64::from_le_bytes(second_half.try_into().ok()?);

    Some(YamlCharClass16 {
        newlines: classify_both(chunk0, chunk1, b'\n'),
        colons: classify_both(chunk0, chunk1, b':'),
        hyphens: classify_both(chunk0, chunk1, b'-'),
        spaces: classify_both(chunk0, chunk1, b' '),
        quotes_double: classify_both(chunk0, chunk1, b'"'),
        quotes_single: classify_both(chunk0, chunk1, b'\''),
        backslashes: classify_both(chunk0, chunk1, b'\\'),
        hash: classify_both(chunk0, chunk1, b'#'),
    })
}
/// Finds the first `"` or `\` in `input[start..end]`.
///
/// `end` is clamped to `input.len()`. Returns `None` when the range is empty, when `start` is at
/// or past the end of `input`, or when neither byte occurs. The returned index is relative to
/// `start`, not to the beginning of `input`.
#[inline]
pub fn find_quote_or_escape_broadword(input: &[u8], start: usize, end: usize) -> Option<usize> {
    if start >= end || start >= input.len() {
        return None;
    }
    let window = &input[start..end.min(input.len())];

    // Examine eight bytes per step; whatever is left over is scanned bytewise below.
    let mut words = window.chunks_exact(8);
    let mut base = 0;
    for word in words.by_ref() {
        let lanes = u64::from_le_bytes(word.try_into().unwrap());
        let hits = find_byte(lanes, b'"') | find_byte(lanes, b'\\');
        if hits != 0 {
            // The lowest flagged lane is the earliest byte because the load is little-endian.
            return Some(base + (hits.trailing_zeros() / 8) as usize);
        }
        base += 8;
    }

    words
        .remainder()
        .iter()
        .position(|&b| b == b'"' || b == b'\\')
        .map(|pos| base + pos)
}
/// Finds the first `'` in `input[start..end]`.
///
/// `end` is clamped to `input.len()`. Returns `None` when the range is empty, when `start` is at
/// or past the end of `input`, or when no single quote occurs. The returned index is relative to
/// `start`, not to the beginning of `input`.
#[inline]
pub fn find_single_quote_broadword(input: &[u8], start: usize, end: usize) -> Option<usize> {
    if start >= end || start >= input.len() {
        return None;
    }
    let window = &input[start..end.min(input.len())];

    // Examine eight bytes per step; whatever is left over is scanned bytewise below.
    let mut words = window.chunks_exact(8);
    let mut base = 0;
    for word in words.by_ref() {
        let lanes = u64::from_le_bytes(word.try_into().unwrap());
        let hits = find_byte(lanes, b'\'');
        if hits != 0 {
            // The lowest flagged lane is the earliest byte because the load is little-endian.
            return Some(base + (hits.trailing_zeros() / 8) as usize);
        }
        base += 8;
    }

    words
        .remainder()
        .iter()
        .position(|&b| b == b'\'')
        .map(|pos| base + pos)
}
/// Counts the run of ASCII spaces that begins at `input[start]`.
///
/// Returns 0 when `start` is at or past the end of `input`. Eight bytes are examined per step
/// while at least eight remain; the tail is scanned bytewise.
#[inline]
pub fn count_leading_spaces_broadword(input: &[u8], start: usize) -> usize {
    // Eight copies of b' ': XOR-ing a chunk with this word zeroes exactly the space bytes.
    const SPACE_WORD: u64 = u64::from_le_bytes([b' '; 8]);

    if start >= input.len() {
        return 0;
    }
    let data = &input[start..];

    let mut chunks = data.chunks_exact(8);
    let mut offset = 0;
    for bytes in chunks.by_ref() {
        let chunk = u64::from_le_bytes(
            bytes
                .try_into()
                .expect("chunks_exact(8) yields 8-byte slices"),
        );
        let diff = chunk ^ SPACE_WORD;
        if diff != 0 {
            // The load is little-endian, so the lowest set bit of `diff` lies inside the first
            // byte that is not a space. Testing for "first non-zero lane" directly avoids
            // inverting a match mask, which is only sound if that mask is exact for every lane.
            return offset + (diff.trailing_zeros() / 8) as usize;
        }
        offset += 8;
    }

    offset
        + chunks
            .remainder()
            .iter()
            .take_while(|&&b| b == b' ')
            .count()
}
/// Finds the first `\n` in `input[start..]`.
///
/// Returns `None` when no newline occurs or when `start` lies beyond the end of `input` (the
/// sibling search helpers in this file treat an out-of-range `start` the same way instead of
/// panicking). The returned index is relative to `start`, not to the beginning of `input`.
#[inline]
pub fn find_newline_broadword(input: &[u8], start: usize) -> Option<usize> {
    // `get` yields `None` for `start > input.len()` where plain slicing would panic.
    let data = input.get(start..)?;

    // Examine eight bytes per step; whatever is left over is scanned bytewise below.
    let mut chunks = data.chunks_exact(8);
    let mut offset = 0;
    for bytes in chunks.by_ref() {
        let chunk = u64::from_le_bytes(
            bytes
                .try_into()
                .expect("chunks_exact(8) yields 8-byte slices"),
        );
        let matches = find_byte(chunk, b'\n');
        if matches != 0 {
            // The lowest flagged lane is the earliest byte because the load is little-endian.
            return Some(offset + (matches.trailing_zeros() / 8) as usize);
        }
        offset += 8;
    }

    chunks
        .remainder()
        .iter()
        .position(|&b| b == b'\n')
        .map(|pos| offset + pos)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `find_byte` flags the lane that holds the first `:` of the chunk.
    #[test]
    fn test_broadword_find_byte_basic() {
        let data = b"hello:world";
        let chunk = u64::from_le_bytes(data[0..8].try_into().unwrap());
        let colon_mask = find_byte(chunk, b':');
        assert_ne!(colon_mask, 0);
        assert_eq!(colon_mask.trailing_zeros() / 8, 5);
    }

    /// The first eight bytes of the input are `key: val`: colon at index 3, space at index 4.
    #[test]
    fn test_broadword_classify_basic() {
        let input = b"key: value\n";
        let class = classify_yaml_chars_broadword(input, 0).unwrap();
        assert_eq!(class.colons, 0b00001000);
        assert_eq!(class.spaces, 0b00010000);
    }

    /// Several different special characters inside one chunk are all reported.
    #[test]
    fn test_broadword_classify_multiple() {
        let input = b": - # \"\n\\";
        let class = classify_yaml_chars_broadword(input, 0).unwrap();
        assert_ne!(class.colons, 0);
        assert_ne!(class.hyphens, 0);
        assert_ne!(class.hash, 0);
        assert_ne!(class.quotes_double, 0);
        assert_ne!(class.newlines, 0);
    }

    /// Inputs shorter than the chunk size, or offsets too close to the end, yield `None`.
    #[test]
    fn test_broadword_classify_short_input() {
        assert!(classify_yaml_chars_broadword(b"short", 0).is_none());
        assert!(classify_yaml_chars_broadword(b"12345678", 1).is_none());
        assert!(classify_yaml_chars_16(b"0123456789abcde", 0).is_none());
        assert!(classify_yaml_chars_16(b"0123456789abcdef", 1).is_none());
    }

    /// A window without any special character produces empty masks.
    #[test]
    fn test_broadword_classify_16_basic() {
        let input = b"0123456789abcdef";
        let class = classify_yaml_chars_16(input, 0).unwrap();
        assert_eq!(class.colons, 0);
        assert_eq!(class.newlines, 0);
    }

    /// Matches in the second eight bytes land in bits 8-15.
    #[test]
    fn test_broadword_classify_16_with_matches() {
        let input = b"key: val\nmore: x\n";
        let class = classify_yaml_chars_16(input, 0).unwrap();
        assert!(class.colons & (1 << 3) != 0);
        assert!(class.colons & (1 << 13) != 0);
        assert!(class.newlines & (1 << 8) != 0);
    }

    #[test]
    fn test_broadword_find_quote_or_escape() {
        let input = b"hello\"world";
        assert_eq!(
            find_quote_or_escape_broadword(input, 0, input.len()),
            Some(5)
        );

        let input2 = b"hello\\world";
        assert_eq!(
            find_quote_or_escape_broadword(input2, 0, input2.len()),
            Some(5)
        );

        let input3 = b"no special chars";
        assert_eq!(
            find_quote_or_escape_broadword(input3, 0, input3.len()),
            None
        );

        // Long enough to go through several 8-byte steps before the match.
        let mut long = vec![b'a'; 100];
        long[50] = b'"';
        assert_eq!(
            find_quote_or_escape_broadword(&long, 0, long.len()),
            Some(50)
        );
    }

    #[test]
    fn test_broadword_find_single_quote() {
        let input = b"hello'world";
        assert_eq!(find_single_quote_broadword(input, 0, input.len()), Some(5));

        let input2 = b"no quotes";
        assert_eq!(find_single_quote_broadword(input2, 0, input2.len()), None);
    }

    #[test]
    fn test_broadword_count_leading_spaces() {
        // The literals carry exactly as many leading spaces as the expected count.
        assert_eq!(count_leading_spaces_broadword(b"  hello", 0), 2);
        assert_eq!(count_leading_spaces_broadword(b"    world", 0), 4);
        assert_eq!(count_leading_spaces_broadword(b"no spaces", 0), 0);

        let mut input = vec![b' '; 50];
        input.extend_from_slice(b"content");
        assert_eq!(count_leading_spaces_broadword(&input, 0), 50);
    }

    #[test]
    fn test_broadword_find_newline() {
        let input = b"hello\nworld";
        assert_eq!(find_newline_broadword(input, 0), Some(5));

        let input2 = b"no newline here";
        assert_eq!(find_newline_broadword(input2, 0), None);

        // Long enough to go through several 8-byte steps before the match.
        let mut long = vec![b'a'; 100];
        long[50] = b'\n';
        assert_eq!(find_newline_broadword(&long, 0), Some(50));
    }

    /// In `value: x` the colon sits at index 5 and the space at index 6.
    #[test]
    fn test_broadword_value_terminators() {
        let input = b"value: x";
        let class = classify_yaml_chars_broadword(input, 0).unwrap();
        let terminators = class.value_terminators();
        assert!(terminators & (1 << 5) != 0);
        assert!(terminators & (1 << 6) != 0);
    }
}