/// Number of byte lanes handled per vector step; every backend in this file
/// loads and stores exactly this many bytes at a time.
pub const LANES: usize = 16;
/// Per-lane bitmask produced by `movemask`: bit `i` corresponds to lane `i`.
pub type Mask16 = u16;
/// Returns the index of the lowest set bit of `mask`.
///
/// For an all-zero mask the result is 16 (the bit width of the mask), so
/// callers that need a valid lane index must check `mask != 0` first.
#[inline(always)]
pub fn first_set_bit(mask: Mask16) -> u32 {
    u16::trailing_zeros(mask)
}
#[inline(always)]
pub fn clear_high_bits(mask: Mask16, n: usize) -> Mask16 {
if n >= 16 {
mask
} else {
mask & ((1u16 << n) - 1)
}
}
/// aarch64 backend: thin wrappers around 128-bit `uint8x16_t` intrinsics.
///
/// Every function is `unsafe` so that all backends expose the same
/// signatures; only `load` and `store` touch raw memory.
#[cfg(target_arch = "aarch64")]
mod imp {
use super::*;
use core::arch::aarch64::*;
/// Loads 16 bytes starting at `ptr`.
///
/// # Safety
/// `ptr` must be valid for reading `LANES` bytes.
#[inline(always)]
pub unsafe fn load(ptr: *const u8) -> uint8x16_t {
unsafe { vld1q_u8(ptr) }
}
/// Stores the 16 bytes of `v` starting at `ptr`.
///
/// # Safety
/// `ptr` must be valid for writing `LANES` bytes.
#[inline(always)]
pub unsafe fn store(ptr: *mut u8, v: uint8x16_t) {
unsafe { vst1q_u8(ptr, v) };
}
/// Returns a vector with every lane set to `b`.
#[inline(always)]
pub unsafe fn splat(b: u8) -> uint8x16_t {
unsafe { vdupq_n_u8(b) }
}
/// Lane-wise `a == b`; matching lanes become `0xFF`, others `0x00`.
#[inline(always)]
pub unsafe fn cmpeq(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
unsafe { vceqq_u8(a, b) }
}
/// Lane-wise unsigned `a <= b`; matching lanes become `0xFF`, others `0x00`.
#[inline(always)]
pub unsafe fn cmple(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
unsafe { vcleq_u8(a, b) }
}
/// Lane-wise bitwise OR of `a` and `b`.
#[inline(always)]
pub unsafe fn or(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
unsafe { vorrq_u8(a, b) }
}
/// Collects the most significant bit of each lane into a 16-bit mask,
/// with bit `i` taken from lane `i`.
///
/// The top bit of every byte is first moved down to bit 0, then three
/// shift-right-and-accumulate steps on progressively wider lanes pack
/// neighbouring bits together until the low byte of each 64-bit half
/// holds the eight bits for that half.
#[inline(always)]
pub unsafe fn movemask(v: uint8x16_t) -> Mask16 {
unsafe {
// Each byte becomes 0 or 1 (its former top bit).
let high_bits = vreinterpretq_u16_u8(vshrq_n_u8(v, 7));
// Within each u16: move the odd byte's bit next to the even byte's bit (bits 0..=1).
let paired16 = vreinterpretq_u32_u16(vsraq_n_u16(high_bits, high_bits, 7));
// Within each u32: append the upper u16's two bits (bits 0..=3).
let paired32 = vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 14));
// Within each u64: append the upper u32's four bits (bits 0..=7).
let paired64 = vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 28));
// Byte 0 holds lanes 0..=7, byte 8 holds lanes 8..=15; higher bytes carry leftovers.
let lo = vgetq_lane_u8(paired64, 0) as u16;
let hi = vgetq_lane_u8(paired64, 8) as u16;
lo | (hi << 8)
}
}
}
/// x86_64 backend: thin wrappers around 128-bit SSE2 `__m128i` intrinsics.
///
/// Every function is `unsafe` so that all backends expose the same
/// signatures; only `load` and `store` touch raw memory. SSE2 is part of the
/// x86_64 baseline, so no runtime feature detection is needed.
#[cfg(target_arch = "x86_64")]
mod imp {
    use super::*;
    use core::arch::x86_64::*;

    /// Loads 16 bytes starting at `ptr` (no alignment requirement).
    ///
    /// # Safety
    /// `ptr` must be valid for reading `LANES` bytes.
    #[inline(always)]
    pub unsafe fn load(ptr: *const u8) -> __m128i {
        // SAFETY: the caller guarantees 16 readable bytes; the unaligned
        // load variant accepts any address.
        unsafe { _mm_loadu_si128(ptr as *const __m128i) }
    }

    /// Stores the 16 bytes of `v` starting at `ptr` (no alignment requirement).
    ///
    /// # Safety
    /// `ptr` must be valid for writing `LANES` bytes.
    #[inline(always)]
    pub unsafe fn store(ptr: *mut u8, v: __m128i) {
        // SAFETY: the caller guarantees 16 writable bytes; the unaligned
        // store variant accepts any address.
        unsafe { _mm_storeu_si128(ptr as *mut __m128i, v) };
    }

    /// Returns a vector with every lane set to `b`.
    ///
    /// # Safety
    /// No requirements beyond SSE2, which x86_64 always provides.
    #[inline(always)]
    pub unsafe fn splat(b: u8) -> __m128i {
        // SAFETY: register-only SSE2 operation.
        unsafe { _mm_set1_epi8(b as i8) }
    }

    /// Lane-wise `a == b`; matching lanes become `0xFF`, others `0x00`.
    ///
    /// # Safety
    /// No requirements beyond SSE2, which x86_64 always provides.
    #[inline(always)]
    pub unsafe fn cmpeq(a: __m128i, b: __m128i) -> __m128i {
        // SAFETY: register-only SSE2 operation.
        unsafe { _mm_cmpeq_epi8(a, b) }
    }

    /// Lane-wise unsigned `a <= b`; matching lanes become `0xFF`, others `0x00`.
    ///
    /// # Safety
    /// No requirements beyond SSE2, which x86_64 always provides.
    #[inline(always)]
    pub unsafe fn cmple(a: __m128i, b: __m128i) -> __m128i {
        // SSE2 has no unsigned byte `<=`; `max(a, b) == b` holds exactly when `a <= b`.
        // SAFETY: register-only SSE2 operations.
        unsafe { _mm_cmpeq_epi8(_mm_max_epu8(a, b), b) }
    }

    /// Lane-wise bitwise OR of `a` and `b`.
    ///
    /// # Safety
    /// No requirements beyond SSE2, which x86_64 always provides.
    #[inline(always)]
    pub unsafe fn or(a: __m128i, b: __m128i) -> __m128i {
        // SAFETY: register-only SSE2 operation.
        unsafe { _mm_or_si128(a, b) }
    }

    /// Collects the most significant bit of each lane into a 16-bit mask,
    /// with bit `i` taken from lane `i`.
    ///
    /// # Safety
    /// No requirements beyond SSE2, which x86_64 always provides.
    #[inline(always)]
    pub unsafe fn movemask(v: __m128i) -> Mask16 {
        // The intrinsic returns the 16 mask bits in the low half of an `i32`,
        // so the narrowing cast drops only zero bits.
        // SAFETY: register-only SSE2 operation.
        unsafe { _mm_movemask_epi8(v) as u16 }
    }
}
/// Portable scalar backend used on targets without a dedicated SIMD
/// implementation. It mirrors the vector API lane by lane on a plain byte
/// array.
#[cfg(not(any(target_arch = "aarch64", target_arch = "x86_64")))]
mod imp {
    use super::*;

    /// Sixteen byte lanes emulating a 128-bit vector register.
    #[derive(Clone, Copy)]
    pub struct SimdVec([u8; LANES]);

    /// Applies `f` to each pair of corresponding lanes of `a` and `b`.
    #[inline(always)]
    fn zip_lanes(a: SimdVec, b: SimdVec, f: impl Fn(u8, u8) -> u8) -> SimdVec {
        let mut lanes = [0u8; LANES];
        for (dst, (&x, &y)) in lanes.iter_mut().zip(a.0.iter().zip(b.0.iter())) {
            *dst = f(x, y);
        }
        SimdVec(lanes)
    }

    /// Loads `LANES` bytes starting at `ptr`.
    ///
    /// # Safety
    /// `ptr` must be valid for reading `LANES` bytes.
    #[inline(always)]
    pub unsafe fn load(ptr: *const u8) -> SimdVec {
        let mut lanes = [0u8; LANES];
        // SAFETY: the caller guarantees `LANES` readable bytes at `ptr`, and
        // `lanes` is a fresh local that cannot overlap the source.
        unsafe { core::ptr::copy_nonoverlapping(ptr, lanes.as_mut_ptr(), LANES) };
        SimdVec(lanes)
    }

    /// Stores the `LANES` bytes of `v` starting at `ptr`.
    ///
    /// # Safety
    /// `ptr` must be valid for writing `LANES` bytes.
    #[inline(always)]
    pub unsafe fn store(ptr: *mut u8, v: SimdVec) {
        // SAFETY: the caller guarantees `LANES` writable bytes at `ptr`, and
        // `v` is a by-value local that cannot overlap the destination.
        unsafe { core::ptr::copy_nonoverlapping(v.0.as_ptr(), ptr, LANES) };
    }

    /// Returns a vector with every lane set to `b`.
    #[inline(always)]
    pub unsafe fn splat(b: u8) -> SimdVec {
        SimdVec([b; LANES])
    }

    /// Lane-wise `a == b`; matching lanes become `0xFF`, others `0x00`.
    #[inline(always)]
    pub unsafe fn cmpeq(a: SimdVec, b: SimdVec) -> SimdVec {
        zip_lanes(a, b, |x, y| if x == y { 0xFF } else { 0x00 })
    }

    /// Lane-wise unsigned `a <= b`; matching lanes become `0xFF`, others `0x00`.
    #[inline(always)]
    pub unsafe fn cmple(a: SimdVec, b: SimdVec) -> SimdVec {
        zip_lanes(a, b, |x, y| if x <= y { 0xFF } else { 0x00 })
    }

    /// Lane-wise bitwise OR of `a` and `b`.
    #[inline(always)]
    pub unsafe fn or(a: SimdVec, b: SimdVec) -> SimdVec {
        zip_lanes(a, b, |x, y| x | y)
    }

    /// Collects the most significant bit of each lane into a 16-bit mask,
    /// with bit `i` taken from lane `i`.
    #[inline(always)]
    pub unsafe fn movemask(v: SimdVec) -> Mask16 {
        v.0.iter()
            .enumerate()
            .fold(0, |mask, (lane, &byte)| mask | (Mask16::from(byte >> 7) << lane))
    }
}
pub use imp::*;
/// Reports whether `bytes` contains any "special" byte: a control byte
/// (`0x00..=0x1f`) or one of `,` `@` `(` `)` `[` `]` `"` `\`.
///
/// Full 16-byte blocks are tested with vector compares; the remaining tail
/// (fewer than 16 bytes) is checked against a lookup table with the same
/// byte set.
#[inline]
pub fn simd_has_special_chars(bytes: &[u8]) -> bool {
    /// Scalar mirror of the vector test, indexed by byte value.
    static NEEDS_QUOTE: [bool; 256] = {
        let mut table = [false; 256];
        let mut code = 0usize;
        while code <= 0x1f {
            table[code] = true;
            code += 1;
        }
        let specials = b",@()[]\"\\";
        let mut k = 0usize;
        while k < specials.len() {
            table[specials[k] as usize] = true;
            k += 1;
        }
        table
    };

    let mut blocks = bytes.chunks_exact(LANES);
    // SAFETY: every `block` yielded by `chunks_exact(LANES)` is exactly
    // `LANES` bytes long, so `load` reads only inside `bytes`. The remaining
    // calls operate on vector values only.
    unsafe {
        let control_max = splat(0x1f);
        let needles = [
            splat(b','),
            splat(b'@'),
            splat(b'('),
            splat(b')'),
            splat(b'['),
            splat(b']'),
            splat(b'"'),
            splat(b'\\'),
        ];
        for block in blocks.by_ref() {
            let chunk = load(block.as_ptr());
            let mut hits = cmple(chunk, control_max);
            for &needle in needles.iter() {
                hits = or(hits, cmpeq(chunk, needle));
            }
            if movemask(hits) != 0 {
                return true;
            }
        }
    }

    blocks
        .remainder()
        .iter()
        .any(|&b| NEEDS_QUOTE[usize::from(b)])
}
/// Returns the index of the first byte at or after `start` that is a control
/// byte (`0x00..=0x1f`), a double quote, or a backslash.
///
/// Returns `bytes.len()` when no such byte exists, including when `start` is
/// at or past the end of the slice.
#[inline]
pub fn simd_find_escape(bytes: &[u8], start: usize) -> usize {
    let len = bytes.len();
    let mut i = start;
    // SAFETY: the loop condition guarantees `i <= len` and `len - i >= LANES`,
    // so each `load` reads 16 bytes that lie entirely inside `bytes`.
    unsafe {
        let v_1f = splat(0x1f);
        let v_quote = splat(b'"');
        let v_backslash = splat(b'\\');
        // `len.saturating_sub(i)` rather than `i + LANES <= len`: the addition
        // can wrap for a `start` close to `usize::MAX`, which would let an
        // out-of-bounds load through in release builds.
        while len.saturating_sub(i) >= LANES {
            let chunk = load(bytes.as_ptr().add(i));
            let mask = movemask(or(
                cmple(chunk, v_1f),
                or(cmpeq(chunk, v_quote), cmpeq(chunk, v_backslash)),
            ));
            if mask != 0 {
                return i + first_set_bit(mask) as usize;
            }
            i += LANES;
        }
    }
    // Scalar tail: fewer than LANES bytes remain.
    while i < len {
        let b = bytes[i];
        if b <= 0x1f || b == b'"' || b == b'\\' {
            return i;
        }
        i += 1;
    }
    len
}
/// Returns the index of the first double quote or backslash at or after
/// `start`.
///
/// Returns `bytes.len()` when neither byte occurs, including when `start` is
/// at or past the end of the slice.
#[inline]
pub fn simd_find_quote_or_backslash(bytes: &[u8], start: usize) -> usize {
    let len = bytes.len();
    let mut i = start;
    // SAFETY: the loop condition guarantees `i <= len` and `len - i >= LANES`,
    // so each `load` reads 16 bytes that lie entirely inside `bytes`.
    unsafe {
        let v_quote = splat(b'"');
        let v_backslash = splat(b'\\');
        // `len.saturating_sub(i)` rather than `i + LANES <= len`: the addition
        // can wrap for a `start` close to `usize::MAX`, which would let an
        // out-of-bounds load through in release builds.
        while len.saturating_sub(i) >= LANES {
            let chunk = load(bytes.as_ptr().add(i));
            let mask = movemask(or(cmpeq(chunk, v_quote), cmpeq(chunk, v_backslash)));
            if mask != 0 {
                return i + first_set_bit(mask) as usize;
            }
            i += LANES;
        }
    }
    // Scalar tail: fewer than LANES bytes remain.
    while i < len {
        let b = bytes[i];
        if b == b'"' || b == b'\\' {
            return i;
        }
        i += 1;
    }
    len
}
/// Returns the index of the first `,` `)` `]` `:` or `\` at or after `start`.
///
/// Returns `bytes.len()` when none of these bytes occurs, including when
/// `start` is at or past the end of the slice.
#[inline]
pub fn simd_find_plain_delimiter(bytes: &[u8], start: usize) -> usize {
    let len = bytes.len();
    let mut i = start;
    // SAFETY: the loop condition guarantees `i <= len` and `len - i >= LANES`,
    // so each `load` reads 16 bytes that lie entirely inside `bytes`.
    unsafe {
        let v_comma = splat(b',');
        let v_rparen = splat(b')');
        let v_rbracket = splat(b']');
        let v_colon = splat(b':');
        let v_backslash = splat(b'\\');
        // `len.saturating_sub(i)` rather than `i + LANES <= len`: the addition
        // can wrap for a `start` close to `usize::MAX`, which would let an
        // out-of-bounds load through in release builds.
        while len.saturating_sub(i) >= LANES {
            let chunk = load(bytes.as_ptr().add(i));
            let mask = movemask(or(
                or(cmpeq(chunk, v_comma), cmpeq(chunk, v_rparen)),
                or(
                    or(cmpeq(chunk, v_rbracket), cmpeq(chunk, v_colon)),
                    cmpeq(chunk, v_backslash),
                ),
            ));
            if mask != 0 {
                return i + first_set_bit(mask) as usize;
            }
            i += LANES;
        }
    }
    // Scalar tail: fewer than LANES bytes remain.
    while i < len {
        match bytes[i] {
            b',' | b')' | b']' | b':' | b'\\' => return i,
            _ => i += 1,
        }
    }
    len
}
/// Returns the index of the first byte at or after `start` that is not a
/// space, tab, line feed, or carriage return.
///
/// Returns `bytes.len()` when only whitespace remains. If `start` is already
/// past the end of the slice, `start` is returned unchanged.
#[inline]
pub fn simd_skip_whitespace(bytes: &[u8], start: usize) -> usize {
    let len = bytes.len();
    let mut i = start;
    // SAFETY: the loop condition guarantees `i <= len` and `len - i >= LANES`,
    // so each `load` reads 16 bytes that lie entirely inside `bytes`.
    unsafe {
        let v_space = splat(b' ');
        let v_tab = splat(b'\t');
        let v_nl = splat(b'\n');
        let v_cr = splat(b'\r');
        // `len.saturating_sub(i)` rather than `i + LANES <= len`: the addition
        // can wrap for a `start` close to `usize::MAX`, which would let an
        // out-of-bounds load through in release builds.
        while len.saturating_sub(i) >= LANES {
            let chunk = load(bytes.as_ptr().add(i));
            let ws_mask = movemask(or(
                or(cmpeq(chunk, v_space), cmpeq(chunk, v_tab)),
                or(cmpeq(chunk, v_nl), cmpeq(chunk, v_cr)),
            ));
            if ws_mask != 0xFFFF {
                // At least one lane is not whitespace; the lowest clear bit
                // of the mask is the first such lane.
                return i + first_set_bit(!ws_mask) as usize;
            }
            i += LANES;
        }
    }
    // Scalar tail: fewer than LANES bytes remain.
    while i < len {
        match bytes[i] {
            b' ' | b'\t' | b'\n' | b'\r' => i += 1,
            _ => return i,
        }
    }
    i
}
/// Appends `s` to `buf` as a double-quoted string with escapes.
///
/// `"` and `\` are backslash-escaped, line feed and tab become `\n` and `\t`,
/// and every other control byte (`0x00..=0x1f`) is written as `\u00XX` with
/// lowercase hex digits. All remaining bytes are copied through unchanged in
/// runs located by `simd_find_escape`.
#[inline]
pub fn simd_write_escaped(buf: &mut Vec<u8>, s: &[u8]) {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    buf.push(b'"');
    let mut cursor = 0;
    while cursor < s.len() {
        let hit = simd_find_escape(s, cursor);
        // Copy the clean run preceding the next byte that needs escaping.
        buf.extend_from_slice(&s[cursor..hit]);
        match s.get(hit) {
            // No further escapes: the clean run reached the end of the input.
            None => break,
            Some(&b'"') => buf.extend_from_slice(b"\\\""),
            Some(&b'\\') => buf.extend_from_slice(b"\\\\"),
            Some(&b'\n') => buf.extend_from_slice(b"\\n"),
            Some(&b'\t') => buf.extend_from_slice(b"\\t"),
            Some(&other) => {
                buf.extend_from_slice(b"\\u00");
                buf.push(HEX_DIGITS[usize::from(other >> 4)]);
                buf.push(HEX_DIGITS[usize::from(other & 0xf)]);
            }
        }
        cursor = hit + 1;
    }
    buf.push(b'"');
}
/// Appends all of `src` to the end of `dst`.
///
/// This delegates to `Vec::extend_from_slice`, which reserves the required
/// capacity and copies the bytes in bulk, so no hand-written unsafe copy
/// loop is needed.
#[inline]
pub fn simd_bulk_extend(dst: &mut Vec<u8>, src: &[u8]) {
    dst.extend_from_slice(src);
}
/// ASCII digit pairs "00" through "99" laid out back to back; the pair for
/// value `k` starts at byte offset `2 * k`.
static DEC_DIGITS: &[u8; 200] = b"0001020304050607080910111213141516171819\
    2021222324252627282930313233343536373839\
    4041424344454647484950515253545556575859\
    6061626364656667686970717273747576777879\
    8081828384858687888990919293949596979899";

/// Appends the decimal representation of `v` to `buf`.
///
/// Values below 100 take a short path. Larger values are rendered two digits
/// at a time from the least significant end into a stack buffer, which is
/// then appended in one call.
#[inline(always)]
pub fn fast_write_u64(buf: &mut Vec<u8>, mut v: u64) {
    if v < 100 {
        if v < 10 {
            buf.push(b'0' + v as u8);
        } else {
            let pair = v as usize * 2;
            buf.extend_from_slice(&DEC_DIGITS[pair..pair + 2]);
        }
        return;
    }

    // 20 bytes is enough for u64::MAX (18446744073709551615).
    let mut scratch = [0u8; 20];
    let mut pos = scratch.len();

    // Peel off two digits per iteration, filling the buffer from the back.
    while v >= 100 {
        let pair = (v % 100) as usize * 2;
        v /= 100;
        pos -= 2;
        scratch[pos..pos + 2].copy_from_slice(&DEC_DIGITS[pair..pair + 2]);
    }

    // One or two leading digits remain.
    if v < 10 {
        pos -= 1;
        scratch[pos] = b'0' + v as u8;
    } else {
        let pair = v as usize * 2;
        pos -= 2;
        scratch[pos..pos + 2].copy_from_slice(&DEC_DIGITS[pair..pair + 2]);
    }

    buf.extend_from_slice(&scratch[pos..]);
}
#[inline(always)]
pub fn fast_write_i64(buf: &mut Vec<u8>, v: i64) {
if v < 0 {
buf.push(b'-');
fast_write_u64(buf, (-(v as i128)) as u64);
} else {
fast_write_u64(buf, v as u64);
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds `count` spaces followed by `tail`, so the amount of leading
    /// whitespace is explicit rather than hidden inside a literal.
    fn space_padded(count: usize, tail: &[u8]) -> Vec<u8> {
        let mut input = vec![b' '; count];
        input.extend_from_slice(tail);
        input
    }

    #[test]
    fn test_simd_has_special_chars() {
        assert!(!simd_has_special_chars(b"hello world"));
        assert!(simd_has_special_chars(b"hello,world"));
        assert!(simd_has_special_chars(b"hello@world"));
        assert!(simd_has_special_chars(b"hello(world"));
        assert!(simd_has_special_chars(b"hello)world"));
        assert!(simd_has_special_chars(b"hello[world"));
        assert!(simd_has_special_chars(b"hello]world"));
        assert!(simd_has_special_chars(b"hello\"world"));
        assert!(simd_has_special_chars(b"hello\\world"));
        assert!(simd_has_special_chars(b"hello\nworld"));
        assert!(simd_has_special_chars(b"hello\tworld"));
        // Exactly one full vector, clean.
        assert!(!simd_has_special_chars(b"abcdefghijklmnop"));
        // One full vector plus a clean scalar tail.
        assert!(!simd_has_special_chars(b"abcdefghijklmnopqrstuvwx"));
        // Special byte in the last lane of the vector.
        assert!(simd_has_special_chars(b"abcdefghijklmno,"));
        // Special byte in the scalar tail right after a full vector.
        assert!(simd_has_special_chars(b"abcdefghijklmnop,"));
    }

    #[test]
    fn test_simd_find_escape() {
        assert_eq!(simd_find_escape(b"hello world", 0), 11);
        assert_eq!(simd_find_escape(b"hello\"world", 0), 5);
        assert_eq!(simd_find_escape(b"hello\\world", 0), 5);
        assert_eq!(simd_find_escape(b"hello\nworld", 0), 5);
        // Match located in the scalar tail after one full vector.
        assert_eq!(simd_find_escape(b"abcdefghijklmnop\"", 0), 16);
    }

    #[test]
    fn test_simd_find_quote_or_backslash() {
        assert_eq!(simd_find_quote_or_backslash(b"hello world", 0), 11);
        assert_eq!(simd_find_quote_or_backslash(b"hello\"world", 0), 5);
        assert_eq!(simd_find_quote_or_backslash(b"hello\\world", 0), 5);
        assert_eq!(simd_find_quote_or_backslash(b"abcdefghijklmnop\"", 0), 16);
    }

    #[test]
    fn test_simd_find_plain_delimiter() {
        assert_eq!(simd_find_plain_delimiter(b"hello world", 0), 11);
        assert_eq!(simd_find_plain_delimiter(b"hello,world", 0), 5);
        assert_eq!(simd_find_plain_delimiter(b"hello)world", 0), 5);
        assert_eq!(simd_find_plain_delimiter(b"hello]world", 0), 5);
    }

    #[test]
    fn test_simd_skip_whitespace() {
        assert_eq!(simd_skip_whitespace(&space_padded(3, b"hello"), 0), 3);
        assert_eq!(simd_skip_whitespace(b"\t\n\r\x20hello", 0), 4);
        assert_eq!(simd_skip_whitespace(b"hello", 0), 0);
        // Whitespace only: the result is the slice length.
        assert_eq!(simd_skip_whitespace(&space_padded(3, b""), 0), 3);
        // More than one vector of whitespace before the first word.
        assert_eq!(simd_skip_whitespace(&space_padded(18, b"hello"), 0), 18);
    }

    #[test]
    fn test_simd_write_escaped() {
        let mut buf = Vec::new();
        simd_write_escaped(&mut buf, b"hello");
        assert_eq!(buf, b"\"hello\"");
        buf.clear();
        simd_write_escaped(&mut buf, b"hello\"world");
        assert_eq!(buf, b"\"hello\\\"world\"");
        buf.clear();
        simd_write_escaped(&mut buf, b"line1\nline2");
        assert_eq!(buf, b"\"line1\\nline2\"");
        buf.clear();
        simd_write_escaped(&mut buf, b"tab\there");
        assert_eq!(buf, b"\"tab\\there\"");
        buf.clear();
        simd_write_escaped(&mut buf, b"back\\slash");
        assert_eq!(buf, b"\"back\\\\slash\"");
        buf.clear();
        simd_write_escaped(&mut buf, &[0x01]);
        assert_eq!(buf, b"\"\\u0001\"");
    }

    #[test]
    fn test_fast_write_u64() {
        let cases = [0u64, 1, 9, 10, 99, 100, 999, 1234, 99999, 1000000, u64::MAX];
        for &v in &cases {
            let mut buf1 = Vec::new();
            fast_write_u64(&mut buf1, v);
            let expected = v.to_string();
            assert_eq!(
                String::from_utf8(buf1).unwrap(),
                expected,
                "fast_write_u64({}) failed",
                v
            );
        }
    }

    #[test]
    fn test_fast_write_i64() {
        let cases = [0i64, 1, -1, 42, -42, i64::MAX, i64::MIN];
        for &v in &cases {
            let mut buf1 = Vec::new();
            fast_write_i64(&mut buf1, v);
            let expected = v.to_string();
            assert_eq!(
                String::from_utf8(buf1).unwrap(),
                expected,
                "fast_write_i64({}) failed",
                v
            );
        }
    }
}