simd-json 0.4.3

High performance JSON parser based on a port of simdjson
Documentation
#[cfg(target_arch = "x86")]
use std::arch::x86::{
    __m256i, _mm256_alignr_epi8, _mm256_and_si256, _mm256_cmpeq_epi8, _mm256_cmpgt_epi8,
    _mm256_movemask_epi8, _mm256_or_si256, _mm256_permute2x128_si256, _mm256_set1_epi8,
    _mm256_setr_epi8, _mm256_setzero_si256, _mm256_shuffle_epi8, _mm256_srli_epi16,
    _mm256_subs_epu8, _mm256_testz_si256, _mm256_xor_si256,
};
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::{
    __m256i, _mm256_alignr_epi8, _mm256_and_si256, _mm256_cmpgt_epi8, _mm256_movemask_epi8,
    _mm256_or_si256, _mm256_permute2x128_si256, _mm256_set1_epi8, _mm256_setr_epi8,
    _mm256_setzero_si256, _mm256_shuffle_epi8, _mm256_srli_epi16, _mm256_subs_epu8,
    _mm256_testz_si256, _mm256_xor_si256,
};

use crate::utf8check::{ProcessedUtfBytes, Utf8Check};
use crate::{mem, static_cast_i8};

impl Default for ProcessedUtfBytes<__m256i> {
    #[cfg_attr(not(feature = "no-inline"), inline(always))]
    fn default() -> Self {
        unsafe {
            Self {
                prev: _mm256_setzero_si256(),
                incomplete: _mm256_setzero_si256(),
                error: _mm256_setzero_si256(),
            }
        }
    }
}

impl Utf8Check<__m256i> for ProcessedUtfBytes<__m256i> {
    #[cfg_attr(not(feature = "no-inline"), inline(always))]
    unsafe fn new_processed_bytes() -> ProcessedUtfBytes<__m256i> {
        Self::default()
    }

    #[cfg_attr(not(feature = "no-inline"), inline(always))]
    unsafe fn or(a: __m256i, b: __m256i) -> __m256i {
        _mm256_or_si256(a, b)
    }

    #[cfg_attr(not(feature = "no-inline"), inline(always))]
    unsafe fn is_ascii(input: __m256i) -> bool {
        _mm256_movemask_epi8(input) == 0
    }

    #[cfg_attr(not(feature = "no-inline"), inline(always))]
    unsafe fn check_eof(error: __m256i, incomplete: __m256i) -> __m256i {
        Self::or(error, incomplete)
    }

    #[cfg_attr(not(feature = "no-inline"), inline(always))]
    unsafe fn is_incomplete(input: __m256i) -> __m256i {
        _mm256_subs_epu8(
            input,
            _mm256_setr_epi8(
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0xff_u8),
                static_cast_i8!(0b1111_0000_u8 - 1),
                static_cast_i8!(0b1110_0000_u8 - 1),
                static_cast_i8!(0b1100_0000_u8 - 1),
            ),
        )
    }

    #[cfg_attr(not(feature = "no-inline"), inline(always))]
    unsafe fn prev1(input: __m256i, prev: __m256i) -> __m256i {
        _mm256_alignr_epi8(input, _mm256_permute2x128_si256(prev, input, 0x21), 16 - 1)
    }

    #[cfg_attr(not(feature = "no-inline"), inline(always))]
    #[allow(clippy::too_many_lines)]
    unsafe fn check_special_cases(input: __m256i, prev1: __m256i) -> __m256i {
        const TOO_SHORT: u8 = 1 << 0;
        const TOO_LONG: u8 = 1 << 1;
        const OVERLONG_3: u8 = 1 << 2;
        const SURROGATE: u8 = 1 << 4;
        const OVERLONG_2: u8 = 1 << 5;
        const TWO_CONTS: u8 = 1 << 7;
        const TOO_LARGE: u8 = 1 << 3;
        const TOO_LARGE_1000: u8 = 1 << 6;
        const OVERLONG_4: u8 = 1 << 6;
        const CARRY: u8 = TOO_SHORT | TOO_LONG | TWO_CONTS;

        let byte_1_high: __m256i = _mm256_shuffle_epi8(
            _mm256_setr_epi8(
                static_cast_i8!(TOO_LONG),
                static_cast_i8!(TOO_LONG),
                static_cast_i8!(TOO_LONG),
                static_cast_i8!(TOO_LONG),
                static_cast_i8!(TOO_LONG),
                static_cast_i8!(TOO_LONG),
                static_cast_i8!(TOO_LONG),
                static_cast_i8!(TOO_LONG),
                static_cast_i8!(TWO_CONTS),
                static_cast_i8!(TWO_CONTS),
                static_cast_i8!(TWO_CONTS),
                static_cast_i8!(TWO_CONTS),
                static_cast_i8!(TOO_SHORT | OVERLONG_2),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT | OVERLONG_3 | SURROGATE),
                static_cast_i8!(TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4),
                static_cast_i8!(TOO_LONG),
                static_cast_i8!(TOO_LONG),
                static_cast_i8!(TOO_LONG),
                static_cast_i8!(TOO_LONG),
                static_cast_i8!(TOO_LONG),
                static_cast_i8!(TOO_LONG),
                static_cast_i8!(TOO_LONG),
                static_cast_i8!(TOO_LONG),
                static_cast_i8!(TWO_CONTS),
                static_cast_i8!(TWO_CONTS),
                static_cast_i8!(TWO_CONTS),
                static_cast_i8!(TWO_CONTS),
                static_cast_i8!(TOO_SHORT | OVERLONG_2),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT | OVERLONG_3 | SURROGATE),
                static_cast_i8!(TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4),
            ),
            _mm256_and_si256(
                _mm256_srli_epi16(prev1, 4),
                _mm256_set1_epi8(static_cast_i8!(0xFF_u8 >> 4)),
            ),
        );

        let byte_1_low: __m256i = _mm256_shuffle_epi8(
            _mm256_setr_epi8(
                static_cast_i8!(CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4),
                static_cast_i8!(CARRY | OVERLONG_2),
                static_cast_i8!(CARRY),
                static_cast_i8!(CARRY),
                static_cast_i8!(CARRY | TOO_LARGE),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4),
                static_cast_i8!(CARRY | OVERLONG_2),
                static_cast_i8!(CARRY),
                static_cast_i8!(CARRY),
                static_cast_i8!(CARRY | TOO_LARGE),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
                static_cast_i8!(CARRY | TOO_LARGE | TOO_LARGE_1000),
            ),
            _mm256_and_si256(prev1, _mm256_set1_epi8(0x0F)),
        );

        let byte_2_high: __m256i = _mm256_shuffle_epi8(
            _mm256_setr_epi8(
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(
                    TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4
                ),
                static_cast_i8!(TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE),
                static_cast_i8!(TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE),
                static_cast_i8!(TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(
                    TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4
                ),
                static_cast_i8!(TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE),
                static_cast_i8!(TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE),
                static_cast_i8!(TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
                static_cast_i8!(TOO_SHORT),
            ),
            _mm256_and_si256(
                _mm256_srli_epi16(input, 4),
                _mm256_set1_epi8(static_cast_i8!(0xFF_u8 >> 4)),
            ),
        );

        _mm256_and_si256(_mm256_and_si256(byte_1_high, byte_1_low), byte_2_high)
    }

    #[cfg_attr(not(feature = "no-inline"), inline(always))]
    unsafe fn check_multibyte_lengths(
        input: __m256i,
        prev: __m256i,
        special_cases: __m256i,
    ) -> __m256i {
        let prev2 = _mm256_alignr_epi8(input, _mm256_permute2x128_si256(prev, input, 0x21), 16 - 2);
        let prev3 = _mm256_alignr_epi8(input, _mm256_permute2x128_si256(prev, input, 0x21), 16 - 3);
        let must23 = Self::must_be_2_3_continuation(prev2, prev3);
        let must23_80 = _mm256_and_si256(must23, _mm256_set1_epi8(static_cast_i8!(0x80_u8)));
        _mm256_xor_si256(must23_80, special_cases)
    }

    #[cfg_attr(not(feature = "no-inline"), inline(always))]
    unsafe fn must_be_2_3_continuation(prev2: __m256i, prev3: __m256i) -> __m256i {
        let is_third_byte =
            _mm256_subs_epu8(prev2, _mm256_set1_epi8(static_cast_i8!(0b1110_0000_u8 - 1)));
        let is_fourth_byte =
            _mm256_subs_epu8(prev3, _mm256_set1_epi8(static_cast_i8!(0b1111_0000_u8 - 1)));
        _mm256_cmpgt_epi8(
            _mm256_or_si256(is_third_byte, is_fourth_byte),
            _mm256_set1_epi8(0),
        )
    }

    #[cfg_attr(not(feature = "no-inline"), inline(always))]
    unsafe fn has_error(error: __m256i) -> bool {
        _mm256_testz_si256(error, error) != 1
    }
}