use crate::{
classification::{quotes::QuoteClassifiedIterator, ResumeClassifierState},
input::{error::InputError, InputBlockIterator},
FallibleIterator, MaskType, BLOCK_SIZE,
};
/// The kind of bracket attached to an opening or closing structural character.
///
/// The enum is `repr(u8)`, so `Square` has discriminant `0` and `Curly` has discriminant `1`.
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum BracketType {
    /// A square bracket, e.g. the `[` that opens a JSON array.
    Square,
    /// A curly bracket, e.g. the `{` that opens or the `}` that closes a JSON object.
    Curly,
}
/// A structural character reported by a classifier, tagged with its index.
///
/// Every variant carries a `usize` index; the bracket variants additionally carry the
/// [`BracketType`] of the bracket.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Structural {
    /// A closing bracket of the given type at the given index.
    Closing(BracketType, usize),
    /// A colon at the given index.
    Colon(usize),
    /// An opening bracket of the given type at the given index.
    Opening(BracketType, usize),
    /// A comma at the given index.
    Comma(usize),
}
use Structural::*;
impl Structural {
#[inline(always)]
#[must_use]
pub fn idx(self) -> usize {
match self {
Closing(_, idx) | Colon(idx) | Opening(_, idx) | Comma(idx) => idx,
}
}
#[inline(always)]
#[must_use]
pub fn offset(self, amount: usize) -> Self {
match self {
Closing(b, idx) => Closing(b, idx + amount),
Colon(idx) => Colon(idx + amount),
Opening(b, idx) => Opening(b, idx + amount),
Comma(idx) => Comma(idx + amount),
}
}
#[inline(always)]
#[must_use]
pub fn is_closing(&self) -> bool {
matches!(self, Closing(_, _))
}
#[inline(always)]
#[must_use]
pub fn is_opening(&self) -> bool {
matches!(self, Opening(_, _))
}
}
/// A fallible iterator over [`Structural`] characters that can be stopped and later resumed.
///
/// Items are yielded through the [`FallibleIterator`] supertrait, with [`InputError`] as the
/// error type. The remaining methods toggle whether [`Structural::Colon`] and
/// [`Structural::Comma`] are reported.
///
/// NOTE(review): the `idx` argument of the `turn_*_on` methods presumably tells the classifier
/// from which input position the newly enabled characters should be reported — confirm against
/// the implementors.
pub trait StructuralIterator<'i, I, Q, M, const N: usize>:
    FallibleIterator<Item = Structural, Error = InputError>
where
    I: InputBlockIterator<'i, N>,
{
    /// Builds a classifier from a [`ResumeClassifierState`], such as one returned by
    /// [`stop`](Self::stop).
    fn resume(state: ResumeClassifierState<'i, I, Q, M, N>) -> Self;

    /// Consumes the classifier and returns a [`ResumeClassifierState`] that can be handed to
    /// [`resume`](Self::resume).
    fn stop(self) -> ResumeClassifierState<'i, I, Q, M, N>;

    /// Turns on reporting of colons; see the trait-level note about `idx`.
    fn turn_colons_on(&mut self, idx: usize);

    /// Turns off reporting of colons.
    fn turn_colons_off(&mut self);

    /// Turns on reporting of commas; see the trait-level note about `idx`.
    fn turn_commas_on(&mut self, idx: usize);

    /// Turns off reporting of commas.
    fn turn_commas_off(&mut self);

    /// Turns on reporting of both colons and commas; see the trait-level note about `idx`.
    fn turn_colons_and_commas_on(&mut self, idx: usize);

    /// Turns off reporting of both colons and commas.
    fn turn_colons_and_commas_off(&mut self);
}
// Classifier implementation modules, all crate-private.
// `nosimd` and `shared` carry no `cfg` gate here, so they are compiled for every target.
pub(crate) mod nosimd;
pub(crate) mod shared;
// Each module below is gated on `target_arch` and is only compiled when the target matches:
// `x86` for the `*_32` modules, `x86_64` for `avx2_64`, `avx512_64` and `ssse3_64`,
// and `aarch64` for `neon_64`.
// NOTE(review): the names suggest one module per SIMD instruction set — confirm in the modules.
#[cfg(target_arch = "x86")]
pub(crate) mod avx2_32;
#[cfg(target_arch = "x86_64")]
pub(crate) mod avx2_64;
#[cfg(target_arch = "x86_64")]
pub(crate) mod avx512_64;
#[cfg(target_arch = "aarch64")]
pub(crate) mod neon_64;
#[cfg(target_arch = "x86")]
pub(crate) mod ssse3_32;
#[cfg(target_arch = "x86_64")]
pub(crate) mod ssse3_64;
/// Crate-internal trait that ties an implementation to its concrete structural classifier type
/// and provides the entry points for creating and resuming that classifier.
pub(crate) trait StructuralImpl {
/// The classifier type of this implementation. It must be a [`StructuralIterator`] over
/// `MaskType` masks and `BLOCK_SIZE`-sized blocks, for any block iterator `I` and any
/// quote-classified iterator `Q` built on top of `I`.
type Classifier<'i, I, Q>: StructuralIterator<'i, I, Q, MaskType, BLOCK_SIZE>
where
I: InputBlockIterator<'i, BLOCK_SIZE>,
Q: QuoteClassifiedIterator<'i, I, MaskType, BLOCK_SIZE>;
/// Creates a new classifier from the quote-classified iterator `iter`.
fn new<'i, I, Q>(iter: Q) -> Self::Classifier<'i, I, Q>
where
I: InputBlockIterator<'i, BLOCK_SIZE>,
Q: QuoteClassifiedIterator<'i, I, MaskType, BLOCK_SIZE>;
/// Creates a classifier from a previously captured [`ResumeClassifierState`].
///
/// The provided implementation forwards to [`StructuralIterator::resume`] of
/// `Self::Classifier`.
fn resume<'i, I, Q>(state: ResumeClassifierState<'i, I, Q, MaskType, BLOCK_SIZE>) -> Self::Classifier<'i, I, Q>
where
I: InputBlockIterator<'i, BLOCK_SIZE>,
Q: QuoteClassifiedIterator<'i, I, MaskType, BLOCK_SIZE>,
{
// Fully-qualified call: names the trait explicitly so the associated function is unambiguous.
<Self::Classifier<'i, I, Q> as StructuralIterator<'i, I, Q, MaskType, BLOCK_SIZE>>::resume(state)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        classification::simd::{self, config_simd, Simd},
        input::{BorrowedBytes, Input},
        result::empty::EmptyRecorder,
    };

    /// Nested document shared by the tests that toggle comma and colon reporting.
    const NESTED_JSON: &str = r#"{"a": [42, 36, { "b": { "c": 1, "d": 2 } }]}"#;

    /// Pulls the next item out of `classifier` and asserts that it is `Some(expected)`.
    ///
    /// Marked `#[track_caller]` so that a failure is reported at the calling test line.
    #[track_caller]
    fn assert_next(
        classifier: &mut impl FallibleIterator<Item = Structural, Error = InputError>,
        expected: Structural,
    ) {
        assert_eq!(Some(expected), classifier.next().unwrap());
    }

    /// With commas and colons left at their defaults, only brackets are reported,
    /// both before and after a stop/resume cycle.
    #[test]
    fn resumption_without_commas_or_colons() {
        use BracketType::*;
        use Structural::*;

        let simd = simd::configure();
        config_simd!(simd => |simd| {
            let document = NESTED_JSON.to_owned();
            let input = BorrowedBytes::new(document.as_bytes());
            // Reported indices are shifted by the padding the input puts in front of the bytes.
            let padding = input.leading_padding_len();
            let blocks = input.iter_blocks(&EmptyRecorder);
            let quote_classifier = simd.classify_quoted_sequences(blocks);
            let mut classifier = simd.classify_structural_characters(quote_classifier);

            assert_next(&mut classifier, Opening(Curly, padding));
            assert_next(&mut classifier, Opening(Square, 6 + padding));

            let state = classifier.stop();
            let mut resumed = simd.resume_structural_classification(state);

            assert_next(&mut resumed, Opening(Curly, 15 + padding));
            assert_next(&mut resumed, Opening(Curly, 22 + padding));
        });
    }

    /// With only commas turned on, commas are reported alongside brackets and the
    /// setting survives a stop/resume cycle.
    #[test]
    fn resumption_with_commas_but_no_colons() {
        use BracketType::*;
        use Structural::*;

        let simd = simd::configure();
        config_simd!(simd => |simd| {
            let document = NESTED_JSON.to_owned();
            let input = BorrowedBytes::new(document.as_bytes());
            let padding = input.leading_padding_len();
            let blocks = input.iter_blocks(&EmptyRecorder);
            let quote_classifier = simd.classify_quoted_sequences(blocks);
            let mut classifier = simd.classify_structural_characters(quote_classifier);
            classifier.turn_commas_on(0);

            assert_next(&mut classifier, Opening(Curly, padding));
            assert_next(&mut classifier, Opening(Square, 6 + padding));
            assert_next(&mut classifier, Comma(9 + padding));
            assert_next(&mut classifier, Comma(13 + padding));

            let state = classifier.stop();
            let mut resumed = simd.resume_structural_classification(state);

            assert_next(&mut resumed, Opening(Curly, 15 + padding));
            assert_next(&mut resumed, Opening(Curly, 22 + padding));
            assert_next(&mut resumed, Comma(30 + padding));
        });
    }

    /// With only colons turned on, colons are reported alongside brackets and the
    /// setting survives a stop/resume cycle.
    #[test]
    fn resumption_with_colons_but_no_commas() {
        use BracketType::*;
        use Structural::*;

        let simd = simd::configure();
        config_simd!(simd => |simd| {
            let document = NESTED_JSON.to_owned();
            let input = BorrowedBytes::new(document.as_bytes());
            let padding = input.leading_padding_len();
            let blocks = input.iter_blocks(&EmptyRecorder);
            let quote_classifier = simd.classify_quoted_sequences(blocks);
            let mut classifier = simd.classify_structural_characters(quote_classifier);
            classifier.turn_colons_on(0);

            assert_next(&mut classifier, Opening(Curly, padding));
            assert_next(&mut classifier, Colon(4 + padding));
            assert_next(&mut classifier, Opening(Square, 6 + padding));

            let state = classifier.stop();
            let mut resumed = simd.resume_structural_classification(state);

            assert_next(&mut resumed, Opening(Curly, 15 + padding));
            assert_next(&mut resumed, Colon(20 + padding));
            assert_next(&mut resumed, Opening(Curly, 22 + padding));
            assert_next(&mut resumed, Colon(27 + padding));
        });
    }

    /// With both commas and colons turned on, every structural character is reported
    /// and both settings survive a stop/resume cycle.
    #[test]
    fn resumption_with_commas_and_colons() {
        use BracketType::*;
        use Structural::*;

        let simd = simd::configure();
        config_simd!(simd => |simd| {
            let document = NESTED_JSON.to_owned();
            let input = BorrowedBytes::new(document.as_bytes());
            let padding = input.leading_padding_len();
            let blocks = input.iter_blocks(&EmptyRecorder);
            let quote_classifier = simd.classify_quoted_sequences(blocks);
            let mut classifier = simd.classify_structural_characters(quote_classifier);
            classifier.turn_commas_on(0);
            classifier.turn_colons_on(0);

            assert_next(&mut classifier, Opening(Curly, padding));
            assert_next(&mut classifier, Colon(4 + padding));
            assert_next(&mut classifier, Opening(Square, 6 + padding));
            assert_next(&mut classifier, Comma(9 + padding));
            assert_next(&mut classifier, Comma(13 + padding));

            let state = classifier.stop();
            let mut resumed = simd.resume_structural_classification(state);

            assert_next(&mut resumed, Opening(Curly, 15 + padding));
            assert_next(&mut resumed, Colon(20 + padding));
            assert_next(&mut resumed, Opening(Curly, 22 + padding));
            assert_next(&mut resumed, Colon(27 + padding));
            assert_next(&mut resumed, Comma(30 + padding));
        });
    }

    /// Stopping right after the opening brace of a document padded with 128 spaces
    /// must still find the closing brace after resumption.
    #[test]
    fn resumption_at_block_boundary() {
        use BracketType::*;
        use Structural::*;

        let simd = simd::configure();
        config_simd!(simd => |simd| {
            // `{`, then 128 spaces, then `}` — the closing brace sits at index 129.
            let document = format!("{{{}}}", " ".repeat(128));
            let input = BorrowedBytes::new(document.as_bytes());
            let padding = input.leading_padding_len();
            let blocks = input.iter_blocks(&EmptyRecorder);
            let quote_classifier = simd.classify_quoted_sequences(blocks);
            let mut classifier = simd.classify_structural_characters(quote_classifier);

            assert_next(&mut classifier, Opening(Curly, padding));

            let state = classifier.stop();
            let mut resumed = simd.resume_structural_classification(state);

            assert_next(&mut resumed, Closing(Curly, 129 + padding));
        });
    }
}