1use crate::{
15 classification::{quotes::QuoteClassifiedIterator, ResumeClassifierState},
16 input::{error::InputError, InputBlockIterator},
17 FallibleIterator, MaskType, BLOCK_SIZE,
18};
19
/// Kind of bracket carried by an opening or closing [`Structural`].
///
/// The enum is fieldless and represented as a single `u8`.
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum BracketType {
    /// Square bracket, as in `[`.
    Square = 0,
    /// Curly bracket, as in `{` or `}`.
    Curly = 1,
}
29
30#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
32pub enum Structural {
33 Closing(BracketType, usize),
35 Colon(usize),
37 Opening(BracketType, usize),
39 Comma(usize),
41}
42use Structural::*;
43
44impl Structural {
45 #[inline(always)]
48 #[must_use]
49 pub fn idx(self) -> usize {
50 match self {
51 Closing(_, idx) | Colon(idx) | Opening(_, idx) | Comma(idx) => idx,
52 }
53 }
54
55 #[inline(always)]
68 #[must_use]
69 pub fn offset(self, amount: usize) -> Self {
70 match self {
71 Closing(b, idx) => Closing(b, idx + amount),
72 Colon(idx) => Colon(idx + amount),
73 Opening(b, idx) => Opening(b, idx + amount),
74 Comma(idx) => Comma(idx + amount),
75 }
76 }
77
78 #[inline(always)]
94 #[must_use]
95 pub fn is_closing(&self) -> bool {
96 matches!(self, Closing(_, _))
97 }
98
99 #[inline(always)]
115 #[must_use]
116 pub fn is_opening(&self) -> bool {
117 matches!(self, Opening(_, _))
118 }
119}
120
121pub trait StructuralIterator<'i, I, Q, M, const N: usize>:
124 FallibleIterator<Item = Structural, Error = InputError>
125where
126 I: InputBlockIterator<'i, N>,
127{
128 fn stop(self) -> ResumeClassifierState<'i, I, Q, M, N>;
131
132 fn resume(state: ResumeClassifierState<'i, I, Q, M, N>) -> Self;
134
135 fn turn_colons_off(&mut self);
137
138 fn turn_colons_on(&mut self, idx: usize);
145
146 fn turn_commas_off(&mut self);
148
149 fn turn_commas_on(&mut self, idx: usize);
156
157 fn turn_colons_and_commas_on(&mut self, idx: usize);
163
164 fn turn_colons_and_commas_off(&mut self);
170}
171
172pub(crate) mod nosimd;
173pub(crate) mod shared;
174
175#[cfg(target_arch = "x86")]
176pub(crate) mod avx2_32;
177#[cfg(target_arch = "x86_64")]
178pub(crate) mod avx2_64;
179#[cfg(target_arch = "x86_64")]
180pub(crate) mod avx512_64;
181#[cfg(target_arch = "aarch64")]
182pub(crate) mod neon_64;
183#[cfg(target_arch = "x86")]
184pub(crate) mod ssse3_32;
185#[cfg(target_arch = "x86_64")]
186pub(crate) mod ssse3_64;
187
188pub(crate) trait StructuralImpl {
189 type Classifier<'i, I, Q>: StructuralIterator<'i, I, Q, MaskType, BLOCK_SIZE>
190 where
191 I: InputBlockIterator<'i, BLOCK_SIZE>,
192 Q: QuoteClassifiedIterator<'i, I, MaskType, BLOCK_SIZE>;
193
194 fn new<'i, I, Q>(iter: Q) -> Self::Classifier<'i, I, Q>
195 where
196 I: InputBlockIterator<'i, BLOCK_SIZE>,
197 Q: QuoteClassifiedIterator<'i, I, MaskType, BLOCK_SIZE>;
198
199 fn resume<'i, I, Q>(state: ResumeClassifierState<'i, I, Q, MaskType, BLOCK_SIZE>) -> Self::Classifier<'i, I, Q>
200 where
201 I: InputBlockIterator<'i, BLOCK_SIZE>,
202 Q: QuoteClassifiedIterator<'i, I, MaskType, BLOCK_SIZE>,
203 {
204 <Self::Classifier<'i, I, Q> as StructuralIterator<'i, I, Q, MaskType, BLOCK_SIZE>>::resume(state)
205 }
206}
207
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        classification::simd::{self, config_simd, Simd},
        input::{BorrowedBytes, Input},
        result::empty::EmptyRecorder,
    };

    /// Document shared by the comma/colon resumption tests.
    ///
    /// Byte indices of the characters the assertions below refer to:
    /// `{`=0, `:`=4, `[`=6, `,`=9, `,`=13, `{`=15, `:`=20, `{`=22, `:`=27, `,`=30.
    const NESTED_JSON: &str = r#"{"a": [42, 36, { "b": { "c": 1, "d": 2 } }]}"#;

    /// With colons and commas left at their defaults, only brackets are
    /// reported, both before stopping and after resuming.
    #[test]
    fn resumption_without_commas_or_colons() {
        use BracketType::*;
        use Structural::*;

        let simd = simd::configure();
        config_simd!(simd => |simd| {
            // The fixture is a `&'static str`, so its bytes can be borrowed directly.
            let input = BorrowedBytes::new(NESTED_JSON.as_bytes());
            let iter = input.iter_blocks(&EmptyRecorder);
            let quotes = simd.classify_quoted_sequences(iter);
            // Expected indices are shifted by the input's leading padding.
            let offset = input.leading_padding_len();

            let mut classifier = simd.classify_structural_characters(quotes);

            assert_eq!(Some(Opening(Curly, offset)), classifier.next().unwrap());
            assert_eq!(Some(Opening(Square, 6 + offset)), classifier.next().unwrap());

            let resume_state = classifier.stop();

            let mut resumed_classifier = simd.resume_structural_classification(resume_state);

            assert_eq!(Some(Opening(Curly, 15 + offset)), resumed_classifier.next().unwrap());
            assert_eq!(Some(Opening(Curly, 22 + offset)), resumed_classifier.next().unwrap());
        });
    }

    /// Commas enabled before stopping are still reported after resuming;
    /// colons are never reported.
    #[test]
    fn resumption_with_commas_but_no_colons() {
        use BracketType::*;
        use Structural::*;

        let simd = simd::configure();
        config_simd!(simd => |simd| {
            let input = BorrowedBytes::new(NESTED_JSON.as_bytes());
            let iter = input.iter_blocks(&EmptyRecorder);
            let quotes = simd.classify_quoted_sequences(iter);
            let offset = input.leading_padding_len();

            let mut classifier = simd.classify_structural_characters(quotes);
            classifier.turn_commas_on(0);

            assert_eq!(Some(Opening(Curly, offset)), classifier.next().unwrap());
            assert_eq!(Some(Opening(Square, 6 + offset)), classifier.next().unwrap());
            assert_eq!(Some(Comma(9 + offset)), classifier.next().unwrap());
            assert_eq!(Some(Comma(13 + offset)), classifier.next().unwrap());

            let resume_state = classifier.stop();

            let mut resumed_classifier = simd.resume_structural_classification(resume_state);

            assert_eq!(Some(Opening(Curly, 15 + offset)), resumed_classifier.next().unwrap());
            assert_eq!(Some(Opening(Curly, 22 + offset)), resumed_classifier.next().unwrap());
            assert_eq!(Some(Comma(30 + offset)), resumed_classifier.next().unwrap());
        });
    }

    /// Colons enabled before stopping are still reported after resuming;
    /// commas are never reported.
    #[test]
    fn resumption_with_colons_but_no_commas() {
        use BracketType::*;
        use Structural::*;

        let simd = simd::configure();
        config_simd!(simd => |simd| {
            let input = BorrowedBytes::new(NESTED_JSON.as_bytes());
            let iter = input.iter_blocks(&EmptyRecorder);
            let quotes = simd.classify_quoted_sequences(iter);
            let offset = input.leading_padding_len();

            let mut classifier = simd.classify_structural_characters(quotes);
            classifier.turn_colons_on(0);

            assert_eq!(Some(Opening(Curly, offset)), classifier.next().unwrap());
            assert_eq!(Some(Colon(4 + offset)), classifier.next().unwrap());
            assert_eq!(Some(Opening(Square, 6 + offset)), classifier.next().unwrap());

            let resume_state = classifier.stop();

            let mut resumed_classifier = simd.resume_structural_classification(resume_state);

            assert_eq!(Some(Opening(Curly, 15 + offset)), resumed_classifier.next().unwrap());
            assert_eq!(Some(Colon(20 + offset)), resumed_classifier.next().unwrap());
            assert_eq!(Some(Opening(Curly, 22 + offset)), resumed_classifier.next().unwrap());
            assert_eq!(Some(Colon(27 + offset)), resumed_classifier.next().unwrap());
        });
    }

    /// With both commas and colons enabled, every structural character is
    /// reported in input order across the stop/resume boundary.
    #[test]
    fn resumption_with_commas_and_colons() {
        use BracketType::*;
        use Structural::*;

        let simd = simd::configure();
        config_simd!(simd => |simd| {
            let input = BorrowedBytes::new(NESTED_JSON.as_bytes());
            let iter = input.iter_blocks(&EmptyRecorder);
            let quotes = simd.classify_quoted_sequences(iter);
            let offset = input.leading_padding_len();

            let mut classifier = simd.classify_structural_characters(quotes);
            classifier.turn_commas_on(0);
            classifier.turn_colons_on(0);

            assert_eq!(Some(Opening(Curly, offset)), classifier.next().unwrap());
            assert_eq!(Some(Colon(4 + offset)), classifier.next().unwrap());
            assert_eq!(Some(Opening(Square, 6 + offset)), classifier.next().unwrap());
            assert_eq!(Some(Comma(9 + offset)), classifier.next().unwrap());
            assert_eq!(Some(Comma(13 + offset)), classifier.next().unwrap());

            let resume_state = classifier.stop();

            let mut resumed_classifier = simd.resume_structural_classification(resume_state);

            assert_eq!(Some(Opening(Curly, 15 + offset)), resumed_classifier.next().unwrap());
            assert_eq!(Some(Colon(20 + offset)), resumed_classifier.next().unwrap());
            assert_eq!(Some(Opening(Curly, 22 + offset)), resumed_classifier.next().unwrap());
            assert_eq!(Some(Colon(27 + offset)), resumed_classifier.next().unwrap());
            assert_eq!(Some(Comma(30 + offset)), resumed_classifier.next().unwrap());
        });
    }

    /// Stopping right after the first character and resuming must still find
    /// a closing brace that sits 129 bytes further into the input.
    #[test]
    fn resumption_at_block_boundary() {
        use BracketType::*;
        use Structural::*;

        let simd = simd::configure();
        config_simd!(simd => |simd| {
            // `{`, then 128 spaces, then `}` at index 129.
            // NOTE(review): 128 is presumably chosen so the closing brace lands
            // beyond the block holding the opening one — confirm against BLOCK_SIZE.
            let json_string = format!("{{{}}}", " ".repeat(128));
            let input = BorrowedBytes::new(json_string.as_bytes());
            let iter = input.iter_blocks(&EmptyRecorder);
            let quotes = simd.classify_quoted_sequences(iter);
            let offset = input.leading_padding_len();

            let mut classifier = simd.classify_structural_characters(quotes);

            assert_eq!(Some(Opening(Curly, offset)), classifier.next().unwrap());

            let resume_state = classifier.stop();
            let mut resumed_classifier = simd.resume_structural_classification(resume_state);

            assert_eq!(Some(Closing(Curly, 129 + offset)), resumed_classifier.next().unwrap());
        });
    }
}