1use crate::{
15 classification::{quotes::QuoteClassifiedIterator, ResumeClassifierState},
16 input::{error::InputError, InputBlockIterator},
17 FallibleIterator, MaskType, BLOCK_SIZE,
18};
19
/// Kind of bracket carried by [`Structural::Opening`] and [`Structural::Closing`].
///
/// The tests in this file show `Curly` being reported for `{`/`}` and `Square`
/// for `[`. The type is a plain, fieldless enum stored in a single byte.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum BracketType {
    /// Square bracket (`[` in the test document).
    Square,
    /// Curly brace (`{` and `}` in the test documents).
    Curly,
}
29
30#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
32pub enum Structural {
33 Closing(BracketType, usize),
35 Colon(usize),
37 Opening(BracketType, usize),
39 Comma(usize),
41}
42use Structural::*;
43
44impl Structural {
45 #[inline(always)]
48 #[must_use]
49 pub fn idx(self) -> usize {
50 match self {
51 Closing(_, idx) | Colon(idx) | Opening(_, idx) | Comma(idx) => idx,
52 }
53 }
54
55 #[inline(always)]
68 #[must_use]
69 pub fn offset(self, amount: usize) -> Self {
70 match self {
71 Closing(b, idx) => Closing(b, idx + amount),
72 Colon(idx) => Colon(idx + amount),
73 Opening(b, idx) => Opening(b, idx + amount),
74 Comma(idx) => Comma(idx + amount),
75 }
76 }
77
78 #[inline(always)]
94 #[must_use]
95 pub fn is_closing(&self) -> bool {
96 matches!(self, Closing(_, _))
97 }
98
99 #[inline(always)]
115 #[must_use]
116 pub fn is_opening(&self) -> bool {
117 matches!(self, Opening(_, _))
118 }
119}
120
121pub trait StructuralIterator<'i, I, Q, M, const N: usize>:
124 FallibleIterator<Item = Structural, Error = InputError>
125where
126 I: InputBlockIterator<'i, N>,
127{
128 fn stop(self) -> ResumeClassifierState<'i, I, Q, M, N>;
131
132 fn resume(state: ResumeClassifierState<'i, I, Q, M, N>) -> Self;
134
135 fn turn_colons_off(&mut self);
137
138 fn turn_colons_on(&mut self, idx: usize);
145
146 fn turn_commas_off(&mut self);
148
149 fn turn_commas_on(&mut self, idx: usize);
156
157 fn turn_colons_and_commas_on(&mut self, idx: usize);
163
164 fn turn_colons_and_commas_off(&mut self);
170}
171
172pub(crate) mod nosimd;
173pub(crate) mod shared;
174
175#[cfg(target_arch = "x86")]
176pub(crate) mod avx2_32;
177#[cfg(target_arch = "x86_64")]
178pub(crate) mod avx2_64;
179#[cfg(target_arch = "x86")]
180pub(crate) mod ssse3_32;
181#[cfg(target_arch = "x86_64")]
182pub(crate) mod ssse3_64;
183
184pub(crate) trait StructuralImpl {
185 type Classifier<'i, I, Q>: StructuralIterator<'i, I, Q, MaskType, BLOCK_SIZE>
186 where
187 I: InputBlockIterator<'i, BLOCK_SIZE>,
188 Q: QuoteClassifiedIterator<'i, I, MaskType, BLOCK_SIZE>;
189
190 fn new<'i, I, Q>(iter: Q) -> Self::Classifier<'i, I, Q>
191 where
192 I: InputBlockIterator<'i, BLOCK_SIZE>,
193 Q: QuoteClassifiedIterator<'i, I, MaskType, BLOCK_SIZE>;
194
195 fn resume<'i, I, Q>(state: ResumeClassifierState<'i, I, Q, MaskType, BLOCK_SIZE>) -> Self::Classifier<'i, I, Q>
196 where
197 I: InputBlockIterator<'i, BLOCK_SIZE>,
198 Q: QuoteClassifiedIterator<'i, I, MaskType, BLOCK_SIZE>,
199 {
200 <Self::Classifier<'i, I, Q> as StructuralIterator<'i, I, Q, MaskType, BLOCK_SIZE>>::resume(state)
201 }
202}
203
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        classification::simd::{self, config_simd, Simd},
        input::{BorrowedBytes, Input},
        result::empty::EmptyRecorder,
    };

    /// Document shared by the first four resumption tests.
    ///
    /// Its first structural bytes sit at these positions:
    /// `{` 0, `:` 4, `[` 6, `,` 9, `,` 13, `{` 15, `:` 20, `{` 22, `:` 27, `,` 30.
    const NESTED_DOC: &str = r#"{"a": [42, 36, { "b": { "c": 1, "d": 2 } }]}"#;

    /// With nothing switched on, only brackets are reported, before and after resuming.
    #[test]
    fn resumption_without_commas_or_colons() {
        use BracketType::*;
        use Structural::*;

        let simd = simd::configure();
        config_simd!(simd => |simd| {
            let document = NESTED_DOC.to_owned();
            let input = BorrowedBytes::new(document.as_bytes());
            let blocks = input.iter_blocks(&EmptyRecorder);
            let quote_classifier = simd.classify_quoted_sequences(blocks);
            // Reported indices are shifted by the input's leading padding.
            let pad = input.leading_padding_len();

            let mut classifier = simd.classify_structural_characters(quote_classifier);

            for expected in [Opening(Curly, pad), Opening(Square, 6 + pad)] {
                assert_eq!(Some(expected), classifier.next().unwrap());
            }

            let state = classifier.stop();
            let mut resumed = simd.resume_structural_classification(state);

            for expected in [Opening(Curly, 15 + pad), Opening(Curly, 22 + pad)] {
                assert_eq!(Some(expected), resumed.next().unwrap());
            }
        });
    }

    /// Commas switched on before stopping are still reported after resuming; colons are not.
    #[test]
    fn resumption_with_commas_but_no_colons() {
        use BracketType::*;
        use Structural::*;

        let simd = simd::configure();
        config_simd!(simd => |simd| {
            let document = NESTED_DOC.to_owned();
            let input = BorrowedBytes::new(document.as_bytes());
            let blocks = input.iter_blocks(&EmptyRecorder);
            let quote_classifier = simd.classify_quoted_sequences(blocks);
            let pad = input.leading_padding_len();

            let mut classifier = simd.classify_structural_characters(quote_classifier);
            classifier.turn_commas_on(0);

            for expected in [
                Opening(Curly, pad),
                Opening(Square, 6 + pad),
                Comma(9 + pad),
                Comma(13 + pad),
            ] {
                assert_eq!(Some(expected), classifier.next().unwrap());
            }

            let state = classifier.stop();
            let mut resumed = simd.resume_structural_classification(state);

            for expected in [Opening(Curly, 15 + pad), Opening(Curly, 22 + pad), Comma(30 + pad)] {
                assert_eq!(Some(expected), resumed.next().unwrap());
            }
        });
    }

    /// Colons switched on before stopping are still reported after resuming; commas are not.
    #[test]
    fn resumption_with_colons_but_no_commas() {
        use BracketType::*;
        use Structural::*;

        let simd = simd::configure();
        config_simd!(simd => |simd| {
            let document = NESTED_DOC.to_owned();
            let input = BorrowedBytes::new(document.as_bytes());
            let blocks = input.iter_blocks(&EmptyRecorder);
            let quote_classifier = simd.classify_quoted_sequences(blocks);
            let pad = input.leading_padding_len();

            let mut classifier = simd.classify_structural_characters(quote_classifier);
            classifier.turn_colons_on(0);

            for expected in [Opening(Curly, pad), Colon(4 + pad), Opening(Square, 6 + pad)] {
                assert_eq!(Some(expected), classifier.next().unwrap());
            }

            let state = classifier.stop();
            let mut resumed = simd.resume_structural_classification(state);

            for expected in [
                Opening(Curly, 15 + pad),
                Colon(20 + pad),
                Opening(Curly, 22 + pad),
                Colon(27 + pad),
            ] {
                assert_eq!(Some(expected), resumed.next().unwrap());
            }
        });
    }

    /// Both switches set before stopping remain in effect after resuming.
    #[test]
    fn resumption_with_commas_and_colons() {
        use BracketType::*;
        use Structural::*;

        let simd = simd::configure();
        config_simd!(simd => |simd| {
            let document = NESTED_DOC.to_owned();
            let input = BorrowedBytes::new(document.as_bytes());
            let blocks = input.iter_blocks(&EmptyRecorder);
            let quote_classifier = simd.classify_quoted_sequences(blocks);
            let pad = input.leading_padding_len();

            let mut classifier = simd.classify_structural_characters(quote_classifier);
            classifier.turn_commas_on(0);
            classifier.turn_colons_on(0);

            for expected in [
                Opening(Curly, pad),
                Colon(4 + pad),
                Opening(Square, 6 + pad),
                Comma(9 + pad),
                Comma(13 + pad),
            ] {
                assert_eq!(Some(expected), classifier.next().unwrap());
            }

            let state = classifier.stop();
            let mut resumed = simd.resume_structural_classification(state);

            for expected in [
                Opening(Curly, 15 + pad),
                Colon(20 + pad),
                Opening(Curly, 22 + pad),
                Colon(27 + pad),
                Comma(30 + pad),
            ] {
                assert_eq!(Some(expected), resumed.next().unwrap());
            }
        });
    }

    /// Stops right after the opening brace and resumes to find the closing
    /// brace 129 bytes later, past a run of 128 spaces.
    #[test]
    fn resumption_at_block_boundary() {
        use BracketType::*;
        use Structural::*;

        let simd = simd::configure();
        config_simd!(simd => |simd| {
            // `{`, then 128 spaces, then `}`.
            let document = format!("{{{}}}", " ".repeat(128));
            let input = BorrowedBytes::new(document.as_bytes());
            let blocks = input.iter_blocks(&EmptyRecorder);
            let quote_classifier = simd.classify_quoted_sequences(blocks);
            let pad = input.leading_padding_len();

            let mut classifier = simd.classify_structural_characters(quote_classifier);

            assert_eq!(Some(Opening(Curly, pad)), classifier.next().unwrap());

            let state = classifier.stop();
            let mut resumed = simd.resume_structural_classification(state);

            assert_eq!(Some(Closing(Curly, 129 + pad)), resumed.next().unwrap());
        });
    }
}