1mod arena_jsonski;
15mod jsonski;
16mod langdale;
17mod scalar;
18
19use rayon::prelude::*;
20
21pub use arena_jsonski::ArenaJsonSkiSkip;
22pub use jsonski::JsonSkiSkip;
23pub use langdale::LangdaleSkip;
24pub use scalar::ScalarSkip;
25
26#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
28pub use crate::x86::skip::Avx2Skip;
29
/// Outcome of a successful skip operation, as returned (wrapped in `Some`)
/// by the methods of [`Skip`].
///
/// The type is a small `Copy` value and compares field-by-field.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SkipResult {
    /// Amount of input consumed by the skip.
    ///
    /// NOTE(review): presumably a byte count measured from the start of the
    /// slice handed to the skipper — confirm against the implementations.
    pub consumed: usize,
    /// Flag reported alongside `consumed`.
    ///
    /// NOTE(review): presumably `true` when the skipped span contained at
    /// least one escape sequence — confirm against the implementations.
    pub has_escapes: bool,
}
38
/// Common interface implemented by every skipping backend in this module.
///
/// Each method inspects `input` and returns `Some(SkipResult)` on success or
/// `None` otherwise.
///
/// NOTE(review): the exact conditions that yield `None` (truncated or
/// malformed input, presumably) are defined by the implementations — confirm
/// there.
pub trait Skip {
    /// Skips the remainder of an object.
    ///
    /// The tests in this module call it with input that starts *after* the
    /// opening brace, e.g. `"key": "value"}` or just `}`.
    fn skip_object(&self, input: &[u8]) -> Option<SkipResult>;

    /// Skips the remainder of an array.
    ///
    /// The tests in this module call it with input that starts *after* the
    /// opening bracket, e.g. `1, 2, 3]` or just `]`.
    fn skip_array(&self, input: &[u8]) -> Option<SkipResult>;

    /// Skips a string.
    ///
    /// NOTE(review): presumably expects input positioned after the opening
    /// quote, by analogy with `skip_object`/`skip_array` — confirm.
    fn skip_string(&self, input: &[u8]) -> Option<SkipResult>;

    /// Skips one complete value.
    ///
    /// The tests in this module call it with whole values such as `{}`,
    /// `[]`, `"str"`, `123`, `true` and `null`.
    fn skip_value(&self, input: &[u8]) -> Option<SkipResult>;
}
53
/// Selects which skipping backend to use.
///
/// Each variant corresponds to one [`Skip`] implementation; the default is
/// [`SkipStrategy::JsonSki`].
///
/// The enum is a plain `Copy` tag, so it also derives `PartialEq`, `Eq` and
/// `Hash`: strategies can be compared with `==` and used as map/set keys
/// instead of going through `matches!`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub enum SkipStrategy {
    /// Backend implemented by `ScalarSkip`.
    Scalar,
    /// Backend implemented by `LangdaleSkip`.
    Langdale,
    /// Backend implemented by `JsonSkiSkip` (the default).
    #[default]
    JsonSki,
    /// Backend implemented by `Avx2Skip`; the variant only exists on
    /// x86/x86_64 targets.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    Avx2,
}
68
69impl std::fmt::Display for SkipStrategy {
70 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71 let name = match self {
72 Self::Scalar => "Scalar",
73 Self::Langdale => "Langdale",
74 Self::JsonSki => "JsonSki",
75 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
76 Self::Avx2 => "Avx2",
77 };
78 write!(f, "{name}")
79 }
80}
81
82impl SkipStrategy {
83 #[must_use]
85 pub fn skipper(&self) -> Box<dyn Skip + Send + Sync> {
86 match self {
87 Self::Scalar => Box::new(ScalarSkip),
88 Self::Langdale => Box::new(LangdaleSkip::new()),
89 Self::JsonSki => Box::new(JsonSkiSkip::new()),
90 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
91 Self::Avx2 => Box::new(Avx2Skip::new()),
92 }
93 }
94
95 #[must_use]
97 pub fn best_simd() -> Self {
98 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
99 {
100 if Avx2Skip::is_available() {
101 return Self::Avx2;
102 }
103 }
104 Self::JsonSki
105 }
106
107 #[must_use]
109 pub fn all_strategies() -> Vec<Self> {
110 let mut strategies = vec![Self::Scalar, Self::Langdale, Self::JsonSki];
111 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
112 {
113 if Avx2Skip::is_available() {
114 strategies.push(Self::Avx2);
115 }
116 }
117 strategies
118 }
119}
120
121#[must_use]
143pub fn skip_objects_parallel(documents: &[&[u8]]) -> Vec<Option<SkipResult>> {
144 documents
145 .par_iter()
146 .map(|doc| {
147 let skipper = SkipStrategy::best_simd().skipper();
148 skipper.skip_object(doc)
149 })
150 .collect()
151}
152
153#[must_use]
155pub fn skip_arrays_parallel(documents: &[&[u8]]) -> Vec<Option<SkipResult>> {
156 documents
157 .par_iter()
158 .map(|doc| {
159 let skipper = SkipStrategy::best_simd().skipper();
160 skipper.skip_array(doc)
161 })
162 .collect()
163}
164
165#[must_use]
168pub fn skip_values_parallel(documents: &[&[u8]]) -> Vec<Option<SkipResult>> {
169 documents
170 .par_iter()
171 .map(|doc| {
172 let skipper = SkipStrategy::best_simd().skipper();
173 skipper.skip_value(doc)
174 })
175 .collect()
176}
177
/// Batch skipper that applies one fixed [`SkipStrategy`] to many documents
/// in parallel.
///
/// The value only stores the strategy tag, so it is cheap to copy.
#[derive(Debug, Clone, Copy)]
pub struct ParallelSkipper {
    /// Strategy used to build the skipper for each batch call.
    strategy: SkipStrategy,
}
186
187impl std::fmt::Display for ParallelSkipper {
188 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
189 write!(f, "ParallelSkipper(strategy={})", self.strategy)
190 }
191}
192
193impl ParallelSkipper {
194 #[must_use]
196 pub const fn new(strategy: SkipStrategy) -> Self {
197 Self { strategy }
198 }
199
200 #[must_use]
202 pub fn best_simd() -> Self {
203 Self::new(SkipStrategy::best_simd())
204 }
205
206 #[must_use]
208 pub fn skip_objects(&self, documents: &[&[u8]]) -> Vec<Option<SkipResult>> {
209 let strategy = self.strategy;
210 documents
211 .par_iter()
212 .map(|doc| {
213 let skipper = strategy.skipper();
214 skipper.skip_object(doc)
215 })
216 .collect()
217 }
218
219 #[must_use]
221 pub fn skip_arrays(&self, documents: &[&[u8]]) -> Vec<Option<SkipResult>> {
222 let strategy = self.strategy;
223 documents
224 .par_iter()
225 .map(|doc| {
226 let skipper = strategy.skipper();
227 skipper.skip_array(doc)
228 })
229 .collect()
230 }
231
232 #[must_use]
234 pub fn skip_values(&self, documents: &[&[u8]]) -> Vec<Option<SkipResult>> {
235 let strategy = self.strategy;
236 documents
237 .par_iter()
238 .map(|doc| {
239 let skipper = strategy.skipper();
240 skipper.skip_value(doc)
241 })
242 .collect()
243 }
244
245 #[must_use]
250 pub fn benchmark(
251 &self,
252 documents: &[&[u8]],
253 iterations: usize,
254 ) -> (usize, std::time::Duration) {
255 let total_bytes: usize = documents.iter().map(|d| d.len()).sum();
256 let start = std::time::Instant::now();
257
258 for _ in 0..iterations {
259 let _ = self.skip_objects(documents);
260 }
261
262 (total_bytes.saturating_mul(iterations), start.elapsed())
263 }
264}
265
#[cfg(test)]
mod tests {
    use super::*;

    // ---------------------------------------------------------------------
    // SkipResult
    // ---------------------------------------------------------------------

    /// The derived `Debug` output names both fields and shows their values.
    #[test]
    fn test_skip_result_debug() {
        let result = SkipResult {
            consumed: 10,
            has_escapes: true,
        };
        let debug_str = format!("{result:?}");
        assert!(debug_str.contains("consumed"));
        assert!(debug_str.contains("10"));
        assert!(debug_str.contains("has_escapes"));
    }

    /// A duplicated `SkipResult` compares equal to its source.
    #[test]
    fn test_skip_result_clone() {
        let result = SkipResult {
            consumed: 42,
            has_escapes: false,
        };
        let cloned = result;
        assert_eq!(result, cloned);
    }

    /// `SkipResult` is `Copy`: the original stays usable after assignment.
    #[test]
    fn test_skip_result_copy() {
        let result = SkipResult {
            consumed: 100,
            has_escapes: true,
        };
        let copied = result;
        assert_eq!(result.consumed, copied.consumed);
        assert_eq!(result.has_escapes, copied.has_escapes);
    }

    /// Equality is field-wise: a difference in either field breaks it.
    #[test]
    fn test_skip_result_equality() {
        let a = SkipResult {
            consumed: 5,
            has_escapes: false,
        };
        let b = SkipResult {
            consumed: 5,
            has_escapes: false,
        };
        let c = SkipResult {
            consumed: 5,
            has_escapes: true,
        };
        let d = SkipResult {
            consumed: 6,
            has_escapes: false,
        };

        assert_eq!(a, b);
        assert_ne!(a, c);
        assert_ne!(a, d);
    }

    // ---------------------------------------------------------------------
    // SkipStrategy
    // ---------------------------------------------------------------------

    /// The default strategy is `JsonSki`.
    #[test]
    fn test_skip_strategy_default() {
        let strategy = SkipStrategy::default();
        // `matches!` only yields a bool; it must be asserted to test anything.
        assert!(matches!(strategy, SkipStrategy::JsonSki));
    }

    /// Both `Display` and the derived `Debug` mention the variant name.
    #[test]
    fn test_skip_strategy_debug() {
        let strategy = SkipStrategy::Scalar;
        let display_str = format!("{strategy}");
        assert!(display_str.contains("Scalar"));
        let debug_str = format!("{strategy:?}");
        assert!(debug_str.contains("Scalar"));
    }

    /// Copying a strategy preserves the variant.
    #[test]
    fn test_skip_strategy_clone() {
        let strategy = SkipStrategy::Langdale;
        let cloned = strategy;
        // `matches!` only yields a bool; it must be asserted to test anything.
        assert!(matches!(cloned, SkipStrategy::Langdale));
    }

    #[test]
    fn test_skip_strategy_skipper_scalar() {
        let strategy = SkipStrategy::Scalar;
        let skipper = strategy.skipper();
        let result = skipper.skip_object(b"}");
        assert!(result.is_some());
    }

    #[test]
    fn test_skip_strategy_skipper_langdale() {
        let strategy = SkipStrategy::Langdale;
        let skipper = strategy.skipper();
        let result = skipper.skip_object(b"}");
        assert!(result.is_some());
    }

    #[test]
    fn test_skip_strategy_skipper_jsonski() {
        let strategy = SkipStrategy::JsonSki;
        let skipper = strategy.skipper();
        let result = skipper.skip_object(b"}");
        assert!(result.is_some());
    }

    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    #[test]
    fn test_skip_strategy_skipper_avx2() {
        // NOTE(review): assumes the AVX2 backend must not be exercised when
        // `Avx2Skip::is_available()` is false, mirroring the guard used by
        // `all_strategies` and `best_simd`.
        if !Avx2Skip::is_available() {
            return;
        }
        let strategy = SkipStrategy::Avx2;
        let skipper = strategy.skipper();
        let result = skipper.skip_object(b"}");
        assert!(result.is_some());
    }

    #[test]
    fn test_skip_strategy_best_simd() {
        let strategy = SkipStrategy::best_simd();
        let skipper = strategy.skipper();
        let result = skipper.skip_object(b"}");
        assert!(result.is_some());
    }

    /// The three portable strategies are always listed.
    #[test]
    fn test_skip_strategy_all_strategies() {
        let strategies = SkipStrategy::all_strategies();
        assert!(strategies.len() >= 3);
        assert!(strategies.iter().any(|s| matches!(s, SkipStrategy::Scalar)));
        assert!(
            strategies
                .iter()
                .any(|s| matches!(s, SkipStrategy::Langdale))
        );
        assert!(
            strategies
                .iter()
                .any(|s| matches!(s, SkipStrategy::JsonSki))
        );
    }

    // ---------------------------------------------------------------------
    // Free parallel helpers
    // ---------------------------------------------------------------------

    #[test]
    fn test_skip_objects_parallel_empty() {
        let documents: Vec<&[u8]> = vec![];
        let results = skip_objects_parallel(&documents);
        assert!(results.is_empty());
    }

    #[test]
    fn test_skip_objects_parallel_single() {
        let documents: Vec<&[u8]> = vec![b"}"];
        let results = skip_objects_parallel(&documents);
        assert_eq!(results.len(), 1);
        assert!(results[0].is_some());
    }

    #[test]
    fn test_skip_objects_parallel_multiple() {
        let documents: Vec<&[u8]> = vec![
            br#""key": "value"}"#,
            br#""name": "test"}"#,
            br#""a": 1, "b": 2}"#,
        ];
        let results = skip_objects_parallel(&documents);
        assert_eq!(results.len(), 3);
        for result in &results {
            assert!(result.is_some());
        }
    }

    #[test]
    fn test_skip_arrays_parallel_empty() {
        let documents: Vec<&[u8]> = vec![];
        let results = skip_arrays_parallel(&documents);
        assert!(results.is_empty());
    }

    #[test]
    fn test_skip_arrays_parallel_single() {
        let documents: Vec<&[u8]> = vec![b"]"];
        let results = skip_arrays_parallel(&documents);
        assert_eq!(results.len(), 1);
        assert!(results[0].is_some());
    }

    #[test]
    fn test_skip_arrays_parallel_multiple() {
        let documents: Vec<&[u8]> = vec![b"1, 2, 3]", b"\"a\", \"b\"]", b"]"];
        let results = skip_arrays_parallel(&documents);
        assert_eq!(results.len(), 3);
        for result in &results {
            assert!(result.is_some());
        }
    }

    #[test]
    fn test_skip_values_parallel_empty() {
        let documents: Vec<&[u8]> = vec![];
        let results = skip_values_parallel(&documents);
        assert!(results.is_empty());
    }

    /// One result slot is produced per input, whatever the value kind.
    #[test]
    fn test_skip_values_parallel_mixed() {
        let documents: Vec<&[u8]> = vec![
            b"{}",
            b"[]",
            b"\"str\"",
            b"123",
            b"true",
            b"null",
        ];
        let results = skip_values_parallel(&documents);
        assert_eq!(results.len(), 6);
    }

    // ---------------------------------------------------------------------
    // ParallelSkipper
    // ---------------------------------------------------------------------

    #[test]
    fn test_parallel_skipper_new() {
        let skipper = ParallelSkipper::new(SkipStrategy::Scalar);
        let docs: Vec<&[u8]> = vec![b"}"];
        let results = skipper.skip_objects(&docs);
        assert_eq!(results.len(), 1);
    }

    #[test]
    fn test_parallel_skipper_best_simd() {
        let skipper = ParallelSkipper::best_simd();
        let docs: Vec<&[u8]> = vec![b"}"];
        let results = skipper.skip_objects(&docs);
        assert_eq!(results.len(), 1);
    }

    /// Both `Display` and the derived `Debug` mention the type name.
    #[test]
    fn test_parallel_skipper_debug() {
        let skipper = ParallelSkipper::new(SkipStrategy::JsonSki);
        let display_str = format!("{skipper}");
        assert!(display_str.contains("ParallelSkipper"));
        let debug_str = format!("{skipper:?}");
        assert!(debug_str.contains("ParallelSkipper"));
    }

    #[test]
    fn test_parallel_skipper_clone() {
        let skipper = ParallelSkipper::new(SkipStrategy::Langdale);
        let cloned = skipper;
        let docs: Vec<&[u8]> = vec![b"}"];
        let results1 = skipper.skip_objects(&docs);
        let results2 = cloned.skip_objects(&docs);
        assert_eq!(results1.len(), results2.len());
    }

    #[test]
    fn test_parallel_skipper_skip_objects() {
        let skipper = ParallelSkipper::new(SkipStrategy::JsonSki);
        let docs: Vec<&[u8]> = vec![br#""a": 1}"#, br#""b": 2}"#];
        let results = skipper.skip_objects(&docs);
        assert_eq!(results.len(), 2);
        assert!(results.iter().all(std::option::Option::is_some));
    }

    #[test]
    fn test_parallel_skipper_skip_arrays() {
        let skipper = ParallelSkipper::new(SkipStrategy::JsonSki);
        let docs: Vec<&[u8]> = vec![b"1, 2]", b"3, 4]"];
        let results = skipper.skip_arrays(&docs);
        assert_eq!(results.len(), 2);
        assert!(results.iter().all(std::option::Option::is_some));
    }

    #[test]
    fn test_parallel_skipper_skip_values() {
        let skipper = ParallelSkipper::new(SkipStrategy::JsonSki);
        let docs: Vec<&[u8]> = vec![b"{}", b"[]", b"42"];
        let results = skipper.skip_values(&docs);
        assert_eq!(results.len(), 3);
    }

    /// With a single iteration the reported byte count equals the total
    /// input length; the elapsed time is not asserted because it is
    /// machine-dependent.
    #[test]
    fn test_parallel_skipper_benchmark() {
        let skipper = ParallelSkipper::new(SkipStrategy::Scalar);
        let docs: Vec<&[u8]> = vec![br#""key": "value"}"#];
        let (bytes, duration) = skipper.benchmark(&docs, 1);
        assert!(bytes > 0);
        assert_eq!(bytes, docs[0].len());
        let _ = duration;
    }

    // ---------------------------------------------------------------------
    // Cross-strategy consistency
    // ---------------------------------------------------------------------

    /// Every available strategy reports the same `consumed` value (or the
    /// same absence of a result) for each input.
    ///
    /// NOTE(review): the last two inputs are array-shaped but are still fed
    /// to `skip_object`; only agreement between strategies is checked, not
    /// the outcome itself.
    #[test]
    fn test_all_strategies_produce_same_results() {
        let test_cases: Vec<&[u8]> = vec![
            b"}",
            br#""key": "value"}"#,
            br#""a": {"b": 1}}"#,
            b"]",
            b"1, 2, 3]",
        ];

        let strategies = SkipStrategy::all_strategies();

        for input in &test_cases {
            let results: Vec<Option<SkipResult>> = strategies
                .iter()
                .map(|s| s.skipper().skip_object(input))
                .collect();

            let first = results[0].as_ref();
            for (i, result) in results.iter().enumerate().skip(1) {
                assert_eq!(
                    first.map(|r| r.consumed),
                    result.as_ref().map(|r| r.consumed),
                    "Strategy {i} differs from first for input {input:?}"
                );
            }
        }
    }
}