Skip to main content

pjson_rs/parser/
simd_zero_copy.rs

1//! SIMD-accelerated zero-copy parser using sonic-rs
2//!
3//! This module combines the benefits of SIMD acceleration from sonic-rs
4//! with zero-copy parsing techniques to achieve maximum performance.
5
6use crate::{
7    domain::{DomainError, DomainResult},
8    parser::{
9        ValueType,
10        buffer_pool::{BufferPool, BufferSize, PooledBuffer},
11        zero_copy::{LazyJsonValue, LazyParser, MemoryUsage},
12    },
13};
14use std::{marker::PhantomData, sync::Arc};
15
/// SIMD-accelerated zero-copy parser
///
/// Holds the state for one parse at a time over a borrowed input slice with
/// lifetime `'a`. Values produced by the parser borrow from that same slice.
pub struct SimdZeroCopyParser<'a> {
    // Shared pool that `get_buffer` draws scratch buffers from.
    buffer_pool: Arc<BufferPool>,
    // Scratch buffer currently checked out of the pool, if any.
    current_buffer: Option<PooledBuffer>,
    // Input of the most recent `parse_simd` call (empty after `reset`).
    input: &'a [u8],
    // Cursor into `input`; within this file it is only ever reset to 0.
    position: usize,
    // Nesting depth; within this file it is only ever reset to 0.
    depth: usize,
    // True only when SIMD was requested in the config AND detected at runtime.
    simd_enabled: bool,
    // Marker for `'a`; note that `input` already uses the lifetime as well.
    _phantom: PhantomData<&'a ()>,
}
26
/// Configuration for SIMD zero-copy parser
///
/// NOTE(review): within this file `SimdZeroCopyParser::with_config` reads only
/// `enable_simd` and `buffer_pool_config`; the remaining fields are stored in
/// the config but not consulted by the parser code visible here.
#[derive(Debug, Clone)]
pub struct SimdZeroCopyConfig {
    /// Maximum nesting depth for safety
    pub max_depth: usize,
    /// Enable SIMD acceleration when available
    pub enable_simd: bool,
    /// Buffer pool configuration (`None` makes the parser use `BufferPool::new()`)
    pub buffer_pool_config: Option<crate::parser::buffer_pool::PoolConfig>,
    /// Minimum size for SIMD processing
    ///
    /// NOTE(review): `parse_simd` currently compares against a literal 256
    /// rather than this field.
    pub simd_threshold: usize,
    /// Enable memory usage tracking
    pub track_memory_usage: bool,
}
41
/// Parse result containing both the value and memory statistics
#[derive(Debug)]
pub struct SimdParseResult<'a> {
    /// The parsed value; it may borrow from the input slice passed to the parser.
    pub value: LazyJsonValue<'a>,
    /// Memory statistics as reported by `value.memory_usage()`.
    pub memory_usage: MemoryUsage,
    /// Whether the SIMD code path was selected for this parse.
    pub simd_used: bool,
    /// Wall-clock time spent parsing, in nanoseconds.
    pub processing_time_ns: u64,
}
50
/// Statistics about SIMD parsing performance
///
/// Plain data holder: nothing in this file updates these counters, so they
/// are presumably maintained by the caller (TODO confirm).
#[derive(Debug, Clone)]
pub struct SimdParsingStats {
    /// Number of parse calls recorded.
    pub total_parses: u64,
    /// Number of recorded parses that used the SIMD path.
    pub simd_accelerated_parses: u64,
    /// Sum of input sizes, in bytes, over all recorded parses.
    pub total_bytes_processed: u64,
    /// Average processing time in nanoseconds (presumably per parse — TODO confirm).
    pub average_processing_time_ns: u64,
    /// SIMD efficiency figure; its exact definition is not established in this file.
    pub simd_efficiency: f64,
}
60
61impl<'a> Default for SimdZeroCopyParser<'a> {
62    fn default() -> Self {
63        Self::new()
64    }
65}
66
67impl<'a> SimdZeroCopyParser<'a> {
68    /// Create new SIMD zero-copy parser with default configuration
69    pub fn new() -> Self {
70        Self::with_config(SimdZeroCopyConfig::default())
71    }
72
73    /// Create parser with custom configuration
74    pub fn with_config(config: SimdZeroCopyConfig) -> Self {
75        let buffer_pool = if let Some(pool_config) = config.buffer_pool_config {
76            Arc::new(BufferPool::with_config(pool_config))
77        } else {
78            Arc::new(BufferPool::new())
79        };
80
81        Self {
82            buffer_pool,
83            current_buffer: None,
84            input: &[],
85            position: 0,
86            depth: 0,
87            simd_enabled: config.enable_simd && Self::is_simd_available(),
88            _phantom: PhantomData,
89        }
90    }
91
92    /// Parse JSON with SIMD acceleration and zero-copy optimization
93    pub fn parse_simd(&mut self, input: &'a [u8]) -> DomainResult<SimdParseResult<'a>> {
94        let start_time = std::time::Instant::now();
95
96        self.input = input;
97        self.position = 0;
98        self.depth = 0;
99
100        // Determine if we should use SIMD based on input size
101        let use_simd = self.simd_enabled && input.len() >= 256; // Threshold for SIMD benefit
102
103        let value = if use_simd {
104            self.parse_with_simd(input)?
105        } else {
106            self.parse_without_simd(input)?
107        };
108
109        let processing_time = start_time.elapsed().as_nanos() as u64;
110        let memory_usage = value.memory_usage();
111
112        Ok(SimdParseResult {
113            value,
114            memory_usage,
115            simd_used: use_simd,
116            processing_time_ns: processing_time,
117        })
118    }
119
120    /// Parse using sonic-rs SIMD acceleration
121    fn parse_with_simd(&mut self, input: &'a [u8]) -> DomainResult<LazyJsonValue<'a>> {
122        // First, use sonic-rs to validate and get structural information
123        let sonic_result = self.sonic_preprocess(input)?;
124
125        // Then do zero-copy extraction based on sonic's findings
126        match sonic_result.value_type {
127            ValueType::Object => self.parse_simd_object(input, &sonic_result),
128            ValueType::Array => self.parse_simd_array(input, &sonic_result),
129            ValueType::String => self.parse_simd_string(input, &sonic_result),
130            ValueType::Number => self.parse_simd_number(input, &sonic_result),
131            ValueType::Boolean => self.parse_simd_boolean(input, &sonic_result),
132            ValueType::Null => Ok(LazyJsonValue::Null),
133        }
134    }
135
136    /// Parse without SIMD acceleration (fallback to pure zero-copy)
137    fn parse_without_simd(&mut self, input: &'a [u8]) -> DomainResult<LazyJsonValue<'a>> {
138        // Use the zero-copy parser directly
139        let mut zero_copy_parser = crate::parser::zero_copy::ZeroCopyParser::new();
140        zero_copy_parser.parse_lazy(input)
141    }
142
143    /// Use sonic-rs for structural analysis
144    fn sonic_preprocess(&self, input: &[u8]) -> DomainResult<SonicStructuralInfo> {
145        // This is a simplified version - actual implementation would use sonic-rs
146        // to get structural information about the JSON
147
148        if input.is_empty() {
149            return Err(DomainError::InvalidInput("Empty input".to_string()));
150        }
151
152        // Detect value type from first non-whitespace character
153        let mut pos = 0;
154        while pos < input.len() && input[pos].is_ascii_whitespace() {
155            pos += 1;
156        }
157
158        if pos >= input.len() {
159            return Err(DomainError::InvalidInput("Only whitespace".to_string()));
160        }
161
162        let value_type = match input[pos] {
163            b'{' => ValueType::Object,
164            b'[' => ValueType::Array,
165            b'"' => ValueType::String,
166            b't' | b'f' => ValueType::Boolean,
167            b'n' => ValueType::Null,
168            b'-' | b'0'..=b'9' => ValueType::Number,
169            _ => {
170                let ch = input[pos] as char;
171                return Err(DomainError::InvalidInput(format!(
172                    "Invalid JSON start character: {ch}"
173                )));
174            }
175        };
176
177        Ok(SonicStructuralInfo {
178            value_type,
179            start_pos: pos,
180            has_escapes: self.detect_escapes(input),
181            is_simd_friendly: self.is_simd_friendly(input),
182        })
183    }
184
185    /// Parse object with SIMD acceleration
186    fn parse_simd_object(
187        &mut self,
188        input: &'a [u8],
189        info: &SonicStructuralInfo,
190    ) -> DomainResult<LazyJsonValue<'a>> {
191        // For objects, we still return a slice but use SIMD for validation
192        if info.is_simd_friendly {
193            // Use SIMD for fast validation of structure
194            self.simd_validate_object_structure(input)?;
195        }
196
197        // Return zero-copy slice
198        Ok(LazyJsonValue::ObjectSlice(input))
199    }
200
201    /// Parse array with SIMD acceleration
202    fn parse_simd_array(
203        &mut self,
204        input: &'a [u8],
205        info: &SonicStructuralInfo,
206    ) -> DomainResult<LazyJsonValue<'a>> {
207        if info.is_simd_friendly {
208            // Use SIMD for fast validation of array structure
209            self.simd_validate_array_structure(input)?;
210        }
211
212        Ok(LazyJsonValue::ArraySlice(input))
213    }
214
215    /// Parse string with SIMD acceleration
216    fn parse_simd_string(
217        &mut self,
218        input: &'a [u8],
219        info: &SonicStructuralInfo,
220    ) -> DomainResult<LazyJsonValue<'a>> {
221        if !info.has_escapes {
222            // No escapes - pure zero copy
223            let start = info.start_pos + 1; // Skip opening quote
224            let end = input.len() - 1; // Skip closing quote
225            Ok(LazyJsonValue::StringBorrowed(&input[start..end]))
226        } else {
227            // Has escapes - need to process with SIMD-accelerated unescaping
228            let unescaped = self.simd_unescape_string(input)?;
229            Ok(LazyJsonValue::StringOwned(unescaped))
230        }
231    }
232
233    /// Parse number with SIMD acceleration
234    fn parse_simd_number(
235        &mut self,
236        input: &'a [u8],
237        _info: &SonicStructuralInfo,
238    ) -> DomainResult<LazyJsonValue<'a>> {
239        // SIMD validation of number format
240        if self.simd_enabled {
241            self.simd_validate_number(input)?;
242        }
243
244        Ok(LazyJsonValue::NumberSlice(input))
245    }
246
247    /// Parse boolean with SIMD acceleration
248    fn parse_simd_boolean(
249        &mut self,
250        input: &'a [u8],
251        _info: &SonicStructuralInfo,
252    ) -> DomainResult<LazyJsonValue<'a>> {
253        // SIMD comparison for "true" or "false"
254        if self.simd_enabled {
255            if input == b"true" {
256                return Ok(LazyJsonValue::Boolean(true));
257            } else if input == b"false" {
258                return Ok(LazyJsonValue::Boolean(false));
259            } else {
260                return Err(DomainError::InvalidInput(
261                    "Invalid boolean value".to_string(),
262                ));
263            }
264        }
265
266        // Fallback to regular parsing
267        match input {
268            b"true" => Ok(LazyJsonValue::Boolean(true)),
269            b"false" => Ok(LazyJsonValue::Boolean(false)),
270            _ => Err(DomainError::InvalidInput(
271                "Invalid boolean value".to_string(),
272            )),
273        }
274    }
275
276    // SIMD validation methods (simplified implementations)
277
278    fn simd_validate_object_structure(&self, input: &[u8]) -> DomainResult<()> {
279        // Simplified: just check that we have matching braces
280        // Real implementation would use SIMD to validate JSON structure
281        let open_count = input.iter().filter(|&&c| c == b'{').count();
282        let close_count = input.iter().filter(|&&c| c == b'}').count();
283
284        if open_count == close_count && open_count > 0 {
285            Ok(())
286        } else {
287            Err(DomainError::InvalidInput(
288                "Unmatched braces in object".to_string(),
289            ))
290        }
291    }
292
293    fn simd_validate_array_structure(&self, input: &[u8]) -> DomainResult<()> {
294        // Simplified: just check that we have matching brackets
295        let open_count = input.iter().filter(|&&c| c == b'[').count();
296        let close_count = input.iter().filter(|&&c| c == b']').count();
297
298        if open_count == close_count && open_count > 0 {
299            Ok(())
300        } else {
301            Err(DomainError::InvalidInput(
302                "Unmatched brackets in array".to_string(),
303            ))
304        }
305    }
306
307    fn simd_validate_number(&self, input: &[u8]) -> DomainResult<()> {
308        // Simplified number validation using SIMD concepts
309        // Real implementation would use SIMD instructions for fast validation
310
311        if input.is_empty() {
312            return Err(DomainError::InvalidInput("Empty number".to_string()));
313        }
314
315        // Quick ASCII digit check that could be SIMD-accelerated
316        let is_valid = input.iter().all(|&c| {
317            c.is_ascii_digit() || c == b'.' || c == b'-' || c == b'+' || c == b'e' || c == b'E'
318        });
319
320        if is_valid {
321            Ok(())
322        } else {
323            Err(DomainError::InvalidInput(
324                "Invalid number format".to_string(),
325            ))
326        }
327    }
328
329    fn simd_unescape_string(&self, input: &[u8]) -> DomainResult<String> {
330        // Simplified SIMD-style string unescaping
331        // Real implementation would use vector instructions for processing escapes
332
333        let mut result = Vec::with_capacity(input.len());
334        let mut i = 1; // Skip opening quote
335
336        while i < input.len() - 1 {
337            // Stop before closing quote
338            if input[i] == b'\\' && i + 1 < input.len() - 1 {
339                match input[i + 1] {
340                    b'n' => result.push(b'\n'),
341                    b'r' => result.push(b'\r'),
342                    b't' => result.push(b'\t'),
343                    b'\\' => result.push(b'\\'),
344                    b'"' => result.push(b'"'),
345                    c => result.push(c),
346                }
347                i += 2;
348            } else {
349                result.push(input[i]);
350                i += 1;
351            }
352        }
353
354        String::from_utf8(result)
355            .map_err(|e| DomainError::InvalidInput(format!("Invalid UTF-8: {e}")))
356    }
357
358    // Utility methods
359
360    fn detect_escapes(&self, input: &[u8]) -> bool {
361        input.contains(&b'\\')
362    }
363
364    fn is_simd_friendly(&self, input: &[u8]) -> bool {
365        // Check if input is large enough and aligned for SIMD processing
366        input.len() >= 32 && (input.as_ptr() as usize).is_multiple_of(32)
367    }
368
369    fn is_simd_available() -> bool {
370        // Check if SIMD instructions are available
371        #[cfg(target_arch = "x86_64")]
372        {
373            std::arch::is_x86_feature_detected!("avx2")
374        }
375        #[cfg(not(target_arch = "x86_64"))]
376        {
377            false
378        }
379    }
380
381    /// Get buffer from pool for intermediate processing
382    pub fn get_buffer(&mut self, min_size: usize) -> DomainResult<&mut PooledBuffer> {
383        if self.current_buffer.is_none()
384            || self.current_buffer.as_ref().unwrap().capacity() < min_size
385        {
386            let size = BufferSize::for_capacity(min_size);
387            self.current_buffer = Some(self.buffer_pool.get_buffer(size)?);
388        }
389
390        Ok(self.current_buffer.as_mut().unwrap())
391    }
392
393    /// Release current buffer back to pool
394    pub fn release_buffer(&mut self) {
395        self.current_buffer = None;
396    }
397}
398
399impl<'a> LazyParser<'a> for SimdZeroCopyParser<'a> {
400    type Output = SimdParseResult<'a>;
401    type Error = DomainError;
402
403    fn parse_lazy(&mut self, input: &'a [u8]) -> Result<Self::Output, Self::Error> {
404        self.parse_simd(input)
405    }
406
407    fn remaining(&self) -> &'a [u8] {
408        if self.position < self.input.len() {
409            &self.input[self.position..]
410        } else {
411            &[]
412        }
413    }
414
415    fn is_complete(&self) -> bool {
416        self.position >= self.input.len()
417    }
418
419    fn reset(&mut self) {
420        self.input = &[];
421        self.position = 0;
422        self.depth = 0;
423        self.release_buffer();
424    }
425}
426
/// Structural information from sonic-rs preprocessing
///
/// Produced by `sonic_preprocess` and consumed by the type-specific
/// `parse_simd_*` methods.
#[derive(Debug, Clone)]
struct SonicStructuralInfo {
    // JSON value kind inferred from the first non-whitespace byte.
    value_type: ValueType,
    // Index of that first non-whitespace byte within the input.
    start_pos: usize,
    // True when the input contains a backslash anywhere.
    has_escapes: bool,
    // True when the input is at least 32 bytes long and 32-byte aligned.
    is_simd_friendly: bool,
}
435
436impl Default for SimdZeroCopyConfig {
437    fn default() -> Self {
438        Self {
439            max_depth: 64,
440            enable_simd: true,
441            buffer_pool_config: None,
442            simd_threshold: 256,
443            track_memory_usage: true,
444        }
445    }
446}
447
448impl SimdZeroCopyConfig {
449    /// Configuration optimized for maximum performance
450    pub fn high_performance() -> Self {
451        Self {
452            max_depth: 128,
453            enable_simd: true,
454            buffer_pool_config: Some(crate::parser::buffer_pool::PoolConfig::simd_optimized()),
455            simd_threshold: 128,       // Lower threshold for more SIMD usage
456            track_memory_usage: false, // Disable for maximum speed
457        }
458    }
459
460    /// Configuration for memory-constrained environments
461    pub fn low_memory() -> Self {
462        Self {
463            max_depth: 32,
464            enable_simd: false,
465            buffer_pool_config: Some(crate::parser::buffer_pool::PoolConfig::low_memory()),
466            simd_threshold: 1024, // Higher threshold
467            track_memory_usage: true,
468        }
469    }
470}
471
472impl Default for SimdParsingStats {
473    fn default() -> Self {
474        Self {
475            total_parses: 0,
476            simd_accelerated_parses: 0,
477            total_bytes_processed: 0,
478            average_processing_time_ns: 0,
479            simd_efficiency: 0.0,
480        }
481    }
482}
483
484impl SimdParsingStats {
485    /// Calculate SIMD usage ratio
486    pub fn simd_usage_ratio(&self) -> f64 {
487        if self.total_parses == 0 {
488            0.0
489        } else {
490            self.simd_accelerated_parses as f64 / self.total_parses as f64
491        }
492    }
493
494    /// Calculate average throughput in MB/s
495    pub fn average_throughput_mbps(&self) -> f64 {
496        if self.average_processing_time_ns == 0 {
497            0.0
498        } else {
499            let seconds = self.average_processing_time_ns as f64 / 1_000_000_000.0;
500            let mb = self.total_bytes_processed as f64 / (1024.0 * 1024.0);
501            mb / seconds
502        }
503    }
504}
505
// Unit tests for the parser's public entry points.
//
// Every input used here is shorter than the 256-byte cut-off applied in
// `parse_simd`, so these tests exercise the non-SIMD fallback path.
#[cfg(test)]
mod tests {
    use super::*;

    // SIMD may only be reported as enabled when the CPU check agrees.
    #[test]
    fn test_simd_parser_creation() {
        let parser = SimdZeroCopyParser::new();
        assert!(!parser.simd_enabled || SimdZeroCopyParser::is_simd_available());
    }

    // A plain string without escapes is returned as a borrowed slice
    // holding the bytes between the quotes.
    #[test]
    fn test_simple_parsing() {
        let mut parser = SimdZeroCopyParser::new();
        let input = br#""hello world""#;

        let result = parser.parse_simd(input).unwrap();
        match result.value {
            LazyJsonValue::StringBorrowed(s) => {
                assert_eq!(s, b"hello world");
            }
            _ => panic!("Expected string"),
        }
    }

    // A number is returned as a slice over the original digits.
    #[test]
    fn test_number_parsing() {
        let mut parser = SimdZeroCopyParser::new();
        let input = b"123.456";

        let result = parser.parse_simd(input).unwrap();
        match result.value {
            LazyJsonValue::NumberSlice(n) => {
                assert_eq!(n, b"123.456");
            }
            _ => panic!("Expected number"),
        }
    }

    // Both boolean literals parse, with a `reset` between the two parses.
    #[test]
    fn test_boolean_parsing() {
        let mut parser = SimdZeroCopyParser::new();

        let result = parser.parse_simd(b"true").unwrap();
        assert_eq!(result.value, LazyJsonValue::Boolean(true));

        parser.reset();
        let result = parser.parse_simd(b"false").unwrap();
        assert_eq!(result.value, LazyJsonValue::Boolean(false));
    }

    // An object is returned as a slice equal to the whole input.
    #[test]
    fn test_object_parsing() {
        let mut parser = SimdZeroCopyParser::new();
        let input = br#"{"key": "value", "number": 42}"#;

        let result = parser.parse_simd(input).unwrap();
        match result.value {
            LazyJsonValue::ObjectSlice(obj) => {
                assert_eq!(obj, input);
            }
            _ => panic!("Expected object"),
        }
    }

    // A borrowed string reports no allocated bytes but some referenced bytes.
    #[test]
    fn test_memory_usage_tracking() {
        let mut parser = SimdZeroCopyParser::new();
        let input = br#""test string""#;

        let result = parser.parse_simd(input).unwrap();
        assert_eq!(result.memory_usage.allocated_bytes, 0); // Zero-copy string
        assert!(result.memory_usage.referenced_bytes > 0);
    }

    // A requested buffer is at least as large as asked for, and releasing
    // it clears the parser's held buffer.
    #[test]
    fn test_buffer_pool_integration() {
        let mut parser = SimdZeroCopyParser::new();
        let buffer = parser.get_buffer(1024).unwrap();
        assert!(buffer.capacity() >= 1024);

        parser.release_buffer();
        assert!(parser.current_buffer.is_none());
    }
}
589}