pjson_rs/parser/
simd_zero_copy.rs

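//! SIMD-accelerated zero-copy parser designed around sonic-rs
//!
//! This module combines the benefits of SIMD acceleration from sonic-rs
//! with zero-copy parsing techniques to achieve maximum performance.
//!
//! The sonic-rs integration is currently a simplified placeholder: the
//! structural pre-pass and the validation helpers in this file are scalar
//! stand-ins for the vectorised versions.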
5
6use crate::{
7    domain::{DomainError, DomainResult},
8    parser::{
9        ValueType,
10        buffer_pool::{BufferPool, BufferSize, PooledBuffer},
11        zero_copy::{LazyJsonValue, LazyParser, MemoryUsage},
12    },
13};
14use std::{marker::PhantomData, sync::Arc};
15
/// SIMD-accelerated zero-copy parser
///
/// The parser borrows its input for `'a` and returns values that may point
/// straight into that input instead of copying it.
pub struct SimdZeroCopyParser<'a> {
    /// Pool that `get_buffer` draws scratch buffers from.
    buffer_pool: Arc<BufferPool>,
    /// Scratch buffer currently held by the parser; cleared by `release_buffer`.
    current_buffer: Option<PooledBuffer>,
    /// Input handed to the most recent `parse_simd` call (empty before the first call).
    input: &'a [u8],
    /// Read offset into `input`; reset to 0 by `parse_simd` and `reset`.
    /// NOTE(review): nothing in this file advances it after a parse.
    position: usize,
    /// Current nesting depth; reset to 0 by `parse_simd` and `reset`.
    depth: usize,
    /// Nesting limit copied from `SimdZeroCopyConfig::max_depth`.
    /// NOTE(review): not enforced anywhere in this file.
    max_depth: usize,
    /// True only when the configuration asked for SIMD *and* the CPU supports it.
    simd_enabled: bool,
    /// Lifetime marker for `'a`.
    _phantom: PhantomData<&'a ()>,
}
27
/// Configuration for SIMD zero-copy parser
///
/// Consumed by `SimdZeroCopyParser::with_config`. See `Default`,
/// `high_performance` and `low_memory` for ready-made presets.
#[derive(Debug, Clone)]
pub struct SimdZeroCopyConfig {
    /// Maximum nesting depth for safety
    pub max_depth: usize,
    /// Enable SIMD acceleration when available
    pub enable_simd: bool,
    /// Buffer pool configuration; `None` makes the parser build a default pool
    pub buffer_pool_config: Option<crate::parser::buffer_pool::PoolConfig>,
    /// Minimum size for SIMD processing
    ///
    /// NOTE(review): `with_config` does not store this value; `parse_simd`
    /// uses a fixed 256-byte cut-off instead.
    pub simd_threshold: usize,
    /// Enable memory usage tracking
    ///
    /// NOTE(review): not read anywhere in this file.
    pub track_memory_usage: bool,
}
42
/// Parse result containing both the value and memory statistics
///
/// Returned by `SimdZeroCopyParser::parse_simd`.
#[derive(Debug)]
pub struct SimdParseResult<'a> {
    /// The parsed value; borrowed variants point into the original input.
    pub value: LazyJsonValue<'a>,
    /// Memory statistics reported by `value.memory_usage()`.
    pub memory_usage: MemoryUsage,
    /// Whether the SIMD code path was selected for this parse.
    pub simd_used: bool,
    /// Wall-clock duration of the parse in nanoseconds.
    pub processing_time_ns: u64,
}
51
/// Statistics about SIMD parsing performance
///
/// A plain counter record; nothing in this file updates it automatically.
#[derive(Debug, Clone)]
pub struct SimdParsingStats {
    /// Number of parse calls recorded.
    pub total_parses: u64,
    /// Number of recorded parses that used the SIMD path.
    pub simd_accelerated_parses: u64,
    /// Sum of the input sizes, in bytes, over all recorded parses.
    pub total_bytes_processed: u64,
    /// Mean duration of a single parse in nanoseconds.
    pub average_processing_time_ns: u64,
    /// SIMD efficiency figure.
    /// NOTE(review): the scale is not defined in this file; confirm with the producer.
    pub simd_efficiency: f64,
}
61
62impl<'a> Default for SimdZeroCopyParser<'a> {
63    fn default() -> Self {
64        Self::new()
65    }
66}
67
68impl<'a> SimdZeroCopyParser<'a> {
69    /// Create new SIMD zero-copy parser with default configuration
70    pub fn new() -> Self {
71        Self::with_config(SimdZeroCopyConfig::default())
72    }
73
74    /// Create parser with custom configuration
75    pub fn with_config(config: SimdZeroCopyConfig) -> Self {
76        let buffer_pool = if let Some(pool_config) = config.buffer_pool_config {
77            Arc::new(BufferPool::with_config(pool_config))
78        } else {
79            Arc::new(BufferPool::new())
80        };
81
82        Self {
83            buffer_pool,
84            current_buffer: None,
85            input: &[],
86            position: 0,
87            depth: 0,
88            max_depth: config.max_depth,
89            simd_enabled: config.enable_simd && Self::is_simd_available(),
90            _phantom: PhantomData,
91        }
92    }
93
94    /// Parse JSON with SIMD acceleration and zero-copy optimization
95    pub fn parse_simd(&mut self, input: &'a [u8]) -> DomainResult<SimdParseResult<'a>> {
96        let start_time = std::time::Instant::now();
97
98        self.input = input;
99        self.position = 0;
100        self.depth = 0;
101
102        // Determine if we should use SIMD based on input size
103        let use_simd = self.simd_enabled && input.len() >= 256; // Threshold for SIMD benefit
104
105        let value = if use_simd {
106            self.parse_with_simd(input)?
107        } else {
108            self.parse_without_simd(input)?
109        };
110
111        let processing_time = start_time.elapsed().as_nanos() as u64;
112        let memory_usage = value.memory_usage();
113
114        Ok(SimdParseResult {
115            value,
116            memory_usage,
117            simd_used: use_simd,
118            processing_time_ns: processing_time,
119        })
120    }
121
122    /// Parse using sonic-rs SIMD acceleration
123    fn parse_with_simd(&mut self, input: &'a [u8]) -> DomainResult<LazyJsonValue<'a>> {
124        // First, use sonic-rs to validate and get structural information
125        let sonic_result = self.sonic_preprocess(input)?;
126
127        // Then do zero-copy extraction based on sonic's findings
128        match sonic_result.value_type {
129            ValueType::Object => self.parse_simd_object(input, &sonic_result),
130            ValueType::Array => self.parse_simd_array(input, &sonic_result),
131            ValueType::String => self.parse_simd_string(input, &sonic_result),
132            ValueType::Number => self.parse_simd_number(input, &sonic_result),
133            ValueType::Boolean => self.parse_simd_boolean(input, &sonic_result),
134            ValueType::Null => Ok(LazyJsonValue::Null),
135        }
136    }
137
138    /// Parse without SIMD acceleration (fallback to pure zero-copy)
139    fn parse_without_simd(&mut self, input: &'a [u8]) -> DomainResult<LazyJsonValue<'a>> {
140        // Use the zero-copy parser directly
141        let mut zero_copy_parser = crate::parser::zero_copy::ZeroCopyParser::new();
142        zero_copy_parser.parse_lazy(input)
143    }
144
145    /// Use sonic-rs for structural analysis
146    fn sonic_preprocess(&self, input: &[u8]) -> DomainResult<SonicStructuralInfo> {
147        // This is a simplified version - actual implementation would use sonic-rs
148        // to get structural information about the JSON
149
150        if input.is_empty() {
151            return Err(DomainError::InvalidInput("Empty input".to_string()));
152        }
153
154        // Detect value type from first non-whitespace character
155        let mut pos = 0;
156        while pos < input.len() && input[pos].is_ascii_whitespace() {
157            pos += 1;
158        }
159
160        if pos >= input.len() {
161            return Err(DomainError::InvalidInput("Only whitespace".to_string()));
162        }
163
164        let value_type = match input[pos] {
165            b'{' => ValueType::Object,
166            b'[' => ValueType::Array,
167            b'"' => ValueType::String,
168            b't' | b'f' => ValueType::Boolean,
169            b'n' => ValueType::Null,
170            b'-' | b'0'..=b'9' => ValueType::Number,
171            _ => {
172                let ch = input[pos] as char;
173                return Err(DomainError::InvalidInput(format!(
174                    "Invalid JSON start character: {ch}"
175                )));
176            }
177        };
178
179        Ok(SonicStructuralInfo {
180            value_type,
181            start_pos: pos,
182            estimated_size: input.len(),
183            has_escapes: self.detect_escapes(input),
184            is_simd_friendly: self.is_simd_friendly(input),
185        })
186    }
187
188    /// Parse object with SIMD acceleration
189    fn parse_simd_object(
190        &mut self,
191        input: &'a [u8],
192        info: &SonicStructuralInfo,
193    ) -> DomainResult<LazyJsonValue<'a>> {
194        // For objects, we still return a slice but use SIMD for validation
195        if info.is_simd_friendly {
196            // Use SIMD for fast validation of structure
197            self.simd_validate_object_structure(input)?;
198        }
199
200        // Return zero-copy slice
201        Ok(LazyJsonValue::ObjectSlice(input))
202    }
203
204    /// Parse array with SIMD acceleration
205    fn parse_simd_array(
206        &mut self,
207        input: &'a [u8],
208        info: &SonicStructuralInfo,
209    ) -> DomainResult<LazyJsonValue<'a>> {
210        if info.is_simd_friendly {
211            // Use SIMD for fast validation of array structure
212            self.simd_validate_array_structure(input)?;
213        }
214
215        Ok(LazyJsonValue::ArraySlice(input))
216    }
217
218    /// Parse string with SIMD acceleration
219    fn parse_simd_string(
220        &mut self,
221        input: &'a [u8],
222        info: &SonicStructuralInfo,
223    ) -> DomainResult<LazyJsonValue<'a>> {
224        if !info.has_escapes {
225            // No escapes - pure zero copy
226            let start = info.start_pos + 1; // Skip opening quote
227            let end = input.len() - 1; // Skip closing quote
228            Ok(LazyJsonValue::StringBorrowed(&input[start..end]))
229        } else {
230            // Has escapes - need to process with SIMD-accelerated unescaping
231            let unescaped = self.simd_unescape_string(input)?;
232            Ok(LazyJsonValue::StringOwned(unescaped))
233        }
234    }
235
236    /// Parse number with SIMD acceleration
237    fn parse_simd_number(
238        &mut self,
239        input: &'a [u8],
240        _info: &SonicStructuralInfo,
241    ) -> DomainResult<LazyJsonValue<'a>> {
242        // SIMD validation of number format
243        if self.simd_enabled {
244            self.simd_validate_number(input)?;
245        }
246
247        Ok(LazyJsonValue::NumberSlice(input))
248    }
249
250    /// Parse boolean with SIMD acceleration
251    fn parse_simd_boolean(
252        &mut self,
253        input: &'a [u8],
254        _info: &SonicStructuralInfo,
255    ) -> DomainResult<LazyJsonValue<'a>> {
256        // SIMD comparison for "true" or "false"
257        if self.simd_enabled {
258            if input == b"true" {
259                return Ok(LazyJsonValue::Boolean(true));
260            } else if input == b"false" {
261                return Ok(LazyJsonValue::Boolean(false));
262            } else {
263                return Err(DomainError::InvalidInput(
264                    "Invalid boolean value".to_string(),
265                ));
266            }
267        }
268
269        // Fallback to regular parsing
270        match input {
271            b"true" => Ok(LazyJsonValue::Boolean(true)),
272            b"false" => Ok(LazyJsonValue::Boolean(false)),
273            _ => Err(DomainError::InvalidInput(
274                "Invalid boolean value".to_string(),
275            )),
276        }
277    }
278
279    // SIMD validation methods (simplified implementations)
280
281    fn simd_validate_object_structure(&self, input: &[u8]) -> DomainResult<()> {
282        // Simplified: just check that we have matching braces
283        // Real implementation would use SIMD to validate JSON structure
284        let open_count = input.iter().filter(|&&c| c == b'{').count();
285        let close_count = input.iter().filter(|&&c| c == b'}').count();
286
287        if open_count == close_count && open_count > 0 {
288            Ok(())
289        } else {
290            Err(DomainError::InvalidInput(
291                "Unmatched braces in object".to_string(),
292            ))
293        }
294    }
295
296    fn simd_validate_array_structure(&self, input: &[u8]) -> DomainResult<()> {
297        // Simplified: just check that we have matching brackets
298        let open_count = input.iter().filter(|&&c| c == b'[').count();
299        let close_count = input.iter().filter(|&&c| c == b']').count();
300
301        if open_count == close_count && open_count > 0 {
302            Ok(())
303        } else {
304            Err(DomainError::InvalidInput(
305                "Unmatched brackets in array".to_string(),
306            ))
307        }
308    }
309
310    fn simd_validate_number(&self, input: &[u8]) -> DomainResult<()> {
311        // Simplified number validation using SIMD concepts
312        // Real implementation would use SIMD instructions for fast validation
313
314        if input.is_empty() {
315            return Err(DomainError::InvalidInput("Empty number".to_string()));
316        }
317
318        // Quick ASCII digit check that could be SIMD-accelerated
319        let is_valid = input.iter().all(|&c| {
320            c.is_ascii_digit() || c == b'.' || c == b'-' || c == b'+' || c == b'e' || c == b'E'
321        });
322
323        if is_valid {
324            Ok(())
325        } else {
326            Err(DomainError::InvalidInput(
327                "Invalid number format".to_string(),
328            ))
329        }
330    }
331
332    fn simd_unescape_string(&self, input: &[u8]) -> DomainResult<String> {
333        // Simplified SIMD-style string unescaping
334        // Real implementation would use vector instructions for processing escapes
335
336        let mut result = Vec::with_capacity(input.len());
337        let mut i = 1; // Skip opening quote
338
339        while i < input.len() - 1 {
340            // Stop before closing quote
341            if input[i] == b'\\' && i + 1 < input.len() - 1 {
342                match input[i + 1] {
343                    b'n' => result.push(b'\n'),
344                    b'r' => result.push(b'\r'),
345                    b't' => result.push(b'\t'),
346                    b'\\' => result.push(b'\\'),
347                    b'"' => result.push(b'"'),
348                    c => result.push(c),
349                }
350                i += 2;
351            } else {
352                result.push(input[i]);
353                i += 1;
354            }
355        }
356
357        String::from_utf8(result)
358            .map_err(|e| DomainError::InvalidInput(format!("Invalid UTF-8: {e}")))
359    }
360
361    // Utility methods
362
363    fn detect_escapes(&self, input: &[u8]) -> bool {
364        input.contains(&b'\\')
365    }
366
367    fn is_simd_friendly(&self, input: &[u8]) -> bool {
368        // Check if input is large enough and aligned for SIMD processing
369        input.len() >= 32 && (input.as_ptr() as usize).is_multiple_of(32)
370    }
371
372    fn is_simd_available() -> bool {
373        // Check if SIMD instructions are available
374        #[cfg(target_arch = "x86_64")]
375        {
376            std::arch::is_x86_feature_detected!("avx2")
377        }
378        #[cfg(not(target_arch = "x86_64"))]
379        {
380            false
381        }
382    }
383
384    /// Get buffer from pool for intermediate processing
385    pub fn get_buffer(&mut self, min_size: usize) -> DomainResult<&mut PooledBuffer> {
386        if self.current_buffer.is_none()
387            || self.current_buffer.as_ref().unwrap().capacity() < min_size
388        {
389            let size = BufferSize::for_capacity(min_size);
390            self.current_buffer = Some(self.buffer_pool.get_buffer(size)?);
391        }
392
393        Ok(self.current_buffer.as_mut().unwrap())
394    }
395
396    /// Release current buffer back to pool
397    pub fn release_buffer(&mut self) {
398        self.current_buffer = None;
399    }
400}
401
402impl<'a> LazyParser<'a> for SimdZeroCopyParser<'a> {
403    type Output = SimdParseResult<'a>;
404    type Error = DomainError;
405
406    fn parse_lazy(&mut self, input: &'a [u8]) -> Result<Self::Output, Self::Error> {
407        self.parse_simd(input)
408    }
409
410    fn remaining(&self) -> &'a [u8] {
411        if self.position < self.input.len() {
412            &self.input[self.position..]
413        } else {
414            &[]
415        }
416    }
417
418    fn is_complete(&self) -> bool {
419        self.position >= self.input.len()
420    }
421
422    fn reset(&mut self) {
423        self.input = &[];
424        self.position = 0;
425        self.depth = 0;
426        self.release_buffer();
427    }
428}
429
/// Structural information from sonic-rs preprocessing
///
/// Produced by `sonic_preprocess` and consumed by the `parse_simd_*` helpers.
#[derive(Debug, Clone)]
struct SonicStructuralInfo {
    /// Kind of JSON value, decided from the first non-whitespace byte.
    value_type: ValueType,
    /// Index of the first non-whitespace byte in the input.
    start_pos: usize,
    /// Set to the full input length by `sonic_preprocess`.
    estimated_size: usize,
    /// True when the input contains a backslash anywhere.
    has_escapes: bool,
    /// True when the input is at least 32 bytes long and 32-byte aligned.
    is_simd_friendly: bool,
}
439
440impl Default for SimdZeroCopyConfig {
441    fn default() -> Self {
442        Self {
443            max_depth: 64,
444            enable_simd: true,
445            buffer_pool_config: None,
446            simd_threshold: 256,
447            track_memory_usage: true,
448        }
449    }
450}
451
452impl SimdZeroCopyConfig {
453    /// Configuration optimized for maximum performance
454    pub fn high_performance() -> Self {
455        Self {
456            max_depth: 128,
457            enable_simd: true,
458            buffer_pool_config: Some(crate::parser::buffer_pool::PoolConfig::simd_optimized()),
459            simd_threshold: 128,       // Lower threshold for more SIMD usage
460            track_memory_usage: false, // Disable for maximum speed
461        }
462    }
463
464    /// Configuration for memory-constrained environments
465    pub fn low_memory() -> Self {
466        Self {
467            max_depth: 32,
468            enable_simd: false,
469            buffer_pool_config: Some(crate::parser::buffer_pool::PoolConfig::low_memory()),
470            simd_threshold: 1024, // Higher threshold
471            track_memory_usage: true,
472        }
473    }
474}
475
476impl Default for SimdParsingStats {
477    fn default() -> Self {
478        Self {
479            total_parses: 0,
480            simd_accelerated_parses: 0,
481            total_bytes_processed: 0,
482            average_processing_time_ns: 0,
483            simd_efficiency: 0.0,
484        }
485    }
486}
487
488impl SimdParsingStats {
489    /// Calculate SIMD usage ratio
490    pub fn simd_usage_ratio(&self) -> f64 {
491        if self.total_parses == 0 {
492            0.0
493        } else {
494            self.simd_accelerated_parses as f64 / self.total_parses as f64
495        }
496    }
497
498    /// Calculate average throughput in MB/s
499    pub fn average_throughput_mbps(&self) -> f64 {
500        if self.average_processing_time_ns == 0 {
501            0.0
502        } else {
503            let seconds = self.average_processing_time_ns as f64 / 1_000_000_000.0;
504            let mb = self.total_bytes_processed as f64 / (1024.0 * 1024.0);
505            mb / seconds
506        }
507    }
508}
509
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE: every input below is shorter than 256 bytes, so these tests go
    // through the non-SIMD fallback path of `parse_simd`.

    /// SIMD may only be enabled when the CPU actually supports it.
    #[test]
    fn test_simd_parser_creation() {
        let parser = SimdZeroCopyParser::new();
        assert!(!parser.simd_enabled || SimdZeroCopyParser::is_simd_available());
    }

    /// A plain string is returned as a borrowed slice without its quotes.
    #[test]
    fn test_simple_parsing() {
        let input = br#""hello world""#;
        let mut parser = SimdZeroCopyParser::new();

        let parsed = parser.parse_simd(input).unwrap();
        let LazyJsonValue::StringBorrowed(text) = parsed.value else {
            panic!("Expected string")
        };
        assert_eq!(text, b"hello world");
    }

    /// A number is returned as the untouched input slice.
    #[test]
    fn test_number_parsing() {
        let input = b"123.456";
        let mut parser = SimdZeroCopyParser::new();

        let parsed = parser.parse_simd(input).unwrap();
        let LazyJsonValue::NumberSlice(digits) = parsed.value else {
            panic!("Expected number")
        };
        assert_eq!(digits, b"123.456");
    }

    /// Both boolean literals parse, with a reset in between.
    #[test]
    fn test_boolean_parsing() {
        let mut parser = SimdZeroCopyParser::new();

        let parsed_true = parser.parse_simd(b"true").unwrap();
        assert_eq!(parsed_true.value, LazyJsonValue::Boolean(true));

        parser.reset();
        let parsed_false = parser.parse_simd(b"false").unwrap();
        assert_eq!(parsed_false.value, LazyJsonValue::Boolean(false));
    }

    /// An object is returned as a slice covering the whole input.
    #[test]
    fn test_object_parsing() {
        let input = br#"{"key": "value", "number": 42}"#;
        let mut parser = SimdZeroCopyParser::new();

        let parsed = parser.parse_simd(input).unwrap();
        let LazyJsonValue::ObjectSlice(object) = parsed.value else {
            panic!("Expected object")
        };
        assert_eq!(object, input);
    }

    /// A borrowed string allocates nothing but references the input.
    #[test]
    fn test_memory_usage_tracking() {
        let input = br#""test string""#;
        let mut parser = SimdZeroCopyParser::new();

        let usage = parser.parse_simd(input).unwrap().memory_usage;
        assert_eq!(usage.allocated_bytes, 0); // Zero-copy string
        assert!(usage.referenced_bytes > 0);
    }

    /// Buffers come from the pool with enough capacity and can be released.
    #[test]
    fn test_buffer_pool_integration() {
        let mut parser = SimdZeroCopyParser::new();

        let capacity = parser.get_buffer(1024).unwrap().capacity();
        assert!(capacity >= 1024);

        parser.release_buffer();
        assert!(parser.current_buffer.is_none());
    }
}
593}