pjson_rs/parser/
simd_zero_copy.rs

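//! SIMD-accelerated zero-copy parser (sonic-rs integration planned)
//!
//! This module is intended to combine SIMD acceleration from sonic-rs with
//! zero-copy parsing techniques. The "SIMD" routines below are currently
//! simplified scalar placeholders; AVX2 availability is detected at runtime
//! and inputs that do not qualify fall back to the plain zero-copy parser.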
5
6use crate::{
7    domain::{DomainResult, DomainError},
8    parser::{
9        buffer_pool::{BufferPool, PooledBuffer, BufferSize},
10        zero_copy::{LazyParser, LazyJsonValue, MemoryUsage},
11        ValueType,
12    },
13};
14use std::{
15    marker::PhantomData,
16    sync::Arc,
17};
18
19/// SIMD-accelerated zero-copy parser
20pub struct SimdZeroCopyParser<'a> {
21    buffer_pool: Arc<BufferPool>,
22    current_buffer: Option<PooledBuffer>,
23    input: &'a [u8],
24    position: usize,
25    depth: usize,
26    max_depth: usize,
27    simd_enabled: bool,
28    _phantom: PhantomData<&'a ()>,
29}
30
31/// Configuration for SIMD zero-copy parser
32#[derive(Debug, Clone)]
33pub struct SimdZeroCopyConfig {
34    /// Maximum nesting depth for safety
35    pub max_depth: usize,
36    /// Enable SIMD acceleration when available
37    pub enable_simd: bool,
38    /// Buffer pool configuration
39    pub buffer_pool_config: Option<crate::parser::buffer_pool::PoolConfig>,
40    /// Minimum size for SIMD processing
41    pub simd_threshold: usize,
42    /// Enable memory usage tracking
43    pub track_memory_usage: bool,
44}
45
46/// Parse result containing both the value and memory statistics
47#[derive(Debug)]
48pub struct SimdParseResult<'a> {
49    pub value: LazyJsonValue<'a>,
50    pub memory_usage: MemoryUsage,
51    pub simd_used: bool,
52    pub processing_time_ns: u64,
53}
54
/// Aggregate counters describing SIMD parsing performance.
#[derive(Debug, Clone)]
pub struct SimdParsingStats {
    /// Number of parse calls recorded.
    pub total_parses: u64,
    /// Number of those parses that used the SIMD path.
    pub simd_accelerated_parses: u64,
    /// Sum of input sizes across all recorded parses, in bytes.
    pub total_bytes_processed: u64,
    /// Mean time per parse, in nanoseconds.
    pub average_processing_time_ns: u64,
    /// SIMD efficiency figure; this file only ever initialises it to `0.0`.
    pub simd_efficiency: f64,
}
64
65impl<'a> SimdZeroCopyParser<'a> {
66    /// Create new SIMD zero-copy parser with default configuration
67    pub fn new() -> Self {
68        Self::with_config(SimdZeroCopyConfig::default())
69    }
70
71    /// Create parser with custom configuration
72    pub fn with_config(config: SimdZeroCopyConfig) -> Self {
73        let buffer_pool = if let Some(pool_config) = config.buffer_pool_config {
74            Arc::new(BufferPool::with_config(pool_config))
75        } else {
76            Arc::new(BufferPool::new())
77        };
78
79        Self {
80            buffer_pool,
81            current_buffer: None,
82            input: &[],
83            position: 0,
84            depth: 0,
85            max_depth: config.max_depth,
86            simd_enabled: config.enable_simd && Self::is_simd_available(),
87            _phantom: PhantomData,
88        }
89    }
90
91    /// Parse JSON with SIMD acceleration and zero-copy optimization
92    pub fn parse_simd(&mut self, input: &'a [u8]) -> DomainResult<SimdParseResult<'a>> {
93        let start_time = std::time::Instant::now();
94        
95        self.input = input;
96        self.position = 0;
97        self.depth = 0;
98
99        // Determine if we should use SIMD based on input size
100        let use_simd = self.simd_enabled && input.len() >= 256; // Threshold for SIMD benefit
101
102        let value = if use_simd {
103            self.parse_with_simd(input)?
104        } else {
105            self.parse_without_simd(input)?
106        };
107
108        let processing_time = start_time.elapsed().as_nanos() as u64;
109        let memory_usage = value.memory_usage();
110
111        Ok(SimdParseResult {
112            value,
113            memory_usage,
114            simd_used: use_simd,
115            processing_time_ns: processing_time,
116        })
117    }
118
119    /// Parse using sonic-rs SIMD acceleration
120    fn parse_with_simd(&mut self, input: &'a [u8]) -> DomainResult<LazyJsonValue<'a>> {
121        // First, use sonic-rs to validate and get structural information
122        let sonic_result = self.sonic_preprocess(input)?;
123        
124        // Then do zero-copy extraction based on sonic's findings
125        match sonic_result.value_type {
126            ValueType::Object => self.parse_simd_object(input, &sonic_result),
127            ValueType::Array => self.parse_simd_array(input, &sonic_result),
128            ValueType::String => self.parse_simd_string(input, &sonic_result),
129            ValueType::Number => self.parse_simd_number(input, &sonic_result),
130            ValueType::Boolean => self.parse_simd_boolean(input, &sonic_result),
131            ValueType::Null => Ok(LazyJsonValue::Null),
132        }
133    }
134
135    /// Parse without SIMD acceleration (fallback to pure zero-copy)
136    fn parse_without_simd(&mut self, input: &'a [u8]) -> DomainResult<LazyJsonValue<'a>> {
137        // Use the zero-copy parser directly
138        let mut zero_copy_parser = crate::parser::zero_copy::ZeroCopyParser::with_max_depth(self.max_depth);
139        zero_copy_parser.parse_lazy(input)
140    }
141
142    /// Use sonic-rs for structural analysis
143    fn sonic_preprocess(&self, input: &[u8]) -> DomainResult<SonicStructuralInfo> {
144        // This is a simplified version - actual implementation would use sonic-rs
145        // to get structural information about the JSON
146        
147        if input.is_empty() {
148            return Err(DomainError::InvalidInput("Empty input".to_string()));
149        }
150
151        // Detect value type from first non-whitespace character
152        let mut pos = 0;
153        while pos < input.len() && input[pos].is_ascii_whitespace() {
154            pos += 1;
155        }
156
157        if pos >= input.len() {
158            return Err(DomainError::InvalidInput("Only whitespace".to_string()));
159        }
160
161        let value_type = match input[pos] {
162            b'{' => ValueType::Object,
163            b'[' => ValueType::Array,
164            b'"' => ValueType::String,
165            b't' | b'f' => ValueType::Boolean,
166            b'n' => ValueType::Null,
167            b'-' | b'0'..=b'9' => ValueType::Number,
168            _ => {
169                let ch = input[pos] as char;
170                return Err(DomainError::InvalidInput(format!("Invalid JSON start character: {ch}")));
171            },
172        };
173
174        Ok(SonicStructuralInfo {
175            value_type,
176            start_pos: pos,
177            estimated_size: input.len(),
178            has_escapes: self.detect_escapes(input),
179            is_simd_friendly: self.is_simd_friendly(input),
180        })
181    }
182
183    /// Parse object with SIMD acceleration
184    fn parse_simd_object(&mut self, input: &'a [u8], info: &SonicStructuralInfo) -> DomainResult<LazyJsonValue<'a>> {
185        // For objects, we still return a slice but use SIMD for validation
186        if info.is_simd_friendly {
187            // Use SIMD for fast validation of structure
188            self.simd_validate_object_structure(input)?;
189        }
190        
191        // Return zero-copy slice
192        Ok(LazyJsonValue::ObjectSlice(input))
193    }
194
195    /// Parse array with SIMD acceleration
196    fn parse_simd_array(&mut self, input: &'a [u8], info: &SonicStructuralInfo) -> DomainResult<LazyJsonValue<'a>> {
197        if info.is_simd_friendly {
198            // Use SIMD for fast validation of array structure
199            self.simd_validate_array_structure(input)?;
200        }
201        
202        Ok(LazyJsonValue::ArraySlice(input))
203    }
204
205    /// Parse string with SIMD acceleration
206    fn parse_simd_string(&mut self, input: &'a [u8], info: &SonicStructuralInfo) -> DomainResult<LazyJsonValue<'a>> {
207        if !info.has_escapes {
208            // No escapes - pure zero copy
209            let start = info.start_pos + 1; // Skip opening quote
210            let end = input.len() - 1; // Skip closing quote
211            Ok(LazyJsonValue::StringBorrowed(&input[start..end]))
212        } else {
213            // Has escapes - need to process with SIMD-accelerated unescaping
214            let unescaped = self.simd_unescape_string(input)?;
215            Ok(LazyJsonValue::StringOwned(unescaped))
216        }
217    }
218
219    /// Parse number with SIMD acceleration
220    fn parse_simd_number(&mut self, input: &'a [u8], _info: &SonicStructuralInfo) -> DomainResult<LazyJsonValue<'a>> {
221        // SIMD validation of number format
222        if self.simd_enabled {
223            self.simd_validate_number(input)?;
224        }
225        
226        Ok(LazyJsonValue::NumberSlice(input))
227    }
228
229    /// Parse boolean with SIMD acceleration
230    fn parse_simd_boolean(&mut self, input: &'a [u8], _info: &SonicStructuralInfo) -> DomainResult<LazyJsonValue<'a>> {
231        // SIMD comparison for "true" or "false"
232        if self.simd_enabled {
233            if input == b"true" {
234                return Ok(LazyJsonValue::Boolean(true));
235            } else if input == b"false" {
236                return Ok(LazyJsonValue::Boolean(false));
237            } else {
238                return Err(DomainError::InvalidInput("Invalid boolean value".to_string()));
239            }
240        }
241
242        // Fallback to regular parsing
243        match input {
244            b"true" => Ok(LazyJsonValue::Boolean(true)),
245            b"false" => Ok(LazyJsonValue::Boolean(false)),
246            _ => Err(DomainError::InvalidInput("Invalid boolean value".to_string())),
247        }
248    }
249
250    // SIMD validation methods (simplified implementations)
251
252    fn simd_validate_object_structure(&self, input: &[u8]) -> DomainResult<()> {
253        // Simplified: just check that we have matching braces
254        // Real implementation would use SIMD to validate JSON structure
255        let open_count = input.iter().filter(|&&c| c == b'{').count();
256        let close_count = input.iter().filter(|&&c| c == b'}').count();
257        
258        if open_count == close_count && open_count > 0 {
259            Ok(())
260        } else {
261            Err(DomainError::InvalidInput("Unmatched braces in object".to_string()))
262        }
263    }
264
265    fn simd_validate_array_structure(&self, input: &[u8]) -> DomainResult<()> {
266        // Simplified: just check that we have matching brackets
267        let open_count = input.iter().filter(|&&c| c == b'[').count();
268        let close_count = input.iter().filter(|&&c| c == b']').count();
269        
270        if open_count == close_count && open_count > 0 {
271            Ok(())
272        } else {
273            Err(DomainError::InvalidInput("Unmatched brackets in array".to_string()))
274        }
275    }
276
277    fn simd_validate_number(&self, input: &[u8]) -> DomainResult<()> {
278        // Simplified number validation using SIMD concepts
279        // Real implementation would use SIMD instructions for fast validation
280        
281        if input.is_empty() {
282            return Err(DomainError::InvalidInput("Empty number".to_string()));
283        }
284
285        // Quick ASCII digit check that could be SIMD-accelerated
286        let is_valid = input.iter().all(|&c| {
287            c.is_ascii_digit() || c == b'.' || c == b'-' || c == b'+' || c == b'e' || c == b'E'
288        });
289
290        if is_valid {
291            Ok(())
292        } else {
293            Err(DomainError::InvalidInput("Invalid number format".to_string()))
294        }
295    }
296
297    fn simd_unescape_string(&self, input: &[u8]) -> DomainResult<String> {
298        // Simplified SIMD-style string unescaping
299        // Real implementation would use vector instructions for processing escapes
300        
301        let mut result = Vec::with_capacity(input.len());
302        let mut i = 1; // Skip opening quote
303        
304        while i < input.len() - 1 { // Stop before closing quote
305            if input[i] == b'\\' && i + 1 < input.len() - 1 {
306                match input[i + 1] {
307                    b'n' => result.push(b'\n'),
308                    b'r' => result.push(b'\r'),
309                    b't' => result.push(b'\t'),
310                    b'\\' => result.push(b'\\'),
311                    b'"' => result.push(b'"'),
312                    c => result.push(c),
313                }
314                i += 2;
315            } else {
316                result.push(input[i]);
317                i += 1;
318            }
319        }
320
321        String::from_utf8(result)
322            .map_err(|e| DomainError::InvalidInput(format!("Invalid UTF-8: {e}")))
323    }
324
325    // Utility methods
326
327    fn detect_escapes(&self, input: &[u8]) -> bool {
328        input.contains(&b'\\')
329    }
330
331    fn is_simd_friendly(&self, input: &[u8]) -> bool {
332        // Check if input is large enough and aligned for SIMD processing
333        input.len() >= 32 && (input.as_ptr() as usize) % 32 == 0
334    }
335
336    fn is_simd_available() -> bool {
337        // Check if SIMD instructions are available
338        #[cfg(target_arch = "x86_64")]
339        {
340            std::arch::is_x86_feature_detected!("avx2")
341        }
342        #[cfg(not(target_arch = "x86_64"))]
343        {
344            false
345        }
346    }
347
348    /// Get buffer from pool for intermediate processing
349    pub fn get_buffer(&mut self, min_size: usize) -> DomainResult<&mut PooledBuffer> {
350        if self.current_buffer.is_none() || 
351           self.current_buffer.as_ref().unwrap().capacity() < min_size {
352            let size = BufferSize::for_capacity(min_size);
353            self.current_buffer = Some(self.buffer_pool.get_buffer(size)?);
354        }
355        
356        Ok(self.current_buffer.as_mut().unwrap())
357    }
358
359    /// Release current buffer back to pool
360    pub fn release_buffer(&mut self) {
361        self.current_buffer = None;
362    }
363}
364
365impl<'a> LazyParser<'a> for SimdZeroCopyParser<'a> {
366    type Output = SimdParseResult<'a>;
367    type Error = DomainError;
368
369    fn parse_lazy(&mut self, input: &'a [u8]) -> Result<Self::Output, Self::Error> {
370        self.parse_simd(input)
371    }
372
373    fn remaining(&self) -> &'a [u8] {
374        if self.position < self.input.len() {
375            &self.input[self.position..]
376        } else {
377            &[]
378        }
379    }
380
381    fn is_complete(&self) -> bool {
382        self.position >= self.input.len()
383    }
384
385    fn reset(&mut self) {
386        self.input = &[];
387        self.position = 0;
388        self.depth = 0;
389        self.release_buffer();
390    }
391}
392
393/// Structural information from sonic-rs preprocessing
394#[derive(Debug, Clone)]
395struct SonicStructuralInfo {
396    value_type: ValueType,
397    start_pos: usize,
398    estimated_size: usize,
399    has_escapes: bool,
400    is_simd_friendly: bool,
401}
402
403impl Default for SimdZeroCopyConfig {
404    fn default() -> Self {
405        Self {
406            max_depth: 64,
407            enable_simd: true,
408            buffer_pool_config: None,
409            simd_threshold: 256,
410            track_memory_usage: true,
411        }
412    }
413}
414
415impl SimdZeroCopyConfig {
416    /// Configuration optimized for maximum performance
417    pub fn high_performance() -> Self {
418        Self {
419            max_depth: 128,
420            enable_simd: true,
421            buffer_pool_config: Some(crate::parser::buffer_pool::PoolConfig::simd_optimized()),
422            simd_threshold: 128, // Lower threshold for more SIMD usage
423            track_memory_usage: false, // Disable for maximum speed
424        }
425    }
426
427    /// Configuration for memory-constrained environments
428    pub fn low_memory() -> Self {
429        Self {
430            max_depth: 32,
431            enable_simd: false,
432            buffer_pool_config: Some(crate::parser::buffer_pool::PoolConfig::low_memory()),
433            simd_threshold: 1024, // Higher threshold
434            track_memory_usage: true,
435        }
436    }
437}
438
439impl Default for SimdParsingStats {
440    fn default() -> Self {
441        Self {
442            total_parses: 0,
443            simd_accelerated_parses: 0,
444            total_bytes_processed: 0,
445            average_processing_time_ns: 0,
446            simd_efficiency: 0.0,
447        }
448    }
449}
450
451impl SimdParsingStats {
452    /// Calculate SIMD usage ratio
453    pub fn simd_usage_ratio(&self) -> f64 {
454        if self.total_parses == 0 {
455            0.0
456        } else {
457            self.simd_accelerated_parses as f64 / self.total_parses as f64
458        }
459    }
460
461    /// Calculate average throughput in MB/s
462    pub fn average_throughput_mbps(&self) -> f64 {
463        if self.average_processing_time_ns == 0 {
464            0.0
465        } else {
466            let seconds = self.average_processing_time_ns as f64 / 1_000_000_000.0;
467            let mb = self.total_bytes_processed as f64 / (1024.0 * 1024.0);
468            mb / seconds
469        }
470    }
471}
472
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `input` with a freshly built default parser, panicking on error.
    fn parse_fresh(input: &[u8]) -> SimdParseResult<'_> {
        let mut parser = SimdZeroCopyParser::new();
        parser.parse_simd(input).unwrap()
    }

    /// SIMD may only be reported as enabled when the CPU supports it.
    #[test]
    fn test_simd_parser_creation() {
        let parser = SimdZeroCopyParser::new();
        if parser.simd_enabled {
            assert!(SimdZeroCopyParser::is_simd_available());
        }
    }

    /// A short escape-free string comes back as a borrowed body.
    #[test]
    fn test_simple_parsing() {
        let parsed = parse_fresh(br#""hello world""#);
        if let LazyJsonValue::StringBorrowed(s) = parsed.value {
            assert_eq!(s, b"hello world");
        } else {
            panic!("Expected string");
        }
    }

    /// A decimal number comes back as the untouched number slice.
    #[test]
    fn test_number_parsing() {
        let parsed = parse_fresh(b"123.456");
        if let LazyJsonValue::NumberSlice(n) = parsed.value {
            assert_eq!(n, b"123.456");
        } else {
            panic!("Expected number");
        }
    }

    /// Both boolean literals parse, with a `reset` between the two runs.
    #[test]
    fn test_boolean_parsing() {
        let mut parser = SimdZeroCopyParser::new();

        let parsed_true = parser.parse_simd(b"true").unwrap();
        assert_eq!(parsed_true.value, LazyJsonValue::Boolean(true));

        parser.reset();
        let parsed_false = parser.parse_simd(b"false").unwrap();
        assert_eq!(parsed_false.value, LazyJsonValue::Boolean(false));
    }

    /// An object comes back as a slice covering the whole input.
    #[test]
    fn test_object_parsing() {
        let input = br#"{"key": "value", "number": 42}"#;

        let parsed = parse_fresh(input);
        if let LazyJsonValue::ObjectSlice(obj) = parsed.value {
            assert_eq!(obj, input);
        } else {
            panic!("Expected object");
        }
    }

    /// A borrowed string allocates nothing but references input bytes.
    #[test]
    fn test_memory_usage_tracking() {
        let usage = parse_fresh(br#""test string""#).memory_usage;
        assert_eq!(usage.allocated_bytes, 0); // Zero-copy string
        assert!(usage.referenced_bytes > 0);
    }

    /// Buffers can be checked out at the requested size and released again.
    #[test]
    fn test_buffer_pool_integration() {
        let mut parser = SimdZeroCopyParser::new();

        let capacity = parser.get_buffer(1024).unwrap().capacity();
        assert!(capacity >= 1024);

        parser.release_buffer();
        assert!(parser.current_buffer.is_none());
    }
}
556}