pjson_rs/parser/
simd_zero_copy.rs

1//! SIMD-accelerated zero-copy parser using sonic-rs
2//!
3//! This module combines the benefits of SIMD acceleration from sonic-rs
4//! with zero-copy parsing techniques to achieve maximum performance.
5
6use crate::{
7    domain::{DomainError, DomainResult},
8    parser::{
9        ValueType,
10        buffer_pool::{BufferPool, BufferSize, PooledBuffer},
11        zero_copy::{LazyJsonValue, LazyParser, MemoryUsage},
12    },
13};
14use std::{marker::PhantomData, sync::Arc};
15
16/// SIMD-accelerated zero-copy parser
17pub struct SimdZeroCopyParser<'a> {
18    buffer_pool: Arc<BufferPool>,
19    current_buffer: Option<PooledBuffer>,
20    input: &'a [u8],
21    position: usize,
22    depth: usize,
23    #[allow(dead_code)] // Future: depth limit enforcement
24    max_depth: usize,
25    simd_enabled: bool,
26    _phantom: PhantomData<&'a ()>,
27}
28
29/// Configuration for SIMD zero-copy parser
30#[derive(Debug, Clone)]
31pub struct SimdZeroCopyConfig {
32    /// Maximum nesting depth for safety
33    pub max_depth: usize,
34    /// Enable SIMD acceleration when available
35    pub enable_simd: bool,
36    /// Buffer pool configuration
37    pub buffer_pool_config: Option<crate::parser::buffer_pool::PoolConfig>,
38    /// Minimum size for SIMD processing
39    pub simd_threshold: usize,
40    /// Enable memory usage tracking
41    pub track_memory_usage: bool,
42}
43
44/// Parse result containing both the value and memory statistics
45#[derive(Debug)]
46pub struct SimdParseResult<'a> {
47    pub value: LazyJsonValue<'a>,
48    pub memory_usage: MemoryUsage,
49    pub simd_used: bool,
50    pub processing_time_ns: u64,
51}
52
/// Aggregate counters describing SIMD parsing performance.
#[derive(Clone, Debug)]
pub struct SimdParsingStats {
    /// Number of parses recorded.
    pub total_parses: u64,
    /// Number of those parses that took the SIMD path.
    pub simd_accelerated_parses: u64,
    /// Total input bytes across all recorded parses.
    pub total_bytes_processed: u64,
    /// Mean duration of a parse, in nanoseconds.
    pub average_processing_time_ns: u64,
    /// SIMD efficiency figure; this file only ever initialises it to `0.0`.
    pub simd_efficiency: f64,
}
62
63impl<'a> Default for SimdZeroCopyParser<'a> {
64    fn default() -> Self {
65        Self::new()
66    }
67}
68
69impl<'a> SimdZeroCopyParser<'a> {
70    /// Create new SIMD zero-copy parser with default configuration
71    pub fn new() -> Self {
72        Self::with_config(SimdZeroCopyConfig::default())
73    }
74
75    /// Create parser with custom configuration
76    pub fn with_config(config: SimdZeroCopyConfig) -> Self {
77        let buffer_pool = if let Some(pool_config) = config.buffer_pool_config {
78            Arc::new(BufferPool::with_config(pool_config))
79        } else {
80            Arc::new(BufferPool::new())
81        };
82
83        Self {
84            buffer_pool,
85            current_buffer: None,
86            input: &[],
87            position: 0,
88            depth: 0,
89            max_depth: config.max_depth,
90            simd_enabled: config.enable_simd && Self::is_simd_available(),
91            _phantom: PhantomData,
92        }
93    }
94
95    /// Parse JSON with SIMD acceleration and zero-copy optimization
96    pub fn parse_simd(&mut self, input: &'a [u8]) -> DomainResult<SimdParseResult<'a>> {
97        let start_time = std::time::Instant::now();
98
99        self.input = input;
100        self.position = 0;
101        self.depth = 0;
102
103        // Determine if we should use SIMD based on input size
104        let use_simd = self.simd_enabled && input.len() >= 256; // Threshold for SIMD benefit
105
106        let value = if use_simd {
107            self.parse_with_simd(input)?
108        } else {
109            self.parse_without_simd(input)?
110        };
111
112        let processing_time = start_time.elapsed().as_nanos() as u64;
113        let memory_usage = value.memory_usage();
114
115        Ok(SimdParseResult {
116            value,
117            memory_usage,
118            simd_used: use_simd,
119            processing_time_ns: processing_time,
120        })
121    }
122
123    /// Parse using sonic-rs SIMD acceleration
124    fn parse_with_simd(&mut self, input: &'a [u8]) -> DomainResult<LazyJsonValue<'a>> {
125        // First, use sonic-rs to validate and get structural information
126        let sonic_result = self.sonic_preprocess(input)?;
127
128        // Then do zero-copy extraction based on sonic's findings
129        match sonic_result.value_type {
130            ValueType::Object => self.parse_simd_object(input, &sonic_result),
131            ValueType::Array => self.parse_simd_array(input, &sonic_result),
132            ValueType::String => self.parse_simd_string(input, &sonic_result),
133            ValueType::Number => self.parse_simd_number(input, &sonic_result),
134            ValueType::Boolean => self.parse_simd_boolean(input, &sonic_result),
135            ValueType::Null => Ok(LazyJsonValue::Null),
136        }
137    }
138
139    /// Parse without SIMD acceleration (fallback to pure zero-copy)
140    fn parse_without_simd(&mut self, input: &'a [u8]) -> DomainResult<LazyJsonValue<'a>> {
141        // Use the zero-copy parser directly
142        let mut zero_copy_parser = crate::parser::zero_copy::ZeroCopyParser::new();
143        zero_copy_parser.parse_lazy(input)
144    }
145
146    /// Use sonic-rs for structural analysis
147    fn sonic_preprocess(&self, input: &[u8]) -> DomainResult<SonicStructuralInfo> {
148        // This is a simplified version - actual implementation would use sonic-rs
149        // to get structural information about the JSON
150
151        if input.is_empty() {
152            return Err(DomainError::InvalidInput("Empty input".to_string()));
153        }
154
155        // Detect value type from first non-whitespace character
156        let mut pos = 0;
157        while pos < input.len() && input[pos].is_ascii_whitespace() {
158            pos += 1;
159        }
160
161        if pos >= input.len() {
162            return Err(DomainError::InvalidInput("Only whitespace".to_string()));
163        }
164
165        let value_type = match input[pos] {
166            b'{' => ValueType::Object,
167            b'[' => ValueType::Array,
168            b'"' => ValueType::String,
169            b't' | b'f' => ValueType::Boolean,
170            b'n' => ValueType::Null,
171            b'-' | b'0'..=b'9' => ValueType::Number,
172            _ => {
173                let ch = input[pos] as char;
174                return Err(DomainError::InvalidInput(format!(
175                    "Invalid JSON start character: {ch}"
176                )));
177            }
178        };
179
180        Ok(SonicStructuralInfo {
181            value_type,
182            start_pos: pos,
183            estimated_size: input.len(),
184            has_escapes: self.detect_escapes(input),
185            is_simd_friendly: self.is_simd_friendly(input),
186        })
187    }
188
189    /// Parse object with SIMD acceleration
190    fn parse_simd_object(
191        &mut self,
192        input: &'a [u8],
193        info: &SonicStructuralInfo,
194    ) -> DomainResult<LazyJsonValue<'a>> {
195        // For objects, we still return a slice but use SIMD for validation
196        if info.is_simd_friendly {
197            // Use SIMD for fast validation of structure
198            self.simd_validate_object_structure(input)?;
199        }
200
201        // Return zero-copy slice
202        Ok(LazyJsonValue::ObjectSlice(input))
203    }
204
205    /// Parse array with SIMD acceleration
206    fn parse_simd_array(
207        &mut self,
208        input: &'a [u8],
209        info: &SonicStructuralInfo,
210    ) -> DomainResult<LazyJsonValue<'a>> {
211        if info.is_simd_friendly {
212            // Use SIMD for fast validation of array structure
213            self.simd_validate_array_structure(input)?;
214        }
215
216        Ok(LazyJsonValue::ArraySlice(input))
217    }
218
219    /// Parse string with SIMD acceleration
220    fn parse_simd_string(
221        &mut self,
222        input: &'a [u8],
223        info: &SonicStructuralInfo,
224    ) -> DomainResult<LazyJsonValue<'a>> {
225        if !info.has_escapes {
226            // No escapes - pure zero copy
227            let start = info.start_pos + 1; // Skip opening quote
228            let end = input.len() - 1; // Skip closing quote
229            Ok(LazyJsonValue::StringBorrowed(&input[start..end]))
230        } else {
231            // Has escapes - need to process with SIMD-accelerated unescaping
232            let unescaped = self.simd_unescape_string(input)?;
233            Ok(LazyJsonValue::StringOwned(unescaped))
234        }
235    }
236
237    /// Parse number with SIMD acceleration
238    fn parse_simd_number(
239        &mut self,
240        input: &'a [u8],
241        _info: &SonicStructuralInfo,
242    ) -> DomainResult<LazyJsonValue<'a>> {
243        // SIMD validation of number format
244        if self.simd_enabled {
245            self.simd_validate_number(input)?;
246        }
247
248        Ok(LazyJsonValue::NumberSlice(input))
249    }
250
251    /// Parse boolean with SIMD acceleration
252    fn parse_simd_boolean(
253        &mut self,
254        input: &'a [u8],
255        _info: &SonicStructuralInfo,
256    ) -> DomainResult<LazyJsonValue<'a>> {
257        // SIMD comparison for "true" or "false"
258        if self.simd_enabled {
259            if input == b"true" {
260                return Ok(LazyJsonValue::Boolean(true));
261            } else if input == b"false" {
262                return Ok(LazyJsonValue::Boolean(false));
263            } else {
264                return Err(DomainError::InvalidInput(
265                    "Invalid boolean value".to_string(),
266                ));
267            }
268        }
269
270        // Fallback to regular parsing
271        match input {
272            b"true" => Ok(LazyJsonValue::Boolean(true)),
273            b"false" => Ok(LazyJsonValue::Boolean(false)),
274            _ => Err(DomainError::InvalidInput(
275                "Invalid boolean value".to_string(),
276            )),
277        }
278    }
279
280    // SIMD validation methods (simplified implementations)
281
282    fn simd_validate_object_structure(&self, input: &[u8]) -> DomainResult<()> {
283        // Simplified: just check that we have matching braces
284        // Real implementation would use SIMD to validate JSON structure
285        let open_count = input.iter().filter(|&&c| c == b'{').count();
286        let close_count = input.iter().filter(|&&c| c == b'}').count();
287
288        if open_count == close_count && open_count > 0 {
289            Ok(())
290        } else {
291            Err(DomainError::InvalidInput(
292                "Unmatched braces in object".to_string(),
293            ))
294        }
295    }
296
297    fn simd_validate_array_structure(&self, input: &[u8]) -> DomainResult<()> {
298        // Simplified: just check that we have matching brackets
299        let open_count = input.iter().filter(|&&c| c == b'[').count();
300        let close_count = input.iter().filter(|&&c| c == b']').count();
301
302        if open_count == close_count && open_count > 0 {
303            Ok(())
304        } else {
305            Err(DomainError::InvalidInput(
306                "Unmatched brackets in array".to_string(),
307            ))
308        }
309    }
310
311    fn simd_validate_number(&self, input: &[u8]) -> DomainResult<()> {
312        // Simplified number validation using SIMD concepts
313        // Real implementation would use SIMD instructions for fast validation
314
315        if input.is_empty() {
316            return Err(DomainError::InvalidInput("Empty number".to_string()));
317        }
318
319        // Quick ASCII digit check that could be SIMD-accelerated
320        let is_valid = input.iter().all(|&c| {
321            c.is_ascii_digit() || c == b'.' || c == b'-' || c == b'+' || c == b'e' || c == b'E'
322        });
323
324        if is_valid {
325            Ok(())
326        } else {
327            Err(DomainError::InvalidInput(
328                "Invalid number format".to_string(),
329            ))
330        }
331    }
332
333    fn simd_unescape_string(&self, input: &[u8]) -> DomainResult<String> {
334        // Simplified SIMD-style string unescaping
335        // Real implementation would use vector instructions for processing escapes
336
337        let mut result = Vec::with_capacity(input.len());
338        let mut i = 1; // Skip opening quote
339
340        while i < input.len() - 1 {
341            // Stop before closing quote
342            if input[i] == b'\\' && i + 1 < input.len() - 1 {
343                match input[i + 1] {
344                    b'n' => result.push(b'\n'),
345                    b'r' => result.push(b'\r'),
346                    b't' => result.push(b'\t'),
347                    b'\\' => result.push(b'\\'),
348                    b'"' => result.push(b'"'),
349                    c => result.push(c),
350                }
351                i += 2;
352            } else {
353                result.push(input[i]);
354                i += 1;
355            }
356        }
357
358        String::from_utf8(result)
359            .map_err(|e| DomainError::InvalidInput(format!("Invalid UTF-8: {e}")))
360    }
361
362    // Utility methods
363
364    fn detect_escapes(&self, input: &[u8]) -> bool {
365        input.contains(&b'\\')
366    }
367
368    fn is_simd_friendly(&self, input: &[u8]) -> bool {
369        // Check if input is large enough and aligned for SIMD processing
370        input.len() >= 32 && (input.as_ptr() as usize).is_multiple_of(32)
371    }
372
373    fn is_simd_available() -> bool {
374        // Check if SIMD instructions are available
375        #[cfg(target_arch = "x86_64")]
376        {
377            std::arch::is_x86_feature_detected!("avx2")
378        }
379        #[cfg(not(target_arch = "x86_64"))]
380        {
381            false
382        }
383    }
384
385    /// Get buffer from pool for intermediate processing
386    pub fn get_buffer(&mut self, min_size: usize) -> DomainResult<&mut PooledBuffer> {
387        if self.current_buffer.is_none()
388            || self.current_buffer.as_ref().unwrap().capacity() < min_size
389        {
390            let size = BufferSize::for_capacity(min_size);
391            self.current_buffer = Some(self.buffer_pool.get_buffer(size)?);
392        }
393
394        Ok(self.current_buffer.as_mut().unwrap())
395    }
396
397    /// Release current buffer back to pool
398    pub fn release_buffer(&mut self) {
399        self.current_buffer = None;
400    }
401}
402
403impl<'a> LazyParser<'a> for SimdZeroCopyParser<'a> {
404    type Output = SimdParseResult<'a>;
405    type Error = DomainError;
406
407    fn parse_lazy(&mut self, input: &'a [u8]) -> Result<Self::Output, Self::Error> {
408        self.parse_simd(input)
409    }
410
411    fn remaining(&self) -> &'a [u8] {
412        if self.position < self.input.len() {
413            &self.input[self.position..]
414        } else {
415            &[]
416        }
417    }
418
419    fn is_complete(&self) -> bool {
420        self.position >= self.input.len()
421    }
422
423    fn reset(&mut self) {
424        self.input = &[];
425        self.position = 0;
426        self.depth = 0;
427        self.release_buffer();
428    }
429}
430
431/// Structural information from sonic-rs preprocessing
432#[derive(Debug, Clone)]
433struct SonicStructuralInfo {
434    value_type: ValueType,
435    start_pos: usize,
436    #[allow(dead_code)] // Future: used for pre-allocation optimization
437    estimated_size: usize,
438    has_escapes: bool,
439    is_simd_friendly: bool,
440}
441
442impl Default for SimdZeroCopyConfig {
443    fn default() -> Self {
444        Self {
445            max_depth: 64,
446            enable_simd: true,
447            buffer_pool_config: None,
448            simd_threshold: 256,
449            track_memory_usage: true,
450        }
451    }
452}
453
454impl SimdZeroCopyConfig {
455    /// Configuration optimized for maximum performance
456    pub fn high_performance() -> Self {
457        Self {
458            max_depth: 128,
459            enable_simd: true,
460            buffer_pool_config: Some(crate::parser::buffer_pool::PoolConfig::simd_optimized()),
461            simd_threshold: 128,       // Lower threshold for more SIMD usage
462            track_memory_usage: false, // Disable for maximum speed
463        }
464    }
465
466    /// Configuration for memory-constrained environments
467    pub fn low_memory() -> Self {
468        Self {
469            max_depth: 32,
470            enable_simd: false,
471            buffer_pool_config: Some(crate::parser::buffer_pool::PoolConfig::low_memory()),
472            simd_threshold: 1024, // Higher threshold
473            track_memory_usage: true,
474        }
475    }
476}
477
478impl Default for SimdParsingStats {
479    fn default() -> Self {
480        Self {
481            total_parses: 0,
482            simd_accelerated_parses: 0,
483            total_bytes_processed: 0,
484            average_processing_time_ns: 0,
485            simd_efficiency: 0.0,
486        }
487    }
488}
489
490impl SimdParsingStats {
491    /// Calculate SIMD usage ratio
492    pub fn simd_usage_ratio(&self) -> f64 {
493        if self.total_parses == 0 {
494            0.0
495        } else {
496            self.simd_accelerated_parses as f64 / self.total_parses as f64
497        }
498    }
499
500    /// Calculate average throughput in MB/s
501    pub fn average_throughput_mbps(&self) -> f64 {
502        if self.average_processing_time_ns == 0 {
503            0.0
504        } else {
505            let seconds = self.average_processing_time_ns as f64 / 1_000_000_000.0;
506            let mb = self.total_bytes_processed as f64 / (1024.0 * 1024.0);
507            mb / seconds
508        }
509    }
510}
511
#[cfg(test)]
mod tests {
    use super::*;

    /// SIMD may only be enabled on a parser when the CPU check passes.
    #[test]
    fn test_simd_parser_creation() {
        let parser = SimdZeroCopyParser::new();
        if parser.simd_enabled {
            assert!(SimdZeroCopyParser::is_simd_available());
        }
    }

    /// A short escape-free string comes back borrowed, without its quotes.
    #[test]
    fn test_simple_parsing() {
        let input = br#""hello world""#;
        let mut parser = SimdZeroCopyParser::new();

        let parsed = parser.parse_simd(input).unwrap();
        let LazyJsonValue::StringBorrowed(s) = parsed.value else {
            panic!("Expected string");
        };
        assert_eq!(s, b"hello world");
    }

    /// A number comes back as the untouched slice of its literal.
    #[test]
    fn test_number_parsing() {
        let input = b"123.456";
        let mut parser = SimdZeroCopyParser::new();

        let parsed = parser.parse_simd(input).unwrap();
        let LazyJsonValue::NumberSlice(n) = parsed.value else {
            panic!("Expected number");
        };
        assert_eq!(n, b"123.456");
    }

    /// Both boolean literals parse, with a reset between the two runs.
    #[test]
    fn test_boolean_parsing() {
        let mut parser = SimdZeroCopyParser::new();

        let first = parser.parse_simd(b"true").unwrap();
        assert_eq!(first.value, LazyJsonValue::Boolean(true));

        parser.reset();
        let second = parser.parse_simd(b"false").unwrap();
        assert_eq!(second.value, LazyJsonValue::Boolean(false));
    }

    /// An object comes back as a slice covering the entire input.
    #[test]
    fn test_object_parsing() {
        let input = br#"{"key": "value", "number": 42}"#;
        let mut parser = SimdZeroCopyParser::new();

        let parsed = parser.parse_simd(input).unwrap();
        let LazyJsonValue::ObjectSlice(obj) = parsed.value else {
            panic!("Expected object");
        };
        assert_eq!(obj, input);
    }

    /// A borrowed string allocates nothing but references some input bytes.
    #[test]
    fn test_memory_usage_tracking() {
        let input = br#""test string""#;
        let mut parser = SimdZeroCopyParser::new();

        let usage = parser.parse_simd(input).unwrap().memory_usage;
        assert_eq!(usage.allocated_bytes, 0); // Zero-copy string
        assert!(usage.referenced_bytes > 0);
    }

    /// Buffers obtained from the pool are large enough and can be released again.
    #[test]
    fn test_buffer_pool_integration() {
        let mut parser = SimdZeroCopyParser::new();

        let capacity = parser.get_buffer(1024).unwrap().capacity();
        assert!(capacity >= 1024);

        parser.release_buffer();
        assert!(parser.current_buffer.is_none());
    }
}