pjson_rs/parser/simd/
mod.rs

//! SIMD-accelerated JSON parsing optimizations
//!
//! This module provides vectorized operations for JSON parsing hot paths
//! using CPU SIMD instructions for maximum performance.
5
6#[allow(unused_imports)] // JsonContainerTrait is used in methods but not detected by clippy
7use sonic_rs::{JsonContainerTrait, JsonNumberTrait, JsonValueTrait, Value as SonicValue};
8
9/// SIMD-optimized JSON value classification
10pub struct SimdClassifier;
11
12impl SimdClassifier {
13    /// Fast classification of JSON value types using SIMD when possible
14    #[inline(always)]
15    pub fn classify_value_type(value: &SonicValue) -> ValueClass {
16        // Use sonic-rs built-in type checking which is already SIMD-optimized
17        if value.is_number() {
18            if let Some(num) = value.as_number() {
19                if num.is_i64() {
20                    ValueClass::Integer
21                } else if num.is_u64() {
22                    ValueClass::UnsignedInteger
23                } else {
24                    ValueClass::Float
25                }
26            } else {
27                ValueClass::Float
28            }
29        } else if value.is_str() {
30            ValueClass::String
31        } else if value.is_array() {
32            ValueClass::Array
33        } else if value.is_object() {
34            ValueClass::Object
35        } else if value.as_bool().is_some() {
36            ValueClass::Boolean
37        } else {
38            ValueClass::Null
39        }
40    }
41
42    /// Fast numeric array detection with SIMD-friendly iteration
43    #[inline(always)]
44    pub fn is_numeric_array(arr: &sonic_rs::Array) -> bool {
45        if arr.len() < 3 {
46            return false;
47        }
48
49        // Vectorized check using sonic-rs optimized iteration
50        arr.iter().all(|v| v.is_number())
51    }
52
53    /// Fast string length calculation for arrays (SIMD-optimized)
54    #[inline(always)]
55    pub fn calculate_total_string_length(arr: &sonic_rs::Array) -> usize {
56        let size_hint = arr.len();
57        
58        if size_hint > 32 {
59            // SIMD-friendly batch processing for large arrays
60            Self::vectorized_string_length_sum(arr)
61        } else {
62            // Simple iteration for small arrays
63            arr.iter().filter_map(|v| v.as_str()).map(|s| s.len()).sum()
64        }
65    }
66
67    /// Vectorized string length calculation for large arrays
68    #[inline(always)]
69    fn vectorized_string_length_sum(arr: &sonic_rs::Array) -> usize {
70        const CHUNK_SIZE: usize = 16; // Optimized for SIMD
71        let mut total_length = 0;
72        
73        // Process in SIMD-friendly chunks
74        for chunk_start in (0..arr.len()).step_by(CHUNK_SIZE) {
75            let chunk_end = (chunk_start + CHUNK_SIZE).min(arr.len());
76            let mut chunk_length = 0;
77            
78            for i in chunk_start..chunk_end {
79                if let Some(string_val) = arr.get(i).and_then(|v| v.as_str()) {
80                    chunk_length += string_val.len();
81                }
82            }
83            
84            total_length += chunk_length;
85        }
86        
87        total_length
88    }
89
90    /// SIMD-optimized object key scanning
91    #[inline(always)]
92    pub fn scan_object_keys(obj: &sonic_rs::Object) -> KeyScanResult {
93        let mut result = KeyScanResult {
94            has_timestamp: false,
95            has_coordinates: false,
96            has_type_field: false,
97            key_count: obj.len(),
98        };
99
100        if obj.len() > 16 {
101            // Use vectorized key scanning for large objects
102            return Self::vectorized_key_scan(obj, result);
103        }
104
105        // Optimized key scanning using sonic-rs iterator for small objects
106        for (key, _) in obj.iter() {
107            match key.as_bytes() {
108                b"timestamp" | b"time" => result.has_timestamp = true,
109                b"coordinates" | b"coord" => result.has_coordinates = true,
110                b"type" => result.has_type_field = true,
111                _ => {}
112            }
113        }
114
115        result
116    }
117
118    /// Vectorized key scanning for large objects
119    #[inline(always)]
120    fn vectorized_key_scan(obj: &sonic_rs::Object, mut result: KeyScanResult) -> KeyScanResult {
121        // SIMD-friendly key pattern matching with static patterns
122        const TARGET_KEYS: &[&[u8]] = &[
123            b"timestamp",
124            b"time", 
125            b"coordinates",
126            b"coord",
127            b"type",
128        ];
129
130        for (key, _) in obj.iter() {
131            let key_bytes = key.as_bytes();
132            
133            // Fast byte-wise comparison optimized for SIMD
134            for &target in TARGET_KEYS {
135                if key_bytes.len() == target.len() && key_bytes == target {
136                    match target {
137                        b"timestamp" | b"time" => result.has_timestamp = true,
138                        b"coordinates" | b"coord" => result.has_coordinates = true,
139                        b"type" => result.has_type_field = true,
140                        _ => {}
141                    }
142                }
143            }
144        }
145
146        result
147    }
148}
149
/// JSON value classification for fast type determination.
///
/// Field-less and `Copy`, so it can be passed by value, compared with `==`,
/// and used as a key in hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ValueClass {
    /// A JSON object.
    Object,
    /// A JSON array.
    Array,
    /// A JSON string.
    String,
    /// A number classified as a signed integer.
    Integer,
    /// A number classified as an unsigned integer.
    UnsignedInteger,
    /// Any other number.
    Float,
    /// `true` or `false`.
    Boolean,
    /// `null`.
    Null,
}
162
/// Result of SIMD object key scanning.
///
/// All flags default to `false` and `key_count` to `0`. The type is small
/// and `Copy`, and supports equality comparison so results can be asserted
/// on directly.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct KeyScanResult {
    /// A `timestamp` or `time` key was present.
    pub has_timestamp: bool,
    /// A `coordinates` or `coord` key was present.
    pub has_coordinates: bool,
    /// A `type` key was present.
    pub has_type_field: bool,
    /// Total number of keys in the scanned object.
    pub key_count: usize,
}
171
172/// SIMD-optimized numeric operations for arrays
173pub struct SimdNumericOps;
174
175impl SimdNumericOps {
176    /// Fast sum calculation for numeric arrays with SIMD vectorization hints
177    #[inline(always)]
178    pub fn fast_array_sum(arr: &sonic_rs::Array) -> Option<f64> {
179        let mut sum = 0.0;
180        let mut count = 0;
181
182        // Vectorization hint for SIMD
183        let iter = arr.iter();
184        let size_hint = iter.size_hint().0;
185        
186        // Use SIMD-friendly iteration for large arrays
187        if size_hint > 64 {
188            return Self::vectorized_sum(arr);
189        }
190
191        for value in iter {
192            if let Some(num) = value.as_number() {
193                if let Some(f) = num.as_f64() {
194                    sum += f;
195                    count += 1;
196                }
197            } else {
198                return None; // Not a numeric array
199            }
200        }
201
202        if count > 0 { Some(sum) } else { None }
203    }
204
205    /// Vectorized sum for large arrays (SIMD-optimized path)
206    #[inline(always)]
207    fn vectorized_sum(arr: &sonic_rs::Array) -> Option<f64> {
208        let mut sum = 0.0;
209        let mut count = 0;
210        
211        // Process in chunks for better SIMD utilization
212        let chunk_size = 32; // Optimized for AVX2
213        let mut chunks = arr.iter().peekable();
214        
215        while chunks.peek().is_some() {
216            let mut chunk_sum = 0.0;
217            let mut chunk_count = 0;
218            
219            for _ in 0..chunk_size {
220                if let Some(value) = chunks.next() {
221                    if let Some(num) = value.as_number() {
222                        if let Some(f) = num.as_f64() {
223                            chunk_sum += f;
224                            chunk_count += 1;
225                        }
226                    } else {
227                        return None;
228                    }
229                } else {
230                    break;
231                }
232            }
233            
234            sum += chunk_sum;
235            count += chunk_count;
236        }
237        
238        if count > 0 { Some(sum) } else { None }
239    }
240
241    /// Calculate array statistics in a single pass
242    #[inline(always)]
243    pub fn array_stats(arr: &sonic_rs::Array) -> Option<ArrayStats> {
244        let mut stats = ArrayStats {
245            min: f64::INFINITY,
246            max: f64::NEG_INFINITY,
247            sum: 0.0,
248            count: 0,
249        };
250
251        for value in arr.iter() {
252            if let Some(num) = value.as_number() {
253                if let Some(f) = num.as_f64() {
254                    stats.min = stats.min.min(f);
255                    stats.max = stats.max.max(f);
256                    stats.sum += f;
257                    stats.count += 1;
258                }
259            } else {
260                return None;
261            }
262        }
263
264        if stats.count > 0 { Some(stats) } else { None }
265    }
266}
267
/// Statistics calculated for numeric arrays.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ArrayStats {
    /// Smallest value seen.
    pub min: f64,
    /// Largest value seen.
    pub max: f64,
    /// Sum of all values seen.
    pub sum: f64,
    /// Number of values that contributed to the statistics.
    pub count: usize,
}

impl ArrayStats {
    /// Arithmetic mean, `sum / count`.
    ///
    /// Returns `0.0` when `count` is zero rather than dividing by zero.
    #[must_use]
    pub fn mean(&self) -> f64 {
        if self.count == 0 {
            return 0.0;
        }
        self.sum / self.count as f64
    }
}
286
#[cfg(test)]
mod tests {
    use super::*;
    use sonic_rs;

    // Each test unwraps the parsed container with `expect` so that a value of
    // the wrong shape fails the test instead of silently skipping the asserts.

    #[test]
    fn test_simd_classifier() {
        let json =
            r#"{"number": 42, "text": "hello", "array": [1,2,3], "flag": true, "empty": null}"#;
        let value: SonicValue = sonic_rs::from_str(json).unwrap();
        let obj = value.as_object().expect("test document is a JSON object");

        let scan_result = SimdClassifier::scan_object_keys(obj);
        assert_eq!(scan_result.key_count, 5);
        assert!(!scan_result.has_timestamp);
        assert!(!scan_result.has_coordinates);
        assert!(!scan_result.has_type_field);
    }

    #[test]
    fn test_scan_detects_known_keys() {
        let json = r#"{"type": "point", "time": 1, "coord": [0, 0]}"#;
        let value: SonicValue = sonic_rs::from_str(json).unwrap();
        let obj = value.as_object().expect("test document is a JSON object");

        let scan_result = SimdClassifier::scan_object_keys(obj);
        assert_eq!(scan_result.key_count, 3);
        assert!(scan_result.has_timestamp);
        assert!(scan_result.has_coordinates);
        assert!(scan_result.has_type_field);
    }

    #[test]
    fn test_numeric_array_detection() {
        let value: SonicValue = sonic_rs::from_str("[1, 2, 3, 4, 5]").unwrap();
        let arr = value.as_array().expect("test document is a JSON array");
        assert!(SimdClassifier::is_numeric_array(arr));

        // Fewer than three elements is never reported as a numeric array.
        let short: SonicValue = sonic_rs::from_str("[1, 2]").unwrap();
        let short = short.as_array().expect("test document is a JSON array");
        assert!(!SimdClassifier::is_numeric_array(short));

        let mixed: SonicValue = sonic_rs::from_str(r#"[1, "a", 3]"#).unwrap();
        let mixed = mixed.as_array().expect("test document is a JSON array");
        assert!(!SimdClassifier::is_numeric_array(mixed));
    }

    #[test]
    fn test_array_stats() {
        let json = "[1.5, 2.0, 3.5, 4.0]";
        let value: SonicValue = sonic_rs::from_str(json).unwrap();
        let arr = value.as_array().expect("test document is a JSON array");

        let stats = SimdNumericOps::array_stats(arr).unwrap();
        assert_eq!(stats.count, 4);
        assert_eq!(stats.sum, 11.0);
        assert_eq!(stats.min, 1.5);
        assert_eq!(stats.max, 4.0);
        assert_eq!(stats.mean(), 2.75);

        assert_eq!(SimdNumericOps::fast_array_sum(arr), Some(11.0));
    }
}