pjson_rs/parser/simd/
mod.rs

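//! SIMD-oriented JSON parsing optimizations
//!
//! This module provides helpers for JSON parsing hot paths. The code here contains
//! no explicit SIMD intrinsics: it operates on values already parsed by sonic-rs and
//! uses simple iterator-based loops that the compiler may auto-vectorize.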
5
6#[allow(unused_imports)] // JsonContainerTrait is used in methods but not detected by clippy
7use sonic_rs::{JsonContainerTrait, JsonNumberTrait, JsonValueTrait, Value as SonicValue};
8
9/// SIMD-optimized JSON value classification
10pub struct SimdClassifier;
11
12impl SimdClassifier {
13    /// Fast classification of JSON value types using SIMD when possible
14    #[inline(always)]
15    pub fn classify_value_type(value: &SonicValue) -> ValueClass {
16        // Use sonic-rs built-in type checking which is already SIMD-optimized
17        if value.is_number() {
18            if let Some(num) = value.as_number() {
19                if num.is_i64() {
20                    ValueClass::Integer
21                } else if num.is_u64() {
22                    ValueClass::UnsignedInteger
23                } else {
24                    ValueClass::Float
25                }
26            } else {
27                ValueClass::Float
28            }
29        } else if value.is_str() {
30            ValueClass::String
31        } else if value.is_array() {
32            ValueClass::Array
33        } else if value.is_object() {
34            ValueClass::Object
35        } else if value.as_bool().is_some() {
36            ValueClass::Boolean
37        } else {
38            ValueClass::Null
39        }
40    }
41
42    /// Fast numeric array detection with SIMD-friendly iteration
43    #[inline(always)]
44    pub fn is_numeric_array(arr: &sonic_rs::Array) -> bool {
45        if arr.len() < 3 {
46            return false;
47        }
48
49        // Vectorized check using sonic-rs optimized iteration
50        arr.iter().all(|v| v.is_number())
51    }
52
53    /// Fast string length calculation for arrays (SIMD-optimized)
54    #[inline(always)]
55    pub fn calculate_total_string_length(arr: &sonic_rs::Array) -> usize {
56        let size_hint = arr.len();
57
58        if size_hint > 32 {
59            // SIMD-friendly batch processing for large arrays
60            Self::vectorized_string_length_sum(arr)
61        } else {
62            // Simple iteration for small arrays
63            arr.iter().filter_map(|v| v.as_str()).map(|s| s.len()).sum()
64        }
65    }
66
67    /// Vectorized string length calculation for large arrays
68    #[inline(always)]
69    fn vectorized_string_length_sum(arr: &sonic_rs::Array) -> usize {
70        const CHUNK_SIZE: usize = 16; // Optimized for SIMD
71        let mut total_length = 0;
72
73        // Process in SIMD-friendly chunks
74        for chunk_start in (0..arr.len()).step_by(CHUNK_SIZE) {
75            let chunk_end = (chunk_start + CHUNK_SIZE).min(arr.len());
76            let mut chunk_length = 0;
77
78            for i in chunk_start..chunk_end {
79                if let Some(string_val) = arr.get(i).and_then(|v| v.as_str()) {
80                    chunk_length += string_val.len();
81                }
82            }
83
84            total_length += chunk_length;
85        }
86
87        total_length
88    }
89
90    /// SIMD-optimized object key scanning
91    #[inline(always)]
92    pub fn scan_object_keys(obj: &sonic_rs::Object) -> KeyScanResult {
93        let mut result = KeyScanResult {
94            has_timestamp: false,
95            has_coordinates: false,
96            has_type_field: false,
97            key_count: obj.len(),
98        };
99
100        if obj.len() > 16 {
101            // Use vectorized key scanning for large objects
102            return Self::vectorized_key_scan(obj, result);
103        }
104
105        // Optimized key scanning using sonic-rs iterator for small objects
106        for (key, _) in obj.iter() {
107            match key.as_bytes() {
108                b"timestamp" | b"time" => result.has_timestamp = true,
109                b"coordinates" | b"coord" => result.has_coordinates = true,
110                b"type" => result.has_type_field = true,
111                _ => {}
112            }
113        }
114
115        result
116    }
117
118    /// Vectorized key scanning for large objects
119    #[inline(always)]
120    fn vectorized_key_scan(obj: &sonic_rs::Object, mut result: KeyScanResult) -> KeyScanResult {
121        // SIMD-friendly key pattern matching with static patterns
122        const TARGET_KEYS: &[&[u8]] = &[b"timestamp", b"time", b"coordinates", b"coord", b"type"];
123
124        for (key, _) in obj.iter() {
125            let key_bytes = key.as_bytes();
126
127            // Fast byte-wise comparison optimized for SIMD
128            for &target in TARGET_KEYS {
129                if key_bytes.len() == target.len() && key_bytes == target {
130                    match target {
131                        b"timestamp" | b"time" => result.has_timestamp = true,
132                        b"coordinates" | b"coord" => result.has_coordinates = true,
133                        b"type" => result.has_type_field = true,
134                        _ => {}
135                    }
136                }
137            }
138        }
139
140        result
141    }
142}
143
/// JSON value classification for fast type determination
///
/// This is the result type of `SimdClassifier::classify_value_type`; the
/// per-variant notes below describe the checks that function performs.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ValueClass {
    /// The value reported `is_object()`.
    Object,
    /// The value reported `is_array()`.
    Array,
    /// The value reported `is_str()`.
    String,
    /// A number that reported `is_i64()`.
    Integer,
    /// A number that did not report `is_i64()` but did report `is_u64()`.
    UnsignedInteger,
    /// Any other number.
    Float,
    /// The value yielded `Some(_)` from `as_bool()`.
    Boolean,
    /// Fallback for every value that matched none of the other checks.
    Null,
}
156
/// Result of SIMD object key scanning
///
/// Filled in by `SimdClassifier::scan_object_keys`. Key matching is byte-exact.
/// The derived `Default` yields all flags `false` and a `key_count` of zero.
#[derive(Debug, Default)]
pub struct KeyScanResult {
    /// `true` when the object has a key equal to `"timestamp"` or `"time"`.
    pub has_timestamp: bool,
    /// `true` when the object has a key equal to `"coordinates"` or `"coord"`.
    pub has_coordinates: bool,
    /// `true` when the object has a key equal to `"type"`.
    pub has_type_field: bool,
    /// Number of entries in the scanned object (`obj.len()`).
    pub key_count: usize,
}
165
166/// SIMD-optimized numeric operations for arrays
167pub struct SimdNumericOps;
168
169impl SimdNumericOps {
170    /// Fast sum calculation for numeric arrays with SIMD vectorization hints
171    #[inline(always)]
172    pub fn fast_array_sum(arr: &sonic_rs::Array) -> Option<f64> {
173        let mut sum = 0.0;
174        let mut count = 0;
175
176        // Vectorization hint for SIMD
177        let iter = arr.iter();
178        let size_hint = iter.size_hint().0;
179
180        // Use SIMD-friendly iteration for large arrays
181        if size_hint > 64 {
182            return Self::vectorized_sum(arr);
183        }
184
185        for value in iter {
186            if let Some(num) = value.as_number() {
187                if let Some(f) = num.as_f64() {
188                    sum += f;
189                    count += 1;
190                }
191            } else {
192                return None; // Not a numeric array
193            }
194        }
195
196        if count > 0 { Some(sum) } else { None }
197    }
198
199    /// Vectorized sum for large arrays (SIMD-optimized path)
200    #[inline(always)]
201    fn vectorized_sum(arr: &sonic_rs::Array) -> Option<f64> {
202        let mut sum = 0.0;
203        let mut count = 0;
204
205        // Process in chunks for better SIMD utilization
206        let chunk_size = 32; // Optimized for AVX2
207        let mut chunks = arr.iter().peekable();
208
209        while chunks.peek().is_some() {
210            let mut chunk_sum = 0.0;
211            let mut chunk_count = 0;
212
213            for _ in 0..chunk_size {
214                if let Some(value) = chunks.next() {
215                    if let Some(num) = value.as_number() {
216                        if let Some(f) = num.as_f64() {
217                            chunk_sum += f;
218                            chunk_count += 1;
219                        }
220                    } else {
221                        return None;
222                    }
223                } else {
224                    break;
225                }
226            }
227
228            sum += chunk_sum;
229            count += chunk_count;
230        }
231
232        if count > 0 { Some(sum) } else { None }
233    }
234
235    /// Calculate array statistics in a single pass
236    #[inline(always)]
237    pub fn array_stats(arr: &sonic_rs::Array) -> Option<ArrayStats> {
238        let mut stats = ArrayStats {
239            min: f64::INFINITY,
240            max: f64::NEG_INFINITY,
241            sum: 0.0,
242            count: 0,
243        };
244
245        for value in arr.iter() {
246            if let Some(num) = value.as_number() {
247                if let Some(f) = num.as_f64() {
248                    stats.min = stats.min.min(f);
249                    stats.max = stats.max.max(f);
250                    stats.sum += f;
251                    stats.count += 1;
252                }
253            } else {
254                return None;
255            }
256        }
257
258        if stats.count > 0 { Some(stats) } else { None }
259    }
260}
261
/// Summary statistics gathered over a numeric array
#[derive(Debug, Clone)]
pub struct ArrayStats {
    /// Smallest value observed.
    pub min: f64,
    /// Largest value observed.
    pub max: f64,
    /// Sum of all observed values.
    pub sum: f64,
    /// Number of values that contributed to the statistics.
    pub count: usize,
}

impl ArrayStats {
    /// Arithmetic mean of the observed values, or `0.0` when `count` is zero.
    pub fn mean(&self) -> f64 {
        match self.count {
            0 => 0.0,
            n => self.sum / n as f64,
        }
    }
}
280
#[cfg(test)]
mod tests {
    use super::*;
    use sonic_rs;

    /// Parses `json` with sonic-rs, failing the test on malformed input.
    fn parse(json: &str) -> SonicValue {
        sonic_rs::from_str(json).expect("test JSON must parse")
    }

    #[test]
    fn test_simd_classifier() {
        let value = parse(
            r#"{"number": 42, "text": "hello", "array": [1,2,3], "flag": true, "empty": null}"#,
        );

        // `expect` instead of `if let`: a non-object must fail the test, not skip it.
        let obj = value.as_object().expect("test JSON must be an object");
        let scan_result = SimdClassifier::scan_object_keys(obj);

        assert_eq!(scan_result.key_count, 5);
        assert!(!scan_result.has_timestamp);
        assert!(!scan_result.has_coordinates);
        assert!(!scan_result.has_type_field);
    }

    #[test]
    fn test_key_scan_flags() {
        let value = parse(r#"{"time": 1, "coord": [0, 0], "type": "point"}"#);

        let obj = value.as_object().expect("test JSON must be an object");
        let scan_result = SimdClassifier::scan_object_keys(obj);

        assert_eq!(scan_result.key_count, 3);
        assert!(scan_result.has_timestamp);
        assert!(scan_result.has_coordinates);
        assert!(scan_result.has_type_field);
    }

    #[test]
    fn test_numeric_array_detection() {
        let numeric = parse("[1, 2, 3, 4, 5]");
        let arr = numeric.as_array().expect("test JSON must be an array");
        assert!(SimdClassifier::is_numeric_array(arr));

        // Fewer than three elements is never reported as numeric.
        let short = parse("[1, 2]");
        let arr = short.as_array().expect("test JSON must be an array");
        assert!(!SimdClassifier::is_numeric_array(arr));

        let mixed = parse(r#"[1, "a", 3]"#);
        let arr = mixed.as_array().expect("test JSON must be an array");
        assert!(!SimdClassifier::is_numeric_array(arr));
    }

    #[test]
    fn test_array_stats() {
        let value = parse("[1.5, 2.0, 3.5, 4.0]");

        let arr = value.as_array().expect("test JSON must be an array");
        let stats = SimdNumericOps::array_stats(arr).unwrap();

        assert_eq!(stats.count, 4);
        assert_eq!(stats.sum, 11.0);
        assert_eq!(stats.min, 1.5);
        assert_eq!(stats.max, 4.0);
        assert_eq!(stats.mean(), 2.75);
    }
}