// pjson_rs/parser/simd/mod.rs

//! SIMD-friendly JSON inspection helpers
//!
//! This module provides classification, key-scanning, and numeric-reduction
//! helpers for JSON parsing hot paths. It contains no hand-written SIMD
//! intrinsics; it relies on `sonic-rs` and on simple iterator loops.
5
6#[allow(unused_imports)] // JsonContainerTrait is used in methods but not detected by clippy
7use sonic_rs::{JsonContainerTrait, JsonNumberTrait, JsonValueTrait, Value as SonicValue};
8
9/// SIMD-optimized JSON value classification
10pub struct SimdClassifier;
11
12impl SimdClassifier {
13    /// Fast classification of JSON value types using SIMD when possible
14    #[inline(always)]
15    pub fn classify_value_type(value: &SonicValue) -> ValueClass {
16        // Use sonic-rs built-in type checking which is already SIMD-optimized
17        if value.is_number() {
18            if let Some(num) = value.as_number() {
19                if num.is_i64() {
20                    ValueClass::Integer
21                } else if num.is_u64() {
22                    ValueClass::UnsignedInteger
23                } else {
24                    ValueClass::Float
25                }
26            } else {
27                ValueClass::Float
28            }
29        } else if value.is_str() {
30            ValueClass::String
31        } else if value.is_array() {
32            ValueClass::Array
33        } else if value.is_object() {
34            ValueClass::Object
35        } else if value.as_bool().is_some() {
36            ValueClass::Boolean
37        } else {
38            ValueClass::Null
39        }
40    }
41
42    /// Fast numeric array detection with SIMD-friendly iteration
43    #[inline(always)]
44    pub fn is_numeric_array(arr: &sonic_rs::Array) -> bool {
45        if arr.len() < 3 {
46            return false;
47        }
48
49        // Vectorized check using sonic-rs optimized iteration
50        arr.iter().all(|v| v.is_number())
51    }
52
53    /// Fast string length calculation for arrays (SIMD-optimized)
54    #[inline(always)]
55    pub fn calculate_total_string_length(arr: &sonic_rs::Array) -> usize {
56        let size_hint = arr.len();
57
58        if size_hint > 32 {
59            // SIMD-friendly batch processing for large arrays
60            Self::vectorized_string_length_sum(arr)
61        } else {
62            // Simple iteration for small arrays
63            arr.iter().filter_map(|v| v.as_str()).map(|s| s.len()).sum()
64        }
65    }
66
67    /// Vectorized string length calculation for large arrays
68    #[inline(always)]
69    fn vectorized_string_length_sum(arr: &sonic_rs::Array) -> usize {
70        const CHUNK_SIZE: usize = 16; // Optimized for SIMD
71        let mut total_length = 0;
72
73        // Process in SIMD-friendly chunks
74        for chunk_start in (0..arr.len()).step_by(CHUNK_SIZE) {
75            let chunk_end = (chunk_start + CHUNK_SIZE).min(arr.len());
76            let mut chunk_length = 0;
77
78            for i in chunk_start..chunk_end {
79                if let Some(string_val) = arr.get(i).and_then(|v| v.as_str()) {
80                    chunk_length += string_val.len();
81                }
82            }
83
84            total_length += chunk_length;
85        }
86
87        total_length
88    }
89
90    /// SIMD-optimized object key scanning
91    #[inline(always)]
92    pub fn scan_object_keys(obj: &sonic_rs::Object) -> KeyScanResult {
93        let mut result = KeyScanResult {
94            has_timestamp: false,
95            has_coordinates: false,
96            has_type_field: false,
97            key_count: obj.len(),
98        };
99
100        if obj.len() > 16 {
101            // Use vectorized key scanning for large objects
102            return Self::vectorized_key_scan(obj, result);
103        }
104
105        // Optimized key scanning using sonic-rs iterator for small objects
106        for (key, _) in obj.iter() {
107            match key.as_bytes() {
108                b"timestamp" | b"time" => result.has_timestamp = true,
109                b"coordinates" | b"coord" => result.has_coordinates = true,
110                b"type" => result.has_type_field = true,
111                _ => {}
112            }
113        }
114
115        result
116    }
117
118    /// Vectorized key scanning for large objects
119    #[inline(always)]
120    fn vectorized_key_scan(obj: &sonic_rs::Object, mut result: KeyScanResult) -> KeyScanResult {
121        // SIMD-friendly key pattern matching with static patterns
122        const TARGET_KEYS: &[&[u8]] = &[b"timestamp", b"time", b"coordinates", b"coord", b"type"];
123
124        for (key, _) in obj.iter() {
125            let key_bytes = key.as_bytes();
126
127            // Fast byte-wise comparison optimized for SIMD
128            for &target in TARGET_KEYS {
129                if key_bytes.len() == target.len() && key_bytes == target {
130                    match target {
131                        b"timestamp" | b"time" => result.has_timestamp = true,
132                        b"coordinates" | b"coord" => result.has_coordinates = true,
133                        b"type" => result.has_type_field = true,
134                        _ => {}
135                    }
136                }
137            }
138        }
139
140        result
141    }
142}
143
/// Coarse JSON value classification used for fast type dispatch.
///
/// The enum is fieldless, so it is `Copy` and implements `Eq` and `Hash`,
/// which lets it serve as a key in hash maps and sets (e.g. per-class counters).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ValueClass {
    /// JSON object.
    Object,
    /// JSON array.
    Array,
    /// JSON string.
    String,
    /// Signed integer that fits into `i64`.
    Integer,
    /// Unsigned integer that fits into `u64`.
    UnsignedInteger,
    /// Floating-point number.
    Float,
    /// JSON boolean.
    Boolean,
    /// JSON null.
    Null,
}
164
/// Outcome of scanning an object's keys for well-known field names.
///
/// `Default` yields all flags `false` and a `key_count` of `0`. The type is
/// `Clone` and comparable so that callers and tests can compare whole results
/// instead of individual fields.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct KeyScanResult {
    /// Object contained a `timestamp`/`time` key.
    pub has_timestamp: bool,
    /// Object contained a `coordinates`/`coord` key.
    pub has_coordinates: bool,
    /// Object contained a `type` key.
    pub has_type_field: bool,
    /// Total number of keys observed in the object.
    pub key_count: usize,
}
177
178/// SIMD-optimized numeric operations for arrays
179pub struct SimdNumericOps;
180
181impl SimdNumericOps {
182    /// Fast sum calculation for numeric arrays with SIMD vectorization hints
183    #[inline(always)]
184    pub fn fast_array_sum(arr: &sonic_rs::Array) -> Option<f64> {
185        let mut sum = 0.0;
186        let mut count = 0;
187
188        // Vectorization hint for SIMD
189        let iter = arr.iter();
190        let size_hint = iter.size_hint().0;
191
192        // Use SIMD-friendly iteration for large arrays
193        if size_hint > 64 {
194            return Self::vectorized_sum(arr);
195        }
196
197        for value in iter {
198            let num = value.as_number()?;
199            if let Some(f) = num.as_f64() {
200                sum += f;
201                count += 1;
202            }
203        }
204
205        if count > 0 { Some(sum) } else { None }
206    }
207
208    /// Vectorized sum for large arrays (SIMD-optimized path)
209    #[inline(always)]
210    fn vectorized_sum(arr: &sonic_rs::Array) -> Option<f64> {
211        let mut sum = 0.0;
212        let mut count = 0;
213
214        // Process in chunks for better SIMD utilization
215        let chunk_size = 32; // Optimized for AVX2
216        let mut chunks = arr.iter().peekable();
217
218        while chunks.peek().is_some() {
219            let mut chunk_sum = 0.0;
220            let mut chunk_count = 0;
221
222            for _ in 0..chunk_size {
223                if let Some(value) = chunks.next() {
224                    let num = value.as_number()?;
225                    if let Some(f) = num.as_f64() {
226                        chunk_sum += f;
227                        chunk_count += 1;
228                    }
229                } else {
230                    break;
231                }
232            }
233
234            sum += chunk_sum;
235            count += chunk_count;
236        }
237
238        if count > 0 { Some(sum) } else { None }
239    }
240
241    /// Calculate array statistics in a single pass
242    #[inline(always)]
243    pub fn array_stats(arr: &sonic_rs::Array) -> Option<ArrayStats> {
244        let mut stats = ArrayStats {
245            min: f64::INFINITY,
246            max: f64::NEG_INFINITY,
247            sum: 0.0,
248            count: 0,
249        };
250
251        for value in arr.iter() {
252            let num = value.as_number()?;
253            if let Some(f) = num.as_f64() {
254                stats.min = stats.min.min(f);
255                stats.max = stats.max.max(f);
256                stats.sum += f;
257                stats.count += 1;
258            }
259        }
260
261        if stats.count > 0 { Some(stats) } else { None }
262    }
263}
264
/// Summary statistics gathered over the numeric elements of an array.
#[derive(Debug, Clone)]
pub struct ArrayStats {
    /// Minimum of the observed values.
    pub min: f64,
    /// Maximum of the observed values.
    pub max: f64,
    /// Total of the observed values.
    pub sum: f64,
    /// How many values contributed to `min`, `max`, and `sum`.
    pub count: usize,
}

impl ArrayStats {
    /// Returns `sum / count`.
    ///
    /// When `count` is zero the result is `0.0`, which avoids dividing by zero.
    pub fn mean(&self) -> f64 {
        match self.count {
            0 => 0.0,
            n => self.sum / n as f64,
        }
    }
}
288
#[cfg(test)]
mod tests {
    use super::*;
    use sonic_rs;

    /// Parses a test fixture, failing the test loudly if the JSON is malformed.
    fn parse(json: &str) -> SonicValue {
        sonic_rs::from_str(json).expect("test fixture must be valid JSON")
    }

    #[test]
    fn test_simd_classifier() {
        let value = parse(
            r#"{"number": 42, "text": "hello", "array": [1,2,3], "flag": true, "empty": null}"#,
        );

        // `expect` rather than `if let`: a fixture of the wrong shape must fail
        // the test instead of silently skipping every assertion.
        let obj = value.as_object().expect("fixture must be a JSON object");

        let scan_result = SimdClassifier::scan_object_keys(obj);
        assert_eq!(scan_result.key_count, 5);
        // None of the fixture's keys is one of the well-known field names.
        assert!(!scan_result.has_timestamp);
        assert!(!scan_result.has_coordinates);
        assert!(!scan_result.has_type_field);
    }

    #[test]
    fn test_numeric_array_detection() {
        let value = parse("[1, 2, 3, 4, 5]");
        let arr = value.as_array().expect("fixture must be a JSON array");
        assert!(SimdClassifier::is_numeric_array(arr));

        // Arrays with fewer than three elements are never reported as numeric.
        let short = parse("[1, 2]");
        let short_arr = short.as_array().expect("fixture must be a JSON array");
        assert!(!SimdClassifier::is_numeric_array(short_arr));

        // A single non-number element disqualifies the whole array.
        let mixed = parse(r#"[1, "two", 3]"#);
        let mixed_arr = mixed.as_array().expect("fixture must be a JSON array");
        assert!(!SimdClassifier::is_numeric_array(mixed_arr));
    }

    #[test]
    fn test_array_stats() {
        let value = parse("[1.5, 2.0, 3.5, 4.0]");
        let arr = value.as_array().expect("fixture must be a JSON array");

        let stats = SimdNumericOps::array_stats(arr).expect("numeric array must yield stats");
        assert_eq!(stats.count, 4);
        assert_eq!(stats.sum, 11.0);
        assert_eq!(stats.min, 1.5);
        assert_eq!(stats.max, 4.0);
        assert_eq!(stats.mean(), 2.75);
    }
}
328}