Skip to main content

pjson_rs/parser/
mod.rs

1//! High-performance JSON parsing module with hybrid approach
2//!
3//! This module provides both SIMD-optimized parsing and serde fallback,
4//! allowing rapid MVP development while building towards maximum performance.
5
6#[cfg(feature = "partial-parse")]
7pub mod partial;
8
9#[cfg(feature = "partial-parse")]
10pub use partial::{
11    JiterConfig, JiterPartialParser, ParseDiagnostic, PartialJsonParser, PartialParseResult,
12    StreamingHint,
13};
14
15pub mod aligned_alloc;
16pub mod buffer_pool;
17pub mod scanner;
18pub mod simd;
19pub mod simple;
20pub mod sonic;
21pub mod value;
22pub mod zero_copy;
23
24pub use aligned_alloc::{AlignedAllocator, aligned_allocator};
25pub use buffer_pool::{
26    BufferPool, BufferSize, PoolConfig, PooledBuffer, SimdType, global_buffer_pool,
27};
28pub use scanner::{JsonScanner, ScanResult, StringLocation};
29pub use simple::{ParseConfig, ParseStats, SimpleParser};
30pub use sonic::{SonicConfig, SonicParser};
31pub use value::{JsonValue, LazyArray, LazyObject};
32pub use zero_copy::{IncrementalParser, LazyJsonValue, LazyParser, MemoryUsage, ZeroCopyParser};
33
34use crate::{Result, SemanticMeta};
35
/// High-performance hybrid parser with SIMD acceleration
///
/// Owns one instance of each backend plus a flag that selects which backend
/// [`Parser::parse`] tries first.
pub struct Parser {
    /// sonic-rs backed parser; tried first by `parse` when `use_sonic` is set.
    sonic: SonicParser,
    /// Serde-based parser; used directly when `use_sonic` is `false`, as the
    /// fallback when the sonic backend returns an error, and for every
    /// `parse_with_semantics` call.
    simple: SimpleParser,
    /// Whether `parse` and `stats` go through the sonic backend.
    use_sonic: bool,
}
42
43impl Parser {
44    /// Create new parser with default configuration.
45    ///
46    /// Selects the sonic-rs SIMD backend when any `simd-*` Cargo feature is
47    /// enabled (which is the default via `simd-auto`). Without any `simd-*`
48    /// feature, falls back to the portable serde-based parser.
49    pub fn new() -> Self {
50        Self {
51            sonic: SonicParser::new(),
52            simple: SimpleParser::new(),
53            use_sonic: cfg!(pjs_simd),
54        }
55    }
56
57    /// Create parser with custom configuration
58    pub fn with_config(config: ParseConfig) -> Self {
59        let sonic_config = SonicConfig {
60            detect_semantics: config.detect_semantics,
61            max_input_size: config.max_size_mb * 1024 * 1024,
62        };
63
64        Self {
65            sonic: SonicParser::with_config(sonic_config),
66            simple: SimpleParser::with_config(config),
67            use_sonic: cfg!(pjs_simd),
68        }
69    }
70
71    /// Create parser with serde fallback (for compatibility)
72    pub fn with_serde_fallback() -> Self {
73        Self {
74            sonic: SonicParser::new(),
75            simple: SimpleParser::new(),
76            use_sonic: false,
77        }
78    }
79
80    /// Create parser optimized for zero-copy performance
81    pub fn zero_copy_optimized() -> Self {
82        Self {
83            sonic: SonicParser::new(),
84            simple: SimpleParser::new(),
85            use_sonic: false,
86        }
87    }
88
89    /// Parse JSON bytes into PJS Frame using optimal strategy
90    pub fn parse(&self, input: &[u8]) -> Result<crate::Frame> {
91        if self.use_sonic {
92            // Try sonic-rs first for performance
93            match self.sonic.parse(input) {
94                Ok(frame) => Ok(frame),
95                Err(_) => {
96                    // Fallback to serde for compatibility
97                    self.simple.parse(input)
98                }
99            }
100        } else {
101            self.simple.parse(input)
102        }
103    }
104
105    /// Parse with explicit semantic hints
106    pub fn parse_with_semantics(
107        &self,
108        input: &[u8],
109        semantics: &SemanticMeta,
110    ) -> Result<crate::Frame> {
111        if self.use_sonic {
112            // Sonic parser doesn't support explicit semantics yet
113            // Use simple parser for this case
114            self.simple.parse_with_semantics(input, semantics)
115        } else {
116            self.simple.parse_with_semantics(input, semantics)
117        }
118    }
119
120    /// Parse the largest valid JSON prefix from `input`, tolerating truncation.
121    ///
122    /// Delegates to [`JiterPartialParser`] with default configuration.
123    ///
124    /// Returns `Ok(None)` when `consumed == 0` (no structurally complete prefix
125    /// could be recovered — e.g. input `[` or `-`). Returns `Ok(Some(_))` when
126    /// at least one byte was committed.
127    ///
128    /// # Errors
129    ///
130    /// Returns [`crate::error::Error::InvalidJson`] for syntactically invalid
131    /// input (e.g. stray `}`). Returns [`crate::error::Error::Buffer`] when the
132    /// input exceeds the default `max_input_size` (100 MiB).
133    ///
134    /// # Examples
135    ///
136    /// ```rust,no_run
137    /// use pjson_rs::parser::Parser;
138    ///
139    /// let parser = Parser::new();
140    /// let result = parser.parse_partial(b"{\"a\":1,\"b\":[2,3").unwrap();
141    /// assert!(result.is_some());
142    /// ```
143    #[cfg(feature = "partial-parse")]
144    pub fn parse_partial(&self, input: &[u8]) -> crate::Result<Option<PartialParseResult>> {
145        use partial::PartialJsonParser as _;
146        let result = JiterPartialParser::default().parse_partial(input)?;
147        if result.consumed == 0 {
148            Ok(None)
149        } else {
150            Ok(Some(result))
151        }
152    }
153
154    /// Get parser statistics
155    pub fn stats(&self) -> ParseStats {
156        if self.use_sonic {
157            let sonic_stats = self.sonic.get_stats();
158            ParseStats {
159                total_parses: sonic_stats.total_parses,
160                semantic_detections: sonic_stats.sonic_successes,
161                avg_parse_time_ms: sonic_stats.avg_parse_time_ns as f64 / 1_000_000.0,
162            }
163        } else {
164            self.simple.stats()
165        }
166    }
167}
168
169impl Default for Parser {
170    fn default() -> Self {
171        Self::new()
172    }
173}
174
/// JSON value types for initial classification
///
/// A plain, fieldless tag: cheap to copy, totally comparable (`Eq`) and
/// hashable, so it can be used directly as a `HashMap`/`HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ValueType {
    /// JSON object.
    Object,
    /// JSON array.
    Array,
    /// JSON string.
    String,
    /// JSON number (integer or float).
    Number,
    /// JSON boolean.
    Boolean,
    /// JSON null.
    Null,
}
191
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `parser` accepts `input` without returning an error.
    fn assert_parses(parser: &Parser, input: &[u8]) {
        let outcome = parser.parse(input);
        assert!(outcome.is_ok());
    }

    /// A freshly built parser has not recorded any parses yet.
    #[test]
    fn test_parser_creation() {
        let stats = Parser::new().stats();
        assert_eq!(stats.total_parses, 0);
    }

    /// A small object parses and the frame payload is as long as the input.
    #[test]
    fn test_simple_parsing() {
        let parser = Parser::new();
        let input = br#"{"hello": "world"}"#;

        let outcome = parser.parse(input);
        assert!(outcome.is_ok());

        // Simple JSON may not have semantic metadata
        let frame = outcome.unwrap();
        assert_eq!(frame.payload.len(), input.len());
    }

    /// A flat array of floats parses successfully.
    #[test]
    fn test_numeric_array_parsing() {
        assert_parses(&Parser::new(), b"[1.0, 2.0, 3.0, 4.0]");
    }

    /// Parsing with an explicit numeric-array hint succeeds.
    #[test]
    fn test_semantic_parsing() {
        let semantic_type = crate::semantic::SemanticType::NumericArray {
            dtype: crate::semantic::NumericDType::I32,
            length: Some(4),
        };
        let semantics = crate::SemanticMeta::new(semantic_type);

        let parser = Parser::new();
        let outcome = parser.parse_with_semantics(b"[1, 2, 3, 4]", &semantics);
        assert!(outcome.is_ok());
    }

    /// A parser built from a custom configuration still parses a small object.
    #[test]
    fn test_custom_config() {
        let parser = Parser::with_config(ParseConfig {
            detect_semantics: false,
            max_size_mb: 50,
            stream_large_arrays: false,
            stream_threshold: 500,
        });

        assert_parses(&parser, br#"{"test": "data"}"#);
    }
}
250}