pjson_rs/parser/
value.rs

//! Lazy JSON value types for zero-copy parsing.
2
3use crate::parser::scanner::{Range, ScanResult};
4use crate::{Error, Result};
5use smallvec::SmallVec;
6
/// Zero-copy JSON value representation.
///
/// Scalar text (`Raw`, `String`, `Number`) is held as slices borrowed for the
/// lifetime `'a`; containers are represented by the lazy wrapper types
/// [`LazyArray`] and [`LazyObject`].
#[derive(Debug, Clone)]
pub enum JsonValue<'a> {
    /// Raw bytes slice (not parsed yet).
    Raw(&'a [u8]),
    /// Parsed string (zero-copy): a borrowed `&str`.
    String(&'a str),
    /// Number stored as bytes for lazy parsing; the bytes are converted to a
    /// numeric type only when an accessor such as `as_f64` / `as_i64` is called.
    Number(&'a [u8]),
    /// Boolean value.
    Bool(bool),
    /// Null value.
    Null,
    /// Array with lazy evaluation.
    Array(LazyArray<'a>),
    /// Object with lazy evaluation.
    Object(LazyObject<'a>),
}
25
/// Lazy array that parses elements on-demand.
///
/// Elements are exposed as sub-slices of `raw`, selected by the entries of
/// `boundaries`.
#[derive(Debug, Clone)]
pub struct LazyArray<'a> {
    /// Raw JSON bytes that the element ranges index into.
    raw: &'a [u8],
    /// Pre-computed element boundaries using SIMD scanning.
    /// Each range is applied to `raw` as `start..end`.
    boundaries: SmallVec<[Range; 32]>,
    /// Cache for parsed elements, keyed by element index.
    /// NOTE(review): within this file the cache is only initialised empty and
    /// never read or written afterwards.
    cache: std::collections::HashMap<usize, JsonValue<'a>>,
}
36
/// Lazy object that parses fields on-demand.
///
/// Keys and values are exposed as sub-slices of `raw`, selected by the entries
/// of `fields`.
#[derive(Debug, Clone)]
pub struct LazyObject<'a> {
    /// Raw JSON bytes that the field ranges index into.
    raw: &'a [u8],
    /// Pre-computed key-value boundaries, one entry per field.
    fields: SmallVec<[FieldRange; 16]>,
    /// Cache for parsed fields, keyed by an owned copy of the field name.
    /// NOTE(review): within this file the cache is only initialised empty and
    /// never read or written afterwards.
    cache: std::collections::HashMap<String, JsonValue<'a>>,
}
47
/// Field boundary information: where one object member's key and value live
/// inside the owning object's raw bytes.
#[derive(Debug, Clone)]
pub struct FieldRange {
    /// Key range (without quotes); applied to the raw bytes as `start..end`.
    key: Range,
    /// Value range; applied to the raw bytes as `start..end`.
    value: Range,
}
56
57impl<'a> JsonValue<'a> {
58    /// Get value as string if it's a string type
59    pub fn as_str(&self) -> Option<&str> {
60        match self {
61            JsonValue::String(s) => Some(s),
62            _ => None,
63        }
64    }
65
66    /// Get value as f64 if it's a number
67    pub fn as_f64(&self) -> Option<f64> {
68        match self {
69            JsonValue::Number(bytes) => std::str::from_utf8(bytes).ok()?.parse().ok(),
70            _ => None,
71        }
72    }
73
74    /// Get value as i64 if it's an integer number
75    pub fn as_i64(&self) -> Option<i64> {
76        match self {
77            JsonValue::Number(bytes) => std::str::from_utf8(bytes).ok()?.parse().ok(),
78            _ => None,
79        }
80    }
81
82    /// Get value as bool if it's a boolean
83    pub fn as_bool(&self) -> Option<bool> {
84        match self {
85            JsonValue::Bool(b) => Some(*b),
86            _ => None,
87        }
88    }
89
90    /// Check if value is null
91    pub fn is_null(&self) -> bool {
92        matches!(self, JsonValue::Null)
93    }
94
95    /// Get value as array
96    pub fn as_array(&self) -> Option<&LazyArray<'a>> {
97        match self {
98            JsonValue::Array(arr) => Some(arr),
99            _ => None,
100        }
101    }
102
103    /// Get value as object
104    pub fn as_object(&self) -> Option<&LazyObject<'a>> {
105        match self {
106            JsonValue::Object(obj) => Some(obj),
107            _ => None,
108        }
109    }
110
111    /// Force parse raw bytes into structured value
112    pub fn parse_raw(&mut self) -> Result<()> {
113        match self {
114            JsonValue::Raw(_bytes) => {
115                // This would use the main parser to parse the raw bytes
116                // For now, we'll leave this as a placeholder
117                *self = JsonValue::Null; // Simplified
118                Ok(())
119            }
120            _ => Ok(()),
121        }
122    }
123}
124
125impl<'a> LazyArray<'a> {
126    /// Create new lazy array from scan result
127    pub fn from_scan(raw: &'a [u8], scan_result: ScanResult) -> Self {
128        // Extract array element boundaries from scan result
129        let boundaries = Self::extract_element_boundaries(raw, &scan_result);
130
131        Self {
132            raw,
133            boundaries,
134            cache: std::collections::HashMap::new(),
135        }
136    }
137
138    /// Get array length
139    pub fn len(&self) -> usize {
140        self.boundaries.len()
141    }
142
143    /// Check if array is empty
144    pub fn is_empty(&self) -> bool {
145        self.boundaries.is_empty()
146    }
147
148    /// Get element at index (simplified - returns raw bytes)
149    pub fn get(&self, index: usize) -> Option<&'a [u8]> {
150        if index >= self.boundaries.len() {
151            return None;
152        }
153
154        let range = self.boundaries[index];
155        Some(&self.raw[range.start..range.end])
156    }
157
158    /// Get element at index, parsing if necessary (simplified)
159    pub fn get_parsed(&self, index: usize) -> Option<JsonValue<'a>> {
160        self.get(index).map(JsonValue::Raw)
161    }
162
163    /// Iterator over array elements (lazy)
164    pub fn iter(&'a self) -> LazyArrayIter<'a> {
165        LazyArrayIter {
166            array: self,
167            index: 0,
168        }
169    }
170
171    /// Extract element boundaries from structural analysis
172    fn extract_element_boundaries(_raw: &[u8], _scan_result: &ScanResult) -> SmallVec<[Range; 32]> {
173        // This would analyze the structural characters to find array element boundaries
174        // For now, return empty boundaries as placeholder
175        SmallVec::new()
176    }
177
178    /// Check if this appears to be a numeric array for SIMD optimization
179    pub fn is_numeric(&self) -> bool {
180        // Heuristic: check first few elements
181        self.boundaries.len() > 4
182            && self.boundaries.iter().take(3).all(|range| {
183                let slice = &self.raw[range.start..range.end];
184                self.looks_like_number(slice)
185            })
186    }
187
188    fn looks_like_number(&self, bytes: &[u8]) -> bool {
189        if bytes.is_empty() {
190            return false;
191        }
192
193        bytes.iter().all(|&b| {
194            b.is_ascii_digit() || b == b'.' || b == b'-' || b == b'+' || b == b'e' || b == b'E'
195        })
196    }
197}
198
199impl<'a> LazyObject<'a> {
200    /// Create new lazy object from scan result
201    pub fn from_scan(raw: &'a [u8], scan_result: ScanResult) -> Self {
202        let fields = Self::extract_field_boundaries(raw, &scan_result);
203
204        Self {
205            raw,
206            fields,
207            cache: std::collections::HashMap::new(),
208        }
209    }
210
211    /// Get number of fields
212    pub fn len(&self) -> usize {
213        self.fields.len()
214    }
215
216    /// Check if object is empty
217    pub fn is_empty(&self) -> bool {
218        self.fields.is_empty()
219    }
220
221    /// Get field value by key (simplified)
222    pub fn get(&self, key: &str) -> Option<&'a [u8]> {
223        // Find field by key
224        let field_range = self.fields.iter().find(|field| {
225            let key_bytes = &self.raw[field.key.start..field.key.end];
226            std::str::from_utf8(key_bytes) == Ok(key)
227        })?;
228
229        // Return value bytes
230        Some(&self.raw[field_range.value.start..field_range.value.end])
231    }
232
233    /// Get all field keys
234    pub fn keys(&self) -> Result<Vec<&str>> {
235        self.fields
236            .iter()
237            .map(|field| {
238                let key_bytes = &self.raw[field.key.start..field.key.end];
239                std::str::from_utf8(key_bytes).map_err(Error::from)
240            })
241            .collect()
242    }
243
244    /// Extract field boundaries from structural analysis
245    fn extract_field_boundaries(
246        _raw: &[u8],
247        _scan_result: &ScanResult,
248    ) -> SmallVec<[FieldRange; 16]> {
249        // This would analyze the structural characters to find object field boundaries
250        // For now, return empty fields as placeholder
251        SmallVec::new()
252    }
253}
254
255/// Iterator for lazy array elements
256pub struct LazyArrayIter<'a> {
257    array: &'a LazyArray<'a>,
258    index: usize,
259}
260
261impl<'a> Iterator for LazyArrayIter<'a> {
262    type Item = &'a [u8]; // Raw element bytes
263
264    fn next(&mut self) -> Option<Self::Item> {
265        if self.index >= self.array.boundaries.len() {
266            return None;
267        }
268
269        let range = self.array.boundaries[self.index];
270        self.index += 1;
271
272        Some(&self.array.raw[range.start..range.end])
273    }
274}
275
276impl FieldRange {
277    /// Create new field range
278    pub fn new(key: Range, value: Range) -> Self {
279        Self { key, value }
280    }
281}
282
#[cfg(test)]
mod tests {
    use super::*;

    /// A string value exposes its text and refuses numeric access.
    #[test]
    fn test_json_value_types() {
        let value = JsonValue::String("hello");

        assert!(value.as_f64().is_none());
        assert_eq!(value.as_str(), Some("hello"));
    }

    /// With the placeholder boundary extraction, a freshly scanned array
    /// reports no elements.
    #[test]
    fn test_lazy_array_creation() {
        let input = b"[1, 2, 3]";
        let array = LazyArray::from_scan(input, ScanResult::new());

        assert_eq!(array.len(), 0); // Empty boundaries in placeholder
    }

    /// The numeric heuristic accepts a plain decimal and rejects a quoted string.
    #[test]
    fn test_number_detection() {
        let input = b"[1.0, 2.5, 3.14]";
        let array = LazyArray::from_scan(input, ScanResult::new());

        let decimal_ok = array.looks_like_number(b"123.45");
        let quoted_ok = array.looks_like_number(b"\"string\"");

        assert!(decimal_ok);
        assert!(!quoted_ok);
    }
}