pjson_rs/parser/
value.rs

1//! Lazy JSON value types for zero-copy parsing
2
3use crate::parser::scanner::{Range, ScanResult};
4use crate::{Error, Result};
5use smallvec::SmallVec;
6
/// Zero-copy JSON value representation.
///
/// Variants that carry data hold borrowed slices with lifetime `'a` (or a lazy
/// container built over such a slice) rather than owned copies of the text.
#[derive(Debug, Clone)]
pub enum JsonValue<'a> {
    /// Raw bytes slice (not parsed yet)
    Raw(&'a [u8]),
    /// Parsed string (zero-copy)
    String(&'a str),
    /// Number stored as bytes for lazy parsing; `as_f64` / `as_i64` decode
    /// these bytes each time they are called.
    Number(&'a [u8]),
    /// Boolean value
    Bool(bool),
    /// Null value
    Null,
    /// Array with lazy evaluation
    Array(LazyArray<'a>),
    /// Object with lazy evaluation
    Object(LazyObject<'a>),
}
25
/// Lazy array that parses elements on-demand.
///
/// Holds the borrowed array text together with one `Range` per element; the
/// accessors slice `raw` with those ranges instead of copying element data.
#[derive(Debug, Clone)]
pub struct LazyArray<'a> {
    /// Raw JSON bytes
    raw: &'a [u8],
    /// Pre-computed element boundaries using SIMD scanning.
    /// Each range is used as `raw[start..end]`, so it must lie inside `raw`.
    boundaries: SmallVec<[Range; 32]>,
    /// Cache for parsed elements, keyed by element index.
    /// Currently only ever initialised empty; nothing reads or fills it yet.
    #[allow(dead_code)] // Future: caching for repeated element access
    cache: std::collections::HashMap<usize, JsonValue<'a>>,
}
37
/// Lazy object that parses fields on-demand.
///
/// Holds the borrowed object text together with one `FieldRange` per field;
/// lookups slice `raw` with those ranges instead of copying keys or values.
#[derive(Debug, Clone)]
pub struct LazyObject<'a> {
    /// Raw JSON bytes
    raw: &'a [u8],
    /// Pre-computed key-value boundaries.
    /// Key and value ranges are both used as `raw[start..end]`.
    fields: SmallVec<[FieldRange; 16]>,
    /// Cache for parsed fields, keyed by owned field name.
    /// Currently only ever initialised empty; nothing reads or fills it yet.
    #[allow(dead_code)] // Future: caching for repeated field access
    cache: std::collections::HashMap<String, JsonValue<'a>>,
}
49
/// Field boundary information for a single object member.
///
/// Both ranges are offsets into the owning `LazyObject`'s raw bytes.
#[derive(Debug, Clone)]
pub struct FieldRange {
    /// Key range (without quotes)
    key: Range,
    /// Value range
    value: Range,
}
58
59impl<'a> JsonValue<'a> {
60    /// Get value as string if it's a string type
61    pub fn as_str(&self) -> Option<&str> {
62        match self {
63            JsonValue::String(s) => Some(s),
64            _ => None,
65        }
66    }
67
68    /// Get value as f64 if it's a number
69    pub fn as_f64(&self) -> Option<f64> {
70        match self {
71            JsonValue::Number(bytes) => std::str::from_utf8(bytes).ok()?.parse().ok(),
72            _ => None,
73        }
74    }
75
76    /// Get value as i64 if it's an integer number
77    pub fn as_i64(&self) -> Option<i64> {
78        match self {
79            JsonValue::Number(bytes) => std::str::from_utf8(bytes).ok()?.parse().ok(),
80            _ => None,
81        }
82    }
83
84    /// Get value as bool if it's a boolean
85    pub fn as_bool(&self) -> Option<bool> {
86        match self {
87            JsonValue::Bool(b) => Some(*b),
88            _ => None,
89        }
90    }
91
92    /// Check if value is null
93    pub fn is_null(&self) -> bool {
94        matches!(self, JsonValue::Null)
95    }
96
97    /// Get value as array
98    pub fn as_array(&self) -> Option<&LazyArray<'a>> {
99        match self {
100            JsonValue::Array(arr) => Some(arr),
101            _ => None,
102        }
103    }
104
105    /// Get value as object
106    pub fn as_object(&self) -> Option<&LazyObject<'a>> {
107        match self {
108            JsonValue::Object(obj) => Some(obj),
109            _ => None,
110        }
111    }
112
113    /// Force parse raw bytes into structured value
114    pub fn parse_raw(&mut self) -> Result<()> {
115        match self {
116            JsonValue::Raw(_bytes) => {
117                // This would use the main parser to parse the raw bytes
118                // For now, we'll leave this as a placeholder
119                *self = JsonValue::Null; // Simplified
120                Ok(())
121            }
122            _ => Ok(()),
123        }
124    }
125}
126
127impl<'a> LazyArray<'a> {
128    /// Create new lazy array from scan result
129    pub fn from_scan(raw: &'a [u8], scan_result: ScanResult) -> Self {
130        // Extract array element boundaries from scan result
131        let boundaries = Self::extract_element_boundaries(raw, &scan_result);
132
133        Self {
134            raw,
135            boundaries,
136            cache: std::collections::HashMap::new(),
137        }
138    }
139
140    /// Get array length
141    pub fn len(&self) -> usize {
142        self.boundaries.len()
143    }
144
145    /// Check if array is empty
146    pub fn is_empty(&self) -> bool {
147        self.boundaries.is_empty()
148    }
149
150    /// Get element at index (simplified - returns raw bytes)
151    pub fn get(&self, index: usize) -> Option<&'a [u8]> {
152        if index >= self.boundaries.len() {
153            return None;
154        }
155
156        let range = self.boundaries[index];
157        Some(&self.raw[range.start..range.end])
158    }
159
160    /// Get element at index, parsing if necessary (simplified)
161    pub fn get_parsed(&self, index: usize) -> Option<JsonValue<'a>> {
162        self.get(index).map(JsonValue::Raw)
163    }
164
165    /// Iterator over array elements (lazy)
166    pub fn iter(&'a self) -> LazyArrayIter<'a> {
167        LazyArrayIter {
168            array: self,
169            index: 0,
170        }
171    }
172
173    /// Extract element boundaries from structural analysis
174    fn extract_element_boundaries(_raw: &[u8], _scan_result: &ScanResult) -> SmallVec<[Range; 32]> {
175        // This would analyze the structural characters to find array element boundaries
176        // For now, return empty boundaries as placeholder
177        SmallVec::new()
178    }
179
180    /// Check if this appears to be a numeric array for SIMD optimization
181    pub fn is_numeric(&self) -> bool {
182        // Heuristic: check first few elements
183        self.boundaries.len() > 4
184            && self.boundaries.iter().take(3).all(|range| {
185                let slice = &self.raw[range.start..range.end];
186                self.looks_like_number(slice)
187            })
188    }
189
190    fn looks_like_number(&self, bytes: &[u8]) -> bool {
191        if bytes.is_empty() {
192            return false;
193        }
194
195        bytes.iter().all(|&b| {
196            b.is_ascii_digit() || b == b'.' || b == b'-' || b == b'+' || b == b'e' || b == b'E'
197        })
198    }
199}
200
201impl<'a> LazyObject<'a> {
202    /// Create new lazy object from scan result
203    pub fn from_scan(raw: &'a [u8], scan_result: ScanResult) -> Self {
204        let fields = Self::extract_field_boundaries(raw, &scan_result);
205
206        Self {
207            raw,
208            fields,
209            cache: std::collections::HashMap::new(),
210        }
211    }
212
213    /// Get number of fields
214    pub fn len(&self) -> usize {
215        self.fields.len()
216    }
217
218    /// Check if object is empty
219    pub fn is_empty(&self) -> bool {
220        self.fields.is_empty()
221    }
222
223    /// Get field value by key (simplified)
224    pub fn get(&self, key: &str) -> Option<&'a [u8]> {
225        // Find field by key
226        let field_range = self.fields.iter().find(|field| {
227            let key_bytes = &self.raw[field.key.start..field.key.end];
228            std::str::from_utf8(key_bytes) == Ok(key)
229        })?;
230
231        // Return value bytes
232        Some(&self.raw[field_range.value.start..field_range.value.end])
233    }
234
235    /// Get all field keys
236    pub fn keys(&self) -> Result<Vec<&str>> {
237        self.fields
238            .iter()
239            .map(|field| {
240                let key_bytes = &self.raw[field.key.start..field.key.end];
241                std::str::from_utf8(key_bytes).map_err(Error::from)
242            })
243            .collect()
244    }
245
246    /// Extract field boundaries from structural analysis
247    fn extract_field_boundaries(
248        _raw: &[u8],
249        _scan_result: &ScanResult,
250    ) -> SmallVec<[FieldRange; 16]> {
251        // This would analyze the structural characters to find object field boundaries
252        // For now, return empty fields as placeholder
253        SmallVec::new()
254    }
255}
256
257/// Iterator for lazy array elements
258pub struct LazyArrayIter<'a> {
259    array: &'a LazyArray<'a>,
260    index: usize,
261}
262
263impl<'a> Iterator for LazyArrayIter<'a> {
264    type Item = &'a [u8]; // Raw element bytes
265
266    fn next(&mut self) -> Option<Self::Item> {
267        if self.index >= self.array.boundaries.len() {
268            return None;
269        }
270
271        let range = self.array.boundaries[self.index];
272        self.index += 1;
273
274        Some(&self.array.raw[range.start..range.end])
275    }
276}
277
278impl FieldRange {
279    /// Create new field range
280    pub fn new(key: Range, value: Range) -> Self {
281        Self { key, value }
282    }
283}
284
#[cfg(test)]
mod tests {
    use super::*;

    /// A string value exposes its text and refuses numeric access.
    #[test]
    fn test_json_value_types() {
        let value = JsonValue::String("hello");

        assert_eq!(Some("hello"), value.as_str());
        assert!(value.as_f64().is_none());
    }

    /// Boundary extraction is a placeholder, so a fresh array has no elements.
    #[test]
    fn test_lazy_array_creation() {
        let array = LazyArray::from_scan(b"[1, 2, 3]", ScanResult::new());

        assert_eq!(array.len(), 0);
    }

    /// The number heuristic accepts a decimal literal and rejects a quoted string.
    #[test]
    fn test_number_detection() {
        let array = LazyArray::from_scan(b"[1.0, 2.5, 3.14]", ScanResult::new());

        let decimal_ok = array.looks_like_number(b"123.45");
        let quoted_ok = array.looks_like_number(b"\"string\"");

        assert!(decimal_ok);
        assert!(!quoted_ok);
    }
}