1use crate::parser::scanner::{Range, ScanResult};
4use crate::{Error, Result};
5use smallvec::SmallVec;
6
7#[derive(Debug, Clone)]
9pub enum JsonValue<'a> {
10 Raw(&'a [u8]),
12 String(&'a str),
14 Number(&'a [u8]),
16 Bool(bool),
18 Null,
20 Array(LazyArray<'a>),
22 Object(LazyObject<'a>),
24}
25
26#[derive(Debug, Clone)]
28pub struct LazyArray<'a> {
29 raw: &'a [u8],
31 boundaries: SmallVec<[Range; 32]>,
33}
34
35#[derive(Debug, Clone)]
37pub struct LazyObject<'a> {
38 raw: &'a [u8],
40 fields: SmallVec<[FieldRange; 16]>,
42}
43
44#[derive(Debug, Clone)]
46pub struct FieldRange {
47 key: Range,
49 value: Range,
51}
52
53impl<'a> JsonValue<'a> {
54 pub fn as_str(&self) -> Option<&str> {
56 match self {
57 JsonValue::String(s) => Some(s),
58 _ => None,
59 }
60 }
61
62 pub fn as_f64(&self) -> Option<f64> {
64 match self {
65 JsonValue::Number(bytes) => std::str::from_utf8(bytes).ok()?.parse().ok(),
66 _ => None,
67 }
68 }
69
70 pub fn as_i64(&self) -> Option<i64> {
72 match self {
73 JsonValue::Number(bytes) => std::str::from_utf8(bytes).ok()?.parse().ok(),
74 _ => None,
75 }
76 }
77
78 pub fn as_bool(&self) -> Option<bool> {
80 match self {
81 JsonValue::Bool(b) => Some(*b),
82 _ => None,
83 }
84 }
85
86 pub fn is_null(&self) -> bool {
88 matches!(self, JsonValue::Null)
89 }
90
91 pub fn as_array(&self) -> Option<&LazyArray<'a>> {
93 match self {
94 JsonValue::Array(arr) => Some(arr),
95 _ => None,
96 }
97 }
98
99 pub fn as_object(&self) -> Option<&LazyObject<'a>> {
101 match self {
102 JsonValue::Object(obj) => Some(obj),
103 _ => None,
104 }
105 }
106
107 pub fn parse_raw(&mut self) -> Result<()> {
139 let bytes = if let JsonValue::Raw(bytes) = self {
140 *bytes
141 } else {
142 return Ok(());
143 };
144
145 let Some(start) = bytes.iter().position(|b| !b.is_ascii_whitespace()) else {
146 return Err(Error::invalid_json(0, "empty input"));
147 };
148 let end = bytes
149 .iter()
150 .rposition(|b| !b.is_ascii_whitespace())
151 .map(|i| i + 1)
152 .unwrap_or(bytes.len());
153 let trimmed = &bytes[start..end];
154
155 *self = match trimmed[0] {
156 b'n' if trimmed == b"null" => JsonValue::Null,
157 b't' if trimmed == b"true" => JsonValue::Bool(true),
158 b'f' if trimmed == b"false" => JsonValue::Bool(false),
159 b'"' => {
160 if trimmed.len() < 2 || trimmed[trimmed.len() - 1] != b'"' {
161 return Err(Error::invalid_json(start, "unterminated string"));
162 }
163 let inner = &trimmed[1..trimmed.len() - 1];
164 if inner.contains(&b'\\') {
165 return Err(Error::invalid_json(
166 start,
167 "escaped strings cannot be represented zero-copy",
168 ));
169 }
170 JsonValue::String(std::str::from_utf8(inner)?)
171 }
172 b'[' => JsonValue::Array(LazyArray::from_scan(trimmed, ScanResult::new())),
173 b'{' => JsonValue::Object(LazyObject::from_scan(trimmed, ScanResult::new())),
174 b'-' | b'0'..=b'9' => JsonValue::Number(trimmed),
175 _ => return Err(Error::invalid_json(start, "unrecognised JSON value")),
176 };
177
178 Ok(())
179 }
180}
181
182impl<'a> LazyArray<'a> {
183 pub fn from_scan(raw: &'a [u8], scan_result: ScanResult) -> Self {
185 let boundaries = Self::extract_element_boundaries(raw, &scan_result);
187
188 Self { raw, boundaries }
189 }
190
191 pub fn len(&self) -> usize {
193 self.boundaries.len()
194 }
195
196 pub fn is_empty(&self) -> bool {
198 self.boundaries.is_empty()
199 }
200
201 pub fn get(&self, index: usize) -> Option<&'a [u8]> {
203 if index >= self.boundaries.len() {
204 return None;
205 }
206
207 let range = self.boundaries[index];
208 Some(&self.raw[range.start..range.end])
209 }
210
211 pub fn get_parsed(&self, index: usize) -> Option<JsonValue<'a>> {
213 self.get(index).map(JsonValue::Raw)
214 }
215
216 pub fn iter(&'a self) -> LazyArrayIter<'a> {
218 LazyArrayIter {
219 array: self,
220 index: 0,
221 }
222 }
223
224 fn extract_element_boundaries(raw: &[u8], _scan_result: &ScanResult) -> SmallVec<[Range; 32]> {
236 let mut result = SmallVec::new();
237 let len = raw.len();
238
239 let mut pos = 0;
241 while pos < len && raw[pos] != b'[' {
242 pos += 1;
243 }
244 if pos == len {
245 return result;
246 }
247 pos += 1; let mut depth: usize = 1;
250 let mut in_string = false;
251 let mut elem_start: Option<usize> = None;
252
253 while pos < len {
254 let b = raw[pos];
255
256 if in_string {
257 if b == b'\\' {
258 pos += 1;
260 } else if b == b'"' {
261 in_string = false;
262 }
263 pos += 1;
264 continue;
265 }
266
267 match b {
268 b'"' => {
269 in_string = true;
270 if elem_start.is_none() {
271 elem_start = Some(pos);
272 }
273 }
274 b'[' | b'{' => {
275 depth += 1;
276 if elem_start.is_none() {
277 elem_start = Some(pos);
278 }
279 }
280 b']' | b'}' => {
281 depth -= 1;
282 if depth == 0 {
283 if let Some(start) = elem_start {
285 let end = trim_end(raw, start, pos);
286 if end > start {
287 result.push(Range::new(start, end));
288 }
289 }
290 break;
291 }
292 }
293 b',' if depth == 1 => {
294 if let Some(start) = elem_start {
296 let end = trim_end(raw, start, pos);
297 if end > start {
298 result.push(Range::new(start, end));
299 }
300 }
301 elem_start = None;
302 }
303 b' ' | b'\t' | b'\n' | b'\r' => {
304 pos += 1;
306 continue;
307 }
308 _ => {
309 if elem_start.is_none() {
310 elem_start = Some(pos);
311 }
312 }
313 }
314 pos += 1;
315 }
316
317 result
318 }
319
320 pub fn is_numeric(&self) -> bool {
322 self.boundaries.len() > 4
324 && self.boundaries.iter().take(3).all(|range| {
325 let slice = &self.raw[range.start..range.end];
326 self.looks_like_number(slice)
327 })
328 }
329
330 fn looks_like_number(&self, bytes: &[u8]) -> bool {
331 if bytes.is_empty() {
332 return false;
333 }
334
335 bytes.iter().all(|&b| {
336 b.is_ascii_digit() || b == b'.' || b == b'-' || b == b'+' || b == b'e' || b == b'E'
337 })
338 }
339}
340
341impl<'a> LazyObject<'a> {
342 pub fn from_scan(raw: &'a [u8], scan_result: ScanResult) -> Self {
344 let fields = Self::extract_field_boundaries(raw, &scan_result);
345
346 Self { raw, fields }
347 }
348
349 pub fn len(&self) -> usize {
351 self.fields.len()
352 }
353
354 pub fn is_empty(&self) -> bool {
356 self.fields.is_empty()
357 }
358
359 pub fn get(&self, key: &str) -> Option<&'a [u8]> {
361 let field_range = self.fields.iter().find(|field| {
363 let key_bytes = &self.raw[field.key.start..field.key.end];
364 std::str::from_utf8(key_bytes) == Ok(key)
365 })?;
366
367 Some(&self.raw[field_range.value.start..field_range.value.end])
369 }
370
371 pub fn keys(&self) -> Result<Vec<&str>> {
373 self.fields
374 .iter()
375 .map(|field| {
376 let key_bytes = &self.raw[field.key.start..field.key.end];
377 std::str::from_utf8(key_bytes).map_err(Error::from)
378 })
379 .collect()
380 }
381
382 fn extract_field_boundaries(
394 raw: &[u8],
395 _scan_result: &ScanResult,
396 ) -> SmallVec<[FieldRange; 16]> {
397 let mut result = SmallVec::new();
398 let len = raw.len();
399
400 let mut pos = 0;
402 while pos < len && raw[pos] != b'{' {
403 pos += 1;
404 }
405 if pos == len {
406 return result;
407 }
408 pos += 1; loop {
411 while pos < len && raw[pos].is_ascii_whitespace() {
413 pos += 1;
414 }
415 if pos >= len || raw[pos] == b'}' {
416 break;
417 }
418 if raw[pos] != b'"' {
419 break;
421 }
422 pos += 1; let key_start = pos;
424 while pos < len && raw[pos] != b'"' {
426 if raw[pos] == b'\\' {
427 pos += 1; }
429 pos += 1;
430 }
431 let key_end = pos;
432 if pos < len {
433 pos += 1; }
435
436 while pos < len && (raw[pos].is_ascii_whitespace() || raw[pos] == b':') {
438 pos += 1;
439 }
440 if pos >= len {
441 break;
442 }
443
444 let value_start = pos;
446 let mut depth: usize = 0;
447 let mut in_str = false;
448
449 while pos < len {
450 let b = raw[pos];
451 if in_str {
452 if b == b'\\' {
453 pos += 1; } else if b == b'"' {
455 in_str = false;
456 if depth == 0 {
457 pos += 1;
458 break;
459 }
460 }
461 pos += 1;
462 continue;
463 }
464 match b {
465 b'"' => {
466 in_str = true;
467 }
468 b'[' | b'{' => depth += 1,
469 b']' | b'}' => {
470 if depth == 0 {
471 break;
473 }
474 depth -= 1;
475 if depth == 0 {
476 pos += 1;
477 break;
478 }
479 }
480 b',' if depth == 0 => {
481 break;
483 }
484 _ => {}
485 }
486 pos += 1;
487 }
488
489 let value_end = trim_end(raw, value_start, pos);
490 if value_end > value_start {
491 result.push(FieldRange::new(
492 Range::new(key_start, key_end),
493 Range::new(value_start, value_end),
494 ));
495 }
496
497 while pos < len && (raw[pos].is_ascii_whitespace() || raw[pos] == b',') {
499 pos += 1;
500 }
501 }
502
503 result
504 }
505}
506
507pub struct LazyArrayIter<'a> {
509 array: &'a LazyArray<'a>,
510 index: usize,
511}
512
513impl<'a> Iterator for LazyArrayIter<'a> {
514 type Item = &'a [u8]; fn next(&mut self) -> Option<Self::Item> {
517 if self.index >= self.array.boundaries.len() {
518 return None;
519 }
520
521 let range = self.array.boundaries[self.index];
522 self.index += 1;
523
524 Some(&self.array.raw[range.start..range.end])
525 }
526}
527
528impl FieldRange {
529 pub fn new(key: Range, value: Range) -> Self {
531 Self { key, value }
532 }
533}
534
/// Returns the end of `raw[start..end]` after dropping trailing ASCII whitespace.
///
/// The result is never smaller than `start` when `start < end`; a span that is entirely
/// whitespace collapses to `start`. An `end` beyond the buffer is clamped to `raw.len()`
/// instead of panicking, because scanners that skip an escaped byte can step one past
/// the end of truncated input. When `end <= start` the span is empty and `end` is
/// returned unchanged.
fn trim_end(raw: &[u8], start: usize, end: usize) -> usize {
    if end <= start {
        return end;
    }

    let mut e = end.min(raw.len());
    while e > start && raw[e - 1].is_ascii_whitespace() {
        e -= 1;
    }
    e
}
545
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `LazyArray` over `raw` with a fresh scan result.
    fn array(raw: &[u8]) -> LazyArray<'_> {
        LazyArray::from_scan(raw, ScanResult::new())
    }

    /// Builds a `LazyObject` over `raw` with a fresh scan result.
    fn object(raw: &[u8]) -> LazyObject<'_> {
        LazyObject::from_scan(raw, ScanResult::new())
    }

    #[test]
    fn test_json_value_types() {
        let value = JsonValue::String("hello");
        assert_eq!(value.as_str(), Some("hello"));
        assert!(value.as_f64().is_none());
    }

    #[test]
    fn test_lazy_array_creation() {
        let parsed = array(b"[1, 2, 3]");

        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed.get(0), Some(&b"1"[..]));
        assert_eq!(parsed.get(1), Some(&b"2"[..]));
        assert_eq!(parsed.get(2), Some(&b"3"[..]));
    }

    #[test]
    fn test_lazy_array_empty() {
        let parsed = array(b"[]");
        assert_eq!(parsed.len(), 0);
        assert!(parsed.is_empty());
    }

    #[test]
    fn test_lazy_array_strings() {
        let parsed = array(b"[\"hello\", \"world\"]");
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed.get(0), Some(&b"\"hello\""[..]));
    }

    #[test]
    fn test_lazy_array_nested() {
        let parsed = array(b"[1, [2, 3], {\"a\": 4}]");
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed.get(0), Some(&b"1"[..]));
        assert_eq!(parsed.get(1), Some(&b"[2, 3]"[..]));
        assert_eq!(parsed.get(2), Some(&b"{\"a\": 4}"[..]));
    }

    #[test]
    fn test_lazy_array_escaped_string() {
        // The escaped quotes must not end the first element early.
        let parsed = array(br#"["say \"hi\"", "bye"]"#);
        assert_eq!(parsed.len(), 2);
    }

    #[test]
    fn test_lazy_object_creation() {
        let parsed = object(b"{\"a\": 1, \"b\": 2}");
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed.get("a"), Some(&b"1"[..]));
        assert_eq!(parsed.get("b"), Some(&b"2"[..]));
    }

    #[test]
    fn test_lazy_object_empty() {
        let parsed = object(b"{}");
        assert_eq!(parsed.len(), 0);
        assert!(parsed.is_empty());
    }

    #[test]
    fn test_lazy_object_string_value() {
        let parsed = object(b"{\"name\": \"alice\"}");
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed.get("name"), Some(&b"\"alice\""[..]));
    }

    #[test]
    fn test_lazy_object_nested_value() {
        let parsed = object(b"{\"arr\": [1, 2], \"n\": 42}");
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed.get("arr"), Some(&b"[1, 2]"[..]));
        assert_eq!(parsed.get("n"), Some(&b"42"[..]));
    }

    #[test]
    fn test_number_detection() {
        let parsed = array(b"[1.0, 2.5, 3.14]");

        assert!(parsed.looks_like_number(b"123.45"));
        assert!(!parsed.looks_like_number(b"\"string\""));
    }
}