1use crate::{
7 domain::{DomainResult, DomainError},
8 parser::ValueType,
9};
10use std::{
11 marker::PhantomData,
12 str::from_utf8,
13};
14
/// A parser that produces a value from a borrowed byte buffer.
///
/// `'a` is the lifetime of the buffer handed to [`LazyParser::parse_lazy`];
/// [`LazyParser::remaining`] returns a slice with that same lifetime.
pub trait LazyParser<'a> {
    /// Value produced by a successful parse.
    type Output;
    /// Error produced by a failed parse.
    type Error;

    /// Parses `input` and returns the resulting value or an error.
    fn parse_lazy(&mut self, input: &'a [u8]) -> Result<Self::Output, Self::Error>;

    /// Returns the part of the input that has not been consumed yet.
    fn remaining(&self) -> &'a [u8];

    /// Reports whether the whole input has been consumed.
    fn is_complete(&self) -> bool;

    /// Clears per-parse state so the parser can be reused.
    fn reset(&mut self);
}
35
/// JSON parser that hands back slices borrowed from its input buffer.
pub struct ZeroCopyParser<'a> {
    /// Buffer being parsed; empty until a parse call installs one.
    input: &'a [u8],
    /// Byte offset of the next unread byte in `input`.
    position: usize,
    /// Number of objects/arrays currently open while parsing.
    depth: usize,
    /// Depth at which opening another object or array is refused.
    max_depth: usize,
    /// Lifetime marker for `'a` (`input` already carries the same lifetime).
    _phantom: PhantomData<&'a ()>,
}
44
45impl<'a> ZeroCopyParser<'a> {
46 pub fn new() -> Self {
48 Self {
49 input: &[],
50 position: 0,
51 depth: 0,
52 max_depth: 64, _phantom: PhantomData,
54 }
55 }
56
57 pub fn with_max_depth(max_depth: usize) -> Self {
59 Self {
60 input: &[],
61 position: 0,
62 depth: 0,
63 max_depth,
64 _phantom: PhantomData,
65 }
66 }
67
68 pub fn parse_value(&mut self) -> DomainResult<LazyJsonValue<'a>> {
70 self.skip_whitespace();
71
72 if self.position >= self.input.len() {
73 return Err(DomainError::InvalidInput("Unexpected end of input".to_string()));
74 }
75
76 let ch = self.input[self.position];
77 match ch {
78 b'"' => self.parse_string(),
79 b'{' => self.parse_object(),
80 b'[' => self.parse_array(),
81 b't' | b'f' => self.parse_boolean(),
82 b'n' => self.parse_null(),
83 b'-' | b'0'..=b'9' => self.parse_number(),
84 _ => {
85 let ch_char = ch as char;
86 Err(DomainError::InvalidInput(format!("Unexpected character: {ch_char}")))
87 },
88 }
89 }
90
91 fn parse_string(&mut self) -> DomainResult<LazyJsonValue<'a>> {
93 if self.position >= self.input.len() || self.input[self.position] != b'"' {
94 return Err(DomainError::InvalidInput("Expected '\"'".to_string()));
95 }
96
97 let start = self.position + 1; self.position += 1;
99
100 while self.position < self.input.len() {
102 match self.input[self.position] {
103 b'"' => {
104 let string_slice = &self.input[start..self.position];
105 self.position += 1; if string_slice.contains(&b'\\') {
109 let unescaped = self.unescape_string(string_slice)?;
111 return Ok(LazyJsonValue::StringOwned(unescaped));
112 } else {
113 return Ok(LazyJsonValue::StringBorrowed(string_slice));
115 }
116 }
117 b'\\' => {
118 self.position += 2;
120 }
121 _ => {
122 self.position += 1;
123 }
124 }
125 }
126
127 Err(DomainError::InvalidInput("Unterminated string".to_string()))
128 }
129
130 fn parse_object(&mut self) -> DomainResult<LazyJsonValue<'a>> {
132 if self.depth >= self.max_depth {
133 return Err(DomainError::InvalidInput("Maximum nesting depth exceeded".to_string()));
134 }
135
136 if self.position >= self.input.len() || self.input[self.position] != b'{' {
137 return Err(DomainError::InvalidInput("Expected '{'".to_string()));
138 }
139
140 let start = self.position;
141 self.position += 1; self.depth += 1;
143
144 self.skip_whitespace();
145
146 if self.position < self.input.len() && self.input[self.position] == b'}' {
148 self.position += 1;
149 self.depth -= 1;
150 return Ok(LazyJsonValue::ObjectSlice(&self.input[start..self.position]));
151 }
152
153 let mut first = true;
154 while self.position < self.input.len() && self.input[self.position] != b'}' {
155 if !first {
156 self.expect_char(b',')?;
157 self.skip_whitespace();
158 }
159 first = false;
160
161 let _key = self.parse_value()?;
163 self.skip_whitespace();
164 self.expect_char(b':')?;
165 self.skip_whitespace();
166
167 let _value = self.parse_value()?;
169 self.skip_whitespace();
170 }
171
172 self.expect_char(b'}')?;
173 self.depth -= 1;
174
175 Ok(LazyJsonValue::ObjectSlice(&self.input[start..self.position]))
176 }
177
178 fn parse_array(&mut self) -> DomainResult<LazyJsonValue<'a>> {
180 if self.depth >= self.max_depth {
181 return Err(DomainError::InvalidInput("Maximum nesting depth exceeded".to_string()));
182 }
183
184 if self.position >= self.input.len() || self.input[self.position] != b'[' {
185 return Err(DomainError::InvalidInput("Expected '['".to_string()));
186 }
187
188 let start = self.position;
189 self.position += 1; self.depth += 1;
191
192 self.skip_whitespace();
193
194 if self.position < self.input.len() && self.input[self.position] == b']' {
196 self.position += 1;
197 self.depth -= 1;
198 return Ok(LazyJsonValue::ArraySlice(&self.input[start..self.position]));
199 }
200
201 let mut first = true;
202 while self.position < self.input.len() && self.input[self.position] != b']' {
203 if !first {
204 self.expect_char(b',')?;
205 self.skip_whitespace();
206 }
207 first = false;
208
209 let _element = self.parse_value()?;
211 self.skip_whitespace();
212 }
213
214 self.expect_char(b']')?;
215 self.depth -= 1;
216
217 Ok(LazyJsonValue::ArraySlice(&self.input[start..self.position]))
218 }
219
220 fn parse_boolean(&mut self) -> DomainResult<LazyJsonValue<'a>> {
222 if self.position + 4 <= self.input.len() && &self.input[self.position..self.position + 4] == b"true" {
223 self.position += 4;
224 Ok(LazyJsonValue::Boolean(true))
225 } else if self.position + 5 <= self.input.len() && &self.input[self.position..self.position + 5] == b"false" {
226 self.position += 5;
227 Ok(LazyJsonValue::Boolean(false))
228 } else {
229 Err(DomainError::InvalidInput("Invalid boolean value".to_string()))
230 }
231 }
232
233 fn parse_null(&mut self) -> DomainResult<LazyJsonValue<'a>> {
235 if self.position + 4 <= self.input.len() && &self.input[self.position..self.position + 4] == b"null" {
236 self.position += 4;
237 Ok(LazyJsonValue::Null)
238 } else {
239 Err(DomainError::InvalidInput("Invalid null value".to_string()))
240 }
241 }
242
243 fn parse_number(&mut self) -> DomainResult<LazyJsonValue<'a>> {
245 let start = self.position;
246
247 if self.input[self.position] == b'-' {
249 self.position += 1;
250 }
251
252 if self.position >= self.input.len() {
254 return Err(DomainError::InvalidInput("Invalid number".to_string()));
255 }
256
257 if self.input[self.position] == b'0' {
258 self.position += 1;
259 } else if self.input[self.position].is_ascii_digit() {
260 while self.position < self.input.len() && self.input[self.position].is_ascii_digit() {
261 self.position += 1;
262 }
263 } else {
264 return Err(DomainError::InvalidInput("Invalid number".to_string()));
265 }
266
267 if self.position < self.input.len() && self.input[self.position] == b'.' {
269 self.position += 1;
270 if self.position >= self.input.len() || !self.input[self.position].is_ascii_digit() {
271 return Err(DomainError::InvalidInput("Invalid number: missing digits after decimal".to_string()));
272 }
273 while self.position < self.input.len() && self.input[self.position].is_ascii_digit() {
274 self.position += 1;
275 }
276 }
277
278 if self.position < self.input.len() && (self.input[self.position] == b'e' || self.input[self.position] == b'E') {
280 self.position += 1;
281 if self.position < self.input.len() && (self.input[self.position] == b'+' || self.input[self.position] == b'-') {
282 self.position += 1;
283 }
284 if self.position >= self.input.len() || !self.input[self.position].is_ascii_digit() {
285 return Err(DomainError::InvalidInput("Invalid number: missing digits in exponent".to_string()));
286 }
287 while self.position < self.input.len() && self.input[self.position].is_ascii_digit() {
288 self.position += 1;
289 }
290 }
291
292 let number_slice = &self.input[start..self.position];
293 Ok(LazyJsonValue::NumberSlice(number_slice))
294 }
295
296 fn skip_whitespace(&mut self) {
298 while self.position < self.input.len() {
299 match self.input[self.position] {
300 b' ' | b'\t' | b'\n' | b'\r' => {
301 self.position += 1;
302 }
303 _ => break,
304 }
305 }
306 }
307
308 fn expect_char(&mut self, ch: u8) -> DomainResult<()> {
310 if self.position >= self.input.len() || self.input[self.position] != ch {
311 let ch_char = ch as char;
312 return Err(DomainError::InvalidInput(format!("Expected '{ch_char}'")));
313 }
314 self.position += 1;
315 Ok(())
316 }
317
318 fn unescape_string(&self, input: &[u8]) -> DomainResult<String> {
320 let mut result = Vec::with_capacity(input.len());
321 let mut i = 0;
322
323 while i < input.len() {
324 if input[i] == b'\\' && i + 1 < input.len() {
325 match input[i + 1] {
326 b'"' => result.push(b'"'),
327 b'\\' => result.push(b'\\'),
328 b'/' => result.push(b'/'),
329 b'b' => result.push(b'\x08'),
330 b'f' => result.push(b'\x0C'),
331 b'n' => result.push(b'\n'),
332 b'r' => result.push(b'\r'),
333 b't' => result.push(b'\t'),
334 b'u' => {
335 if i + 5 < input.len() {
337 i += 6;
339 continue;
340 } else {
341 return Err(DomainError::InvalidInput("Invalid unicode escape".to_string()));
342 }
343 }
344 _ => return Err(DomainError::InvalidInput("Invalid escape sequence".to_string())),
345 }
346 i += 2;
347 } else {
348 result.push(input[i]);
349 i += 1;
350 }
351 }
352
353 String::from_utf8(result)
354 .map_err(|e| DomainError::InvalidInput(format!("Invalid UTF-8: {e}")))
355 }
356}
357
358impl<'a> LazyParser<'a> for ZeroCopyParser<'a> {
359 type Output = LazyJsonValue<'a>;
360 type Error = DomainError;
361
362 fn parse_lazy(&mut self, input: &'a [u8]) -> Result<Self::Output, Self::Error> {
363 self.input = input;
364 self.position = 0;
365 self.depth = 0;
366
367 self.parse_value()
368 }
369
370 fn remaining(&self) -> &'a [u8] {
371 if self.position < self.input.len() {
372 &self.input[self.position..]
373 } else {
374 &[]
375 }
376 }
377
378 fn is_complete(&self) -> bool {
379 self.position >= self.input.len()
380 }
381
382 fn reset(&mut self) {
383 self.input = &[];
384 self.position = 0;
385 self.depth = 0;
386 }
387}
388
/// A parsed JSON value that, where possible, refers to the input bytes
/// instead of owning a copy.
#[derive(Debug, Clone, PartialEq)]
pub enum LazyJsonValue<'a> {
    /// String content (without quotes) borrowed from the input.
    StringBorrowed(&'a [u8]),
    /// String content held in an owned `String`.
    StringOwned(String),
    /// Raw text of a number, borrowed from the input.
    NumberSlice(&'a [u8]),
    /// `true` or `false`.
    Boolean(bool),
    /// `null`.
    Null,
    /// Raw bytes of an object, borrowed from the input.
    ObjectSlice(&'a [u8]),
    /// Raw bytes of an array, borrowed from the input.
    ArraySlice(&'a [u8]),
}
407
408impl<'a> LazyJsonValue<'a> {
409 pub fn value_type(&self) -> ValueType {
411 match self {
412 LazyJsonValue::StringBorrowed(_) | LazyJsonValue::StringOwned(_) => ValueType::String,
413 LazyJsonValue::NumberSlice(_) => ValueType::Number,
414 LazyJsonValue::Boolean(_) => ValueType::Boolean,
415 LazyJsonValue::Null => ValueType::Null,
416 LazyJsonValue::ObjectSlice(_) => ValueType::Object,
417 LazyJsonValue::ArraySlice(_) => ValueType::Array,
418 }
419 }
420
421 pub fn to_string_lossy(&self) -> String {
423 match self {
424 LazyJsonValue::StringBorrowed(bytes) => {
425 String::from_utf8_lossy(bytes).to_string()
426 }
427 LazyJsonValue::StringOwned(s) => s.clone(),
428 LazyJsonValue::NumberSlice(bytes) => {
429 String::from_utf8_lossy(bytes).to_string()
430 }
431 LazyJsonValue::Boolean(b) => b.to_string(),
432 LazyJsonValue::Null => "null".to_string(),
433 LazyJsonValue::ObjectSlice(bytes) => {
434 String::from_utf8_lossy(bytes).to_string()
435 }
436 LazyJsonValue::ArraySlice(bytes) => {
437 String::from_utf8_lossy(bytes).to_string()
438 }
439 }
440 }
441
442 pub fn as_str(&self) -> DomainResult<&str> {
444 match self {
445 LazyJsonValue::StringBorrowed(bytes) => {
446 from_utf8(bytes).map_err(|e| DomainError::InvalidInput(format!("Invalid UTF-8: {e}")))
447 }
448 LazyJsonValue::StringOwned(s) => Ok(s.as_str()),
449 _ => Err(DomainError::InvalidInput("Value is not a string".to_string())),
450 }
451 }
452
453 pub fn as_number(&self) -> DomainResult<f64> {
455 match self {
456 LazyJsonValue::NumberSlice(bytes) => {
457 let s = from_utf8(bytes)
458 .map_err(|e| DomainError::InvalidInput(format!("Invalid UTF-8: {e}")))?;
459 s.parse::<f64>()
460 .map_err(|e| DomainError::InvalidInput(format!("Invalid number: {e}")))
461 }
462 _ => Err(DomainError::InvalidInput("Value is not a number".to_string())),
463 }
464 }
465
466 pub fn as_boolean(&self) -> DomainResult<bool> {
468 match self {
469 LazyJsonValue::Boolean(b) => Ok(*b),
470 _ => Err(DomainError::InvalidInput("Value is not a boolean".to_string())),
471 }
472 }
473
474 pub fn is_null(&self) -> bool {
476 matches!(self, LazyJsonValue::Null)
477 }
478
479 pub fn as_bytes(&self) -> Option<&'a [u8]> {
481 match self {
482 LazyJsonValue::StringBorrowed(bytes) => Some(bytes),
483 LazyJsonValue::NumberSlice(bytes) => Some(bytes),
484 LazyJsonValue::ObjectSlice(bytes) => Some(bytes),
485 LazyJsonValue::ArraySlice(bytes) => Some(bytes),
486 _ => None,
487 }
488 }
489
490 pub fn memory_usage(&self) -> MemoryUsage {
492 match self {
493 LazyJsonValue::StringBorrowed(bytes) => MemoryUsage {
494 allocated_bytes: 0,
495 referenced_bytes: bytes.len(),
496 },
497 LazyJsonValue::StringOwned(s) => MemoryUsage {
498 allocated_bytes: s.len(),
499 referenced_bytes: 0,
500 },
501 LazyJsonValue::NumberSlice(bytes) => MemoryUsage {
502 allocated_bytes: 0,
503 referenced_bytes: bytes.len(),
504 },
505 LazyJsonValue::Boolean(val) => MemoryUsage {
506 allocated_bytes: 0,
507 referenced_bytes: if *val { 4 } else { 5 }, },
509 LazyJsonValue::Null => MemoryUsage {
510 allocated_bytes: 0,
511 referenced_bytes: 4, },
513 LazyJsonValue::ObjectSlice(bytes) => MemoryUsage {
514 allocated_bytes: 0,
515 referenced_bytes: bytes.len(),
516 },
517 LazyJsonValue::ArraySlice(bytes) => MemoryUsage {
518 allocated_bytes: 0,
519 referenced_bytes: bytes.len(),
520 },
521 }
522 }
523}
524
/// Byte counts describing how much of a value is owned versus borrowed.
#[derive(Debug, Clone, PartialEq)]
pub struct MemoryUsage {
    /// Bytes held in allocations owned by the value.
    pub allocated_bytes: usize,
    /// Bytes the value refers to without owning them.
    pub referenced_bytes: usize,
}

impl MemoryUsage {
    /// Sum of allocated and referenced bytes.
    pub fn total(&self) -> usize {
        self.referenced_bytes + self.allocated_bytes
    }

    /// Fraction of the total that is referenced rather than allocated.
    ///
    /// Returns `1.0` when the total is zero, so an empty value counts as
    /// fully efficient instead of dividing by zero.
    pub fn efficiency(&self) -> f64 {
        match self.total() {
            0 => 1.0,
            total => self.referenced_bytes as f64 / total as f64,
        }
    }
}
549
550pub struct IncrementalParser<'a> {
552 base: ZeroCopyParser<'a>,
553 buffer: Vec<u8>,
554 complete_values: Vec<LazyJsonValue<'a>>,
555}
556
557impl<'a> Default for IncrementalParser<'a> {
558 fn default() -> Self {
559 Self::new()
560 }
561}
562
563impl<'a> IncrementalParser<'a> {
564 pub fn new() -> Self {
565 Self {
566 base: ZeroCopyParser::new(),
567 buffer: Vec::with_capacity(8192), complete_values: Vec::new(),
569 }
570 }
571
572 pub fn feed(&mut self, data: &[u8]) -> DomainResult<()> {
574 self.buffer.extend_from_slice(data);
575 Ok(())
576 }
577
578 pub fn parse_available(&mut self) -> DomainResult<Vec<LazyJsonValue<'_>>> {
580 if !self.buffer.is_empty() {
583 let mut parser = ZeroCopyParser::new();
584 match parser.parse_lazy(&self.buffer) {
585 Ok(_value) => {
586 self.buffer.clear();
589 Ok(vec![])
590 }
591 Err(_e) => Ok(vec![]), }
593 } else {
594 Ok(vec![])
595 }
596 }
597
598 pub fn has_complete_value(&self) -> bool {
600 !self.buffer.is_empty()
602 }
603}
604
605impl<'a> Default for ZeroCopyParser<'a> {
606 fn default() -> Self {
607 Self::new()
608 }
609}
610
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` with a fresh default parser, panicking on failure.
    fn parse(input: &[u8]) -> LazyJsonValue<'_> {
        ZeroCopyParser::new().parse_lazy(input).unwrap()
    }

    /// A string without escapes is returned as a borrowed slice.
    #[test]
    fn test_parse_string() {
        match parse(br#""hello world""#) {
            LazyJsonValue::StringBorrowed(bytes) => {
                assert_eq!(bytes, b"hello world");
            }
            _ => panic!("Expected string"),
        }
    }

    /// A string with escapes is unescaped into an owned `String`.
    #[test]
    fn test_parse_escaped_string() {
        match parse(br#""hello \"world\"""#) {
            LazyJsonValue::StringOwned(s) => {
                assert_eq!(s, "hello \"world\"");
            }
            _ => panic!("Expected owned string due to escapes"),
        }
    }

    /// A number keeps its source text and converts to `f64` on demand.
    #[test]
    fn test_parse_number() {
        let value = parse(b"123.45");
        match value {
            LazyJsonValue::NumberSlice(bytes) => {
                assert_eq!(bytes, b"123.45");
                assert_eq!(value.as_number().unwrap(), 123.45);
            }
            _ => panic!("Expected number"),
        }
    }

    /// Both boolean literals parse, reusing one parser across inputs.
    #[test]
    fn test_parse_boolean() {
        let mut parser = ZeroCopyParser::new();

        let parsed_true = parser.parse_lazy(b"true").unwrap();
        assert_eq!(parsed_true, LazyJsonValue::Boolean(true));

        parser.reset();
        let parsed_false = parser.parse_lazy(b"false").unwrap();
        assert_eq!(parsed_false, LazyJsonValue::Boolean(false));
    }

    /// `null` parses to the `Null` variant.
    #[test]
    fn test_parse_null() {
        let value = parse(b"null");
        assert_eq!(value, LazyJsonValue::Null);
        assert!(value.is_null());
    }

    /// An empty object is returned as its raw two-byte slice.
    #[test]
    fn test_parse_empty_object() {
        match parse(b"{}") {
            LazyJsonValue::ObjectSlice(bytes) => {
                assert_eq!(bytes, b"{}");
            }
            _ => panic!("Expected object"),
        }
    }

    /// An empty array is returned as its raw two-byte slice.
    #[test]
    fn test_parse_empty_array() {
        match parse(b"[]") {
            LazyJsonValue::ArraySlice(bytes) => {
                assert_eq!(bytes, b"[]");
            }
            _ => panic!("Expected array"),
        }
    }

    /// Borrowed strings reference memory; escaped strings allocate.
    #[test]
    fn test_memory_usage() {
        let borrowed = parse(br#""hello""#).memory_usage();
        assert_eq!(borrowed.allocated_bytes, 0);
        assert_eq!(borrowed.referenced_bytes, 5);
        assert_eq!(borrowed.efficiency(), 1.0);

        let owned = parse(br#""he\"llo""#).memory_usage();
        assert!(owned.allocated_bytes > 0);
        assert_eq!(owned.referenced_bytes, 0);
        assert_eq!(owned.efficiency(), 0.0);
    }

    /// A multi-member object yields a slice covering the whole input.
    #[test]
    fn test_complex_object() {
        let input = br#"{"name": "test", "value": 42, "active": true}"#;

        match parse(input) {
            LazyJsonValue::ObjectSlice(bytes) => {
                assert_eq!(bytes.len(), input.len());
            }
            _ => panic!("Expected object"),
        }
    }

    /// One parser instance can parse unrelated inputs after `reset`.
    #[test]
    fn test_parser_reuse() {
        let mut parser = ZeroCopyParser::new();

        let number = parser.parse_lazy(b"123").unwrap();
        assert!(matches!(number, LazyJsonValue::NumberSlice(_)));

        parser.reset();
        let string = parser.parse_lazy(br#""hello""#).unwrap();
        assert!(matches!(string, LazyJsonValue::StringBorrowed(_)));
    }
}