1use crate::{
7 config::SecurityConfig,
8 domain::{DomainError, DomainResult},
9 parser::ValueType,
10 security::SecurityValidator,
11};
12use std::{marker::PhantomData, str::from_utf8};
13
/// Interface for parsers whose results may borrow from the input buffer.
///
/// `'a` is the lifetime of the byte slice handed to
/// [`LazyParser::parse_lazy`]; [`LazyParser::remaining`] returns a slice with
/// that same lifetime.
pub trait LazyParser<'a> {
    /// Value produced by a successful parse.
    type Output;

    /// Error produced by a failed parse.
    type Error;

    /// Parses `input`, returning the parsed value or an error.
    fn parse_lazy(&mut self, input: &'a [u8]) -> Result<Self::Output, Self::Error>;

    /// Returns the input bytes the parser has not consumed.
    fn remaining(&self) -> &'a [u8];

    /// Reports whether the parser has no input left to consume.
    fn is_complete(&self) -> bool;

    /// Puts the parser back into its initial state.
    fn reset(&mut self);
}
34
35pub struct ZeroCopyParser<'a> {
37 input: &'a [u8],
38 position: usize,
39 depth: usize,
40 validator: SecurityValidator,
41 _phantom: PhantomData<&'a ()>,
42}
43
44impl<'a> ZeroCopyParser<'a> {
45 pub fn new() -> Self {
47 Self {
48 input: &[],
49 position: 0,
50 depth: 0,
51 validator: SecurityValidator::default(),
52 _phantom: PhantomData,
53 }
54 }
55
56 pub fn with_security_config(security_config: SecurityConfig) -> Self {
58 Self {
59 input: &[],
60 position: 0,
61 depth: 0,
62 validator: SecurityValidator::new(security_config),
63 _phantom: PhantomData,
64 }
65 }
66
67 pub fn parse_value(&mut self) -> DomainResult<LazyJsonValue<'a>> {
69 self.skip_whitespace();
70
71 if self.position >= self.input.len() {
72 return Err(DomainError::InvalidInput(
73 "Unexpected end of input".to_string(),
74 ));
75 }
76
77 let ch = self.input[self.position];
78 match ch {
79 b'"' => self.parse_string(),
80 b'{' => self.parse_object(),
81 b'[' => self.parse_array(),
82 b't' | b'f' => self.parse_boolean(),
83 b'n' => self.parse_null(),
84 b'-' | b'0'..=b'9' => self.parse_number(),
85 _ => {
86 let ch_char = ch as char;
87 Err(DomainError::InvalidInput(format!(
88 "Unexpected character: {ch_char}"
89 )))
90 }
91 }
92 }
93
94 fn parse_string(&mut self) -> DomainResult<LazyJsonValue<'a>> {
96 if self.position >= self.input.len() || self.input[self.position] != b'"' {
97 return Err(DomainError::InvalidInput("Expected '\"'".to_string()));
98 }
99
100 let start = self.position + 1; self.position += 1;
102
103 while self.position < self.input.len() {
105 match self.input[self.position] {
106 b'"' => {
107 let string_slice = &self.input[start..self.position];
108 self.position += 1; if string_slice.contains(&b'\\') {
112 let unescaped = self.unescape_string(string_slice)?;
114 return Ok(LazyJsonValue::StringOwned(unescaped));
115 } else {
116 return Ok(LazyJsonValue::StringBorrowed(string_slice));
118 }
119 }
120 b'\\' => {
121 self.position += 2;
123 }
124 _ => {
125 self.position += 1;
126 }
127 }
128 }
129
130 Err(DomainError::InvalidInput("Unterminated string".to_string()))
131 }
132
133 fn parse_object(&mut self) -> DomainResult<LazyJsonValue<'a>> {
135 self.validator
136 .validate_json_depth(self.depth + 1)
137 .map_err(|e| DomainError::SecurityViolation(e.to_string()))?;
138
139 if self.position >= self.input.len() || self.input[self.position] != b'{' {
140 return Err(DomainError::InvalidInput("Expected '{'".to_string()));
141 }
142
143 let start = self.position;
144 self.position += 1; self.depth += 1;
146
147 self.skip_whitespace();
148
149 if self.position < self.input.len() && self.input[self.position] == b'}' {
151 self.position += 1;
152 self.depth -= 1;
153 return Ok(LazyJsonValue::ObjectSlice(
154 &self.input[start..self.position],
155 ));
156 }
157
158 let mut first = true;
159 while self.position < self.input.len() && self.input[self.position] != b'}' {
160 if !first {
161 self.expect_char(b',')?;
162 self.skip_whitespace();
163 }
164 first = false;
165
166 let _key = self.parse_value()?;
168 self.skip_whitespace();
169 self.expect_char(b':')?;
170 self.skip_whitespace();
171
172 let _value = self.parse_value()?;
174 self.skip_whitespace();
175 }
176
177 self.expect_char(b'}')?;
178 self.depth -= 1;
179
180 Ok(LazyJsonValue::ObjectSlice(
181 &self.input[start..self.position],
182 ))
183 }
184
185 fn parse_array(&mut self) -> DomainResult<LazyJsonValue<'a>> {
187 self.validator
188 .validate_json_depth(self.depth + 1)
189 .map_err(|e| DomainError::SecurityViolation(e.to_string()))?;
190
191 if self.position >= self.input.len() || self.input[self.position] != b'[' {
192 return Err(DomainError::InvalidInput("Expected '['".to_string()));
193 }
194
195 let start = self.position;
196 self.position += 1; self.depth += 1;
198
199 self.skip_whitespace();
200
201 if self.position < self.input.len() && self.input[self.position] == b']' {
203 self.position += 1;
204 self.depth -= 1;
205 return Ok(LazyJsonValue::ArraySlice(&self.input[start..self.position]));
206 }
207
208 let mut first = true;
209 while self.position < self.input.len() && self.input[self.position] != b']' {
210 if !first {
211 self.expect_char(b',')?;
212 self.skip_whitespace();
213 }
214 first = false;
215
216 let _element = self.parse_value()?;
218 self.skip_whitespace();
219 }
220
221 self.expect_char(b']')?;
222 self.depth -= 1;
223
224 Ok(LazyJsonValue::ArraySlice(&self.input[start..self.position]))
225 }
226
227 fn parse_boolean(&mut self) -> DomainResult<LazyJsonValue<'a>> {
229 if self.position + 4 <= self.input.len()
230 && &self.input[self.position..self.position + 4] == b"true"
231 {
232 self.position += 4;
233 Ok(LazyJsonValue::Boolean(true))
234 } else if self.position + 5 <= self.input.len()
235 && &self.input[self.position..self.position + 5] == b"false"
236 {
237 self.position += 5;
238 Ok(LazyJsonValue::Boolean(false))
239 } else {
240 Err(DomainError::InvalidInput(
241 "Invalid boolean value".to_string(),
242 ))
243 }
244 }
245
246 fn parse_null(&mut self) -> DomainResult<LazyJsonValue<'a>> {
248 if self.position + 4 <= self.input.len()
249 && &self.input[self.position..self.position + 4] == b"null"
250 {
251 self.position += 4;
252 Ok(LazyJsonValue::Null)
253 } else {
254 Err(DomainError::InvalidInput("Invalid null value".to_string()))
255 }
256 }
257
258 fn parse_number(&mut self) -> DomainResult<LazyJsonValue<'a>> {
260 let start = self.position;
261
262 if self.input[self.position] == b'-' {
264 self.position += 1;
265 }
266
267 if self.position >= self.input.len() {
269 return Err(DomainError::InvalidInput("Invalid number".to_string()));
270 }
271
272 if self.input[self.position] == b'0' {
273 self.position += 1;
274 } else if self.input[self.position].is_ascii_digit() {
275 while self.position < self.input.len() && self.input[self.position].is_ascii_digit() {
276 self.position += 1;
277 }
278 } else {
279 return Err(DomainError::InvalidInput("Invalid number".to_string()));
280 }
281
282 if self.position < self.input.len() && self.input[self.position] == b'.' {
284 self.position += 1;
285 if self.position >= self.input.len() || !self.input[self.position].is_ascii_digit() {
286 return Err(DomainError::InvalidInput(
287 "Invalid number: missing digits after decimal".to_string(),
288 ));
289 }
290 while self.position < self.input.len() && self.input[self.position].is_ascii_digit() {
291 self.position += 1;
292 }
293 }
294
295 if self.position < self.input.len()
297 && (self.input[self.position] == b'e' || self.input[self.position] == b'E')
298 {
299 self.position += 1;
300 if self.position < self.input.len()
301 && (self.input[self.position] == b'+' || self.input[self.position] == b'-')
302 {
303 self.position += 1;
304 }
305 if self.position >= self.input.len() || !self.input[self.position].is_ascii_digit() {
306 return Err(DomainError::InvalidInput(
307 "Invalid number: missing digits in exponent".to_string(),
308 ));
309 }
310 while self.position < self.input.len() && self.input[self.position].is_ascii_digit() {
311 self.position += 1;
312 }
313 }
314
315 let number_slice = &self.input[start..self.position];
316 Ok(LazyJsonValue::NumberSlice(number_slice))
317 }
318
319 fn skip_whitespace(&mut self) {
321 while self.position < self.input.len() {
322 match self.input[self.position] {
323 b' ' | b'\t' | b'\n' | b'\r' => {
324 self.position += 1;
325 }
326 _ => break,
327 }
328 }
329 }
330
331 fn expect_char(&mut self, ch: u8) -> DomainResult<()> {
333 if self.position >= self.input.len() || self.input[self.position] != ch {
334 let ch_char = ch as char;
335 return Err(DomainError::InvalidInput(format!("Expected '{ch_char}'")));
336 }
337 self.position += 1;
338 Ok(())
339 }
340
341 fn unescape_string(&self, input: &[u8]) -> DomainResult<String> {
343 let mut result = Vec::with_capacity(input.len());
344 let mut i = 0;
345
346 while i < input.len() {
347 if input[i] == b'\\' && i + 1 < input.len() {
348 match input[i + 1] {
349 b'"' => result.push(b'"'),
350 b'\\' => result.push(b'\\'),
351 b'/' => result.push(b'/'),
352 b'b' => result.push(b'\x08'),
353 b'f' => result.push(b'\x0C'),
354 b'n' => result.push(b'\n'),
355 b'r' => result.push(b'\r'),
356 b't' => result.push(b'\t'),
357 b'u' => {
358 if i + 5 < input.len() {
360 i += 6;
362 continue;
363 } else {
364 return Err(DomainError::InvalidInput(
365 "Invalid unicode escape".to_string(),
366 ));
367 }
368 }
369 _ => {
370 return Err(DomainError::InvalidInput(
371 "Invalid escape sequence".to_string(),
372 ));
373 }
374 }
375 i += 2;
376 } else {
377 result.push(input[i]);
378 i += 1;
379 }
380 }
381
382 String::from_utf8(result)
383 .map_err(|e| DomainError::InvalidInput(format!("Invalid UTF-8: {e}")))
384 }
385}
386
387impl<'a> LazyParser<'a> for ZeroCopyParser<'a> {
388 type Output = LazyJsonValue<'a>;
389 type Error = DomainError;
390
391 fn parse_lazy(&mut self, input: &'a [u8]) -> Result<Self::Output, Self::Error> {
392 self.validator
394 .validate_input_size(input.len())
395 .map_err(|e| DomainError::SecurityViolation(e.to_string()))?;
396
397 self.input = input;
398 self.position = 0;
399 self.depth = 0;
400
401 self.parse_value()
402 }
403
404 fn remaining(&self) -> &'a [u8] {
405 if self.position < self.input.len() {
406 &self.input[self.position..]
407 } else {
408 &[]
409 }
410 }
411
412 fn is_complete(&self) -> bool {
413 self.position >= self.input.len()
414 }
415
416 fn reset(&mut self) {
417 self.input = &[];
418 self.position = 0;
419 self.depth = 0;
420 }
421}
422
/// A parsed JSON value that borrows from the input where it can.
#[derive(Debug, Clone, PartialEq)]
pub enum LazyJsonValue<'a> {
    /// String contents (without the quotes) borrowed from the input.
    StringBorrowed(&'a [u8]),
    /// String whose escape sequences were resolved into an owned `String`.
    StringOwned(String),
    /// Raw bytes of a number literal.
    NumberSlice(&'a [u8]),
    /// A `true` or `false` literal.
    Boolean(bool),
    /// The `null` literal.
    Null,
    /// Raw bytes of an object, including the surrounding braces.
    ObjectSlice(&'a [u8]),
    /// Raw bytes of an array, including the surrounding brackets.
    ArraySlice(&'a [u8]),
}
441
442impl<'a> LazyJsonValue<'a> {
443 pub fn value_type(&self) -> ValueType {
445 match self {
446 LazyJsonValue::StringBorrowed(_) | LazyJsonValue::StringOwned(_) => ValueType::String,
447 LazyJsonValue::NumberSlice(_) => ValueType::Number,
448 LazyJsonValue::Boolean(_) => ValueType::Boolean,
449 LazyJsonValue::Null => ValueType::Null,
450 LazyJsonValue::ObjectSlice(_) => ValueType::Object,
451 LazyJsonValue::ArraySlice(_) => ValueType::Array,
452 }
453 }
454
455 pub fn to_string_lossy(&self) -> String {
457 match self {
458 LazyJsonValue::StringBorrowed(bytes) => String::from_utf8_lossy(bytes).to_string(),
459 LazyJsonValue::StringOwned(s) => s.clone(),
460 LazyJsonValue::NumberSlice(bytes) => String::from_utf8_lossy(bytes).to_string(),
461 LazyJsonValue::Boolean(b) => b.to_string(),
462 LazyJsonValue::Null => "null".to_string(),
463 LazyJsonValue::ObjectSlice(bytes) => String::from_utf8_lossy(bytes).to_string(),
464 LazyJsonValue::ArraySlice(bytes) => String::from_utf8_lossy(bytes).to_string(),
465 }
466 }
467
468 pub fn as_str(&self) -> DomainResult<&str> {
470 match self {
471 LazyJsonValue::StringBorrowed(bytes) => from_utf8(bytes)
472 .map_err(|e| DomainError::InvalidInput(format!("Invalid UTF-8: {e}"))),
473 LazyJsonValue::StringOwned(s) => Ok(s.as_str()),
474 _ => Err(DomainError::InvalidInput(
475 "Value is not a string".to_string(),
476 )),
477 }
478 }
479
480 pub fn as_number(&self) -> DomainResult<f64> {
482 match self {
483 LazyJsonValue::NumberSlice(bytes) => {
484 let s = from_utf8(bytes)
485 .map_err(|e| DomainError::InvalidInput(format!("Invalid UTF-8: {e}")))?;
486 s.parse::<f64>()
487 .map_err(|e| DomainError::InvalidInput(format!("Invalid number: {e}")))
488 }
489 _ => Err(DomainError::InvalidInput(
490 "Value is not a number".to_string(),
491 )),
492 }
493 }
494
495 pub fn as_boolean(&self) -> DomainResult<bool> {
497 match self {
498 LazyJsonValue::Boolean(b) => Ok(*b),
499 _ => Err(DomainError::InvalidInput(
500 "Value is not a boolean".to_string(),
501 )),
502 }
503 }
504
505 pub fn is_null(&self) -> bool {
507 matches!(self, LazyJsonValue::Null)
508 }
509
510 pub fn as_bytes(&self) -> Option<&'a [u8]> {
512 match self {
513 LazyJsonValue::StringBorrowed(bytes) => Some(bytes),
514 LazyJsonValue::NumberSlice(bytes) => Some(bytes),
515 LazyJsonValue::ObjectSlice(bytes) => Some(bytes),
516 LazyJsonValue::ArraySlice(bytes) => Some(bytes),
517 _ => None,
518 }
519 }
520
521 pub fn memory_usage(&self) -> MemoryUsage {
523 match self {
524 LazyJsonValue::StringBorrowed(bytes) => MemoryUsage {
525 allocated_bytes: 0,
526 referenced_bytes: bytes.len(),
527 },
528 LazyJsonValue::StringOwned(s) => MemoryUsage {
529 allocated_bytes: s.len(),
530 referenced_bytes: 0,
531 },
532 LazyJsonValue::NumberSlice(bytes) => MemoryUsage {
533 allocated_bytes: 0,
534 referenced_bytes: bytes.len(),
535 },
536 LazyJsonValue::Boolean(val) => MemoryUsage {
537 allocated_bytes: 0,
538 referenced_bytes: if *val { 4 } else { 5 }, },
540 LazyJsonValue::Null => MemoryUsage {
541 allocated_bytes: 0,
542 referenced_bytes: 4, },
544 LazyJsonValue::ObjectSlice(bytes) => MemoryUsage {
545 allocated_bytes: 0,
546 referenced_bytes: bytes.len(),
547 },
548 LazyJsonValue::ArraySlice(bytes) => MemoryUsage {
549 allocated_bytes: 0,
550 referenced_bytes: bytes.len(),
551 },
552 }
553 }
554}
555
/// Byte accounting for a parsed value.
#[derive(Debug, Clone, PartialEq)]
pub struct MemoryUsage {
    /// Bytes held in storage owned by the value.
    pub allocated_bytes: usize,
    /// Bytes the value refers to without owning them.
    pub referenced_bytes: usize,
}

impl MemoryUsage {
    /// Sum of allocated and referenced bytes.
    pub fn total(&self) -> usize {
        self.allocated_bytes + self.referenced_bytes
    }

    /// Fraction of the total that is referenced rather than allocated.
    ///
    /// Returns `1.0` when the total is zero.
    pub fn efficiency(&self) -> f64 {
        match self.total() {
            0 => 1.0,
            total => self.referenced_bytes as f64 / total as f64,
        }
    }
}
580
581pub struct IncrementalParser<'a> {
583 base: ZeroCopyParser<'a>,
584 buffer: Vec<u8>,
585 complete_values: Vec<LazyJsonValue<'a>>,
586}
587
588impl<'a> Default for IncrementalParser<'a> {
589 fn default() -> Self {
590 Self::new()
591 }
592}
593
594impl<'a> IncrementalParser<'a> {
595 pub fn new() -> Self {
596 Self {
597 base: ZeroCopyParser::new(),
598 buffer: Vec::with_capacity(8192), complete_values: Vec::new(),
600 }
601 }
602
603 pub fn feed(&mut self, data: &[u8]) -> DomainResult<()> {
605 self.buffer.extend_from_slice(data);
606 Ok(())
607 }
608
609 pub fn parse_available(&mut self) -> DomainResult<Vec<LazyJsonValue<'_>>> {
611 if !self.buffer.is_empty() {
614 let mut parser = ZeroCopyParser::new();
615 match parser.parse_lazy(&self.buffer) {
616 Ok(_value) => {
617 self.buffer.clear();
620 Ok(vec![])
621 }
622 Err(_e) => Ok(vec![]), }
624 } else {
625 Ok(vec![])
626 }
627 }
628
629 pub fn has_complete_value(&self) -> bool {
631 !self.buffer.is_empty()
633 }
634}
635
636impl<'a> Default for ZeroCopyParser<'a> {
637 fn default() -> Self {
638 Self::new()
639 }
640}
641
#[cfg(test)]
mod tests {
    use super::*;

    /// A string without escapes is returned as a borrowed slice.
    #[test]
    fn test_parse_string() {
        let input = br#""hello world""#;
        let mut parser = ZeroCopyParser::new();

        let parsed = parser.parse_lazy(input).unwrap();

        assert_eq!(parsed, LazyJsonValue::StringBorrowed(b"hello world"));
    }

    /// A string with escapes is unescaped into an owned value.
    #[test]
    fn test_parse_escaped_string() {
        let input = br#""hello \"world\"""#;
        let mut parser = ZeroCopyParser::new();

        let parsed = parser.parse_lazy(input).unwrap();

        assert_eq!(
            parsed,
            LazyJsonValue::StringOwned("hello \"world\"".to_string())
        );
    }

    /// A decimal number keeps its raw bytes and converts to `f64`.
    #[test]
    fn test_parse_number() {
        let input = b"123.45";
        let mut parser = ZeroCopyParser::new();

        let parsed = parser.parse_lazy(input).unwrap();

        assert_eq!(parsed, LazyJsonValue::NumberSlice(b"123.45"));
        assert_eq!(parsed.as_number().unwrap(), 123.45);
    }

    /// Both boolean literals parse, reusing one parser across inputs.
    #[test]
    fn test_parse_boolean() {
        let mut parser = ZeroCopyParser::new();

        let parsed_true = parser.parse_lazy(b"true").unwrap();
        assert_eq!(parsed_true, LazyJsonValue::Boolean(true));

        parser.reset();
        let parsed_false = parser.parse_lazy(b"false").unwrap();
        assert_eq!(parsed_false, LazyJsonValue::Boolean(false));
    }

    /// The null literal parses and reports itself as null.
    #[test]
    fn test_parse_null() {
        let mut parser = ZeroCopyParser::new();

        let parsed = parser.parse_lazy(b"null").unwrap();

        assert_eq!(parsed, LazyJsonValue::Null);
        assert!(parsed.is_null());
    }

    /// An empty object is returned as its raw bytes.
    #[test]
    fn test_parse_empty_object() {
        let mut parser = ZeroCopyParser::new();

        let parsed = parser.parse_lazy(b"{}").unwrap();

        assert_eq!(parsed, LazyJsonValue::ObjectSlice(b"{}"));
    }

    /// An empty array is returned as its raw bytes.
    #[test]
    fn test_parse_empty_array() {
        let mut parser = ZeroCopyParser::new();

        let parsed = parser.parse_lazy(b"[]").unwrap();

        assert_eq!(parsed, LazyJsonValue::ArraySlice(b"[]"));
    }

    /// Borrowed strings reference input; escaped strings allocate.
    #[test]
    fn test_memory_usage() {
        let mut parser = ZeroCopyParser::new();

        let borrowed_usage = parser.parse_lazy(br#""hello""#).unwrap().memory_usage();
        assert_eq!(borrowed_usage.allocated_bytes, 0);
        assert_eq!(borrowed_usage.referenced_bytes, 5);
        assert_eq!(borrowed_usage.efficiency(), 1.0);

        parser.reset();
        let owned_usage = parser.parse_lazy(br#""he\"llo""#).unwrap().memory_usage();
        assert!(owned_usage.allocated_bytes > 0);
        assert_eq!(owned_usage.referenced_bytes, 0);
        assert_eq!(owned_usage.efficiency(), 0.0);
    }

    /// An object with mixed member types spans the entire input.
    #[test]
    fn test_complex_object() {
        let input = br#"{"name": "test", "value": 42, "active": true}"#;
        let mut parser = ZeroCopyParser::new();

        let parsed = parser.parse_lazy(input).unwrap();

        assert!(
            matches!(parsed, LazyJsonValue::ObjectSlice(bytes) if bytes.len() == input.len()),
            "Expected object"
        );
    }

    /// A parser can be reset and used on a second, unrelated input.
    #[test]
    fn test_parser_reuse() {
        let mut parser = ZeroCopyParser::new();

        let first = parser.parse_lazy(b"123").unwrap();
        assert!(matches!(first, LazyJsonValue::NumberSlice(_)));

        parser.reset();
        let second = parser.parse_lazy(br#""hello""#).unwrap();
        assert!(matches!(second, LazyJsonValue::StringBorrowed(_)));
    }
}