use crate::{
config::SecurityConfig,
domain::{DomainError, DomainResult},
parser::ValueType,
security::SecurityValidator,
};
use std::{marker::PhantomData, str::from_utf8};
/// Interface for a parser that is handed a byte buffer living for `'a` and
/// reports how far it got through that buffer.
///
/// The descriptions below state what the signatures require plus what the
/// `ZeroCopyParser` implementation in this file does; other implementors are
/// not visible from here.
pub trait LazyParser<'a> {
/// Value produced by a successful [`parse_lazy`](LazyParser::parse_lazy) call.
type Output;
/// Error produced by a failed [`parse_lazy`](LazyParser::parse_lazy) call.
type Error;
/// Parses `input`, returning the resulting value or an error.
///
/// `ZeroCopyParser` replaces any previously stored input and restarts from
/// offset zero on every call.
fn parse_lazy(&mut self, input: &'a [u8]) -> Result<Self::Output, Self::Error>;
/// Returns a slice of the input buffer.
///
/// `ZeroCopyParser` returns the bytes from its current position to the end
/// of the input (empty when the position is at or past the end).
fn remaining(&self) -> &'a [u8];
/// Reports whether the parser is done with its input.
///
/// `ZeroCopyParser` returns `true` when its position has reached the end of
/// the stored input.
fn is_complete(&self) -> bool;
/// Returns the parser to its initial state.
///
/// `ZeroCopyParser` drops its input reference and zeroes its position and
/// depth counters.
fn reset(&mut self);
}
/// JSON parser over a borrowed byte buffer.
///
/// Parsed values are returned as [`LazyJsonValue`]s, most of which are
/// sub-slices of the input rather than copies.
pub struct ZeroCopyParser<'a> {
/// Bytes being parsed; replaced by every `parse_lazy` call and emptied by `reset`.
input: &'a [u8],
/// Index into `input` of the next byte to examine.
position: usize,
/// Number of objects/arrays currently open; checked against the validator
/// before a new container is entered.
depth: usize,
/// Performs the input-size check in `parse_lazy` and the nesting-depth
/// checks in `parse_object` / `parse_array`.
validator: SecurityValidator,
/// Zero-sized marker carrying the `'a` lifetime.
_phantom: PhantomData<&'a ()>,
}
impl<'a> ZeroCopyParser<'a> {
/// Creates a parser with no input attached and a default-constructed
/// [`SecurityValidator`].
///
/// Input is supplied later through `parse_lazy`.
pub fn new() -> Self {
    let validator = SecurityValidator::default();
    Self {
        validator,
        input: &[],
        position: 0,
        depth: 0,
        _phantom: PhantomData,
    }
}
/// Creates a parser with no input attached whose validator is built from
/// `security_config`.
pub fn with_security_config(security_config: SecurityConfig) -> Self {
    let validator = SecurityValidator::new(security_config);
    Self {
        validator,
        input: &[],
        position: 0,
        depth: 0,
        _phantom: PhantomData,
    }
}
/// Parses the next JSON value at the current position.
///
/// Leading whitespace is skipped, then the first byte selects the concrete
/// sub-parser (string, object, array, boolean, null or number).
///
/// # Errors
///
/// Returns [`DomainError::InvalidInput`] when the input is exhausted or the
/// first byte cannot start a JSON value, and forwards any error raised by the
/// selected sub-parser.
pub fn parse_value(&mut self) -> DomainResult<LazyJsonValue<'a>> {
    self.skip_whitespace();
    let lead = match self.input.get(self.position) {
        Some(&byte) => byte,
        None => {
            return Err(DomainError::InvalidInput(
                "Unexpected end of input".to_string(),
            ));
        }
    };
    match lead {
        b'{' => self.parse_object(),
        b'[' => self.parse_array(),
        b'"' => self.parse_string(),
        b'-' | b'0'..=b'9' => self.parse_number(),
        b't' | b'f' => self.parse_boolean(),
        b'n' => self.parse_null(),
        other => {
            let ch_char = other as char;
            Err(DomainError::InvalidInput(format!(
                "Unexpected character: {ch_char}"
            )))
        }
    }
}
/// Parses a double-quoted string starting at the current position.
///
/// A string without any backslash is returned as
/// [`LazyJsonValue::StringBorrowed`] pointing at the bytes between the quotes;
/// a string containing a backslash is run through `unescape_string` and
/// returned as [`LazyJsonValue::StringOwned`].
///
/// # Errors
///
/// Returns [`DomainError::InvalidInput`] when the current byte is not `"`,
/// when no closing quote is found, or when unescaping fails.
fn parse_string(&mut self) -> DomainResult<LazyJsonValue<'a>> {
    if self.input.get(self.position) != Some(&b'"') {
        return Err(DomainError::InvalidInput("Expected '\"'".to_string()));
    }
    self.position += 1;
    let body_start = self.position;
    while let Some(&byte) = self.input.get(self.position) {
        if byte == b'\\' {
            // Step over the backslash and the byte it escapes so an escaped
            // quote is not mistaken for the terminator.
            self.position += 2;
            continue;
        }
        self.position += 1;
        if byte != b'"' {
            continue;
        }
        // `position` is already past the closing quote; the body ends one
        // byte before it.
        let body = &self.input[body_start..self.position - 1];
        return if body.contains(&b'\\') {
            self.unescape_string(body).map(LazyJsonValue::StringOwned)
        } else {
            Ok(LazyJsonValue::StringBorrowed(body))
        };
    }
    Err(DomainError::InvalidInput("Unterminated string".to_string()))
}
/// Parses a JSON object starting at the current position and returns the raw
/// bytes from `{` through the matching `}` as [`LazyJsonValue::ObjectSlice`].
///
/// The members are scanned for well-formedness only; their parsed values are
/// discarded. Keys must be JSON strings.
///
/// # Errors
///
/// Returns [`DomainError::SecurityViolation`] when the validator rejects the
/// new nesting depth (checked before anything is consumed), and
/// [`DomainError::InvalidInput`] for a missing `{`, a non-string key, a missing
/// `:`/`,`/`}` or a malformed member value.
fn parse_object(&mut self) -> DomainResult<LazyJsonValue<'a>> {
    self.validator
        .validate_json_depth(self.depth + 1)
        .map_err(|e| DomainError::SecurityViolation(e.to_string()))?;
    if self.input.get(self.position) != Some(&b'{') {
        return Err(DomainError::InvalidInput("Expected '{'".to_string()));
    }
    let start = self.position;
    self.position += 1;
    self.depth += 1;
    // The member scan runs in a helper so `depth` is restored on the error
    // path as well; otherwise a failed parse leaves the counter elevated for
    // later `parse_value` calls.
    let scanned = self.scan_object_members();
    self.depth -= 1;
    scanned?;
    Ok(LazyJsonValue::ObjectSlice(
        &self.input[start..self.position],
    ))
}

/// Consumes the members of an object whose `{` has already been consumed, up
/// to and including the closing `}`.
fn scan_object_members(&mut self) -> DomainResult<()> {
    self.skip_whitespace();
    let mut first = true;
    while self.position < self.input.len() && self.input[self.position] != b'}' {
        if !first {
            self.expect_char(b',')?;
            self.skip_whitespace();
        }
        first = false;
        // JSON object names must be strings, so only a quote may start a key.
        match self.input.get(self.position) {
            Some(b'"') => {
                self.parse_string()?;
            }
            Some(_) => {
                return Err(DomainError::InvalidInput(
                    "Object keys must be strings".to_string(),
                ));
            }
            None => {
                return Err(DomainError::InvalidInput(
                    "Unexpected end of input".to_string(),
                ));
            }
        }
        self.skip_whitespace();
        self.expect_char(b':')?;
        self.skip_whitespace();
        self.parse_value()?;
        self.skip_whitespace();
    }
    self.expect_char(b'}')
}
/// Parses a JSON array starting at the current position and returns the raw
/// bytes from `[` through the matching `]` as [`LazyJsonValue::ArraySlice`].
///
/// The elements are scanned for well-formedness only; their parsed values are
/// discarded.
///
/// # Errors
///
/// Returns [`DomainError::SecurityViolation`] when the validator rejects the
/// new nesting depth (checked before anything is consumed), and
/// [`DomainError::InvalidInput`] for a missing `[`, a missing `,`/`]` or a
/// malformed element.
fn parse_array(&mut self) -> DomainResult<LazyJsonValue<'a>> {
    self.validator
        .validate_json_depth(self.depth + 1)
        .map_err(|e| DomainError::SecurityViolation(e.to_string()))?;
    if self.input.get(self.position) != Some(&b'[') {
        return Err(DomainError::InvalidInput("Expected '['".to_string()));
    }
    let start = self.position;
    self.position += 1;
    self.depth += 1;
    // The element scan runs in a helper so `depth` is restored on the error
    // path as well; otherwise a failed parse leaves the counter elevated for
    // later `parse_value` calls.
    let scanned = self.scan_array_elements();
    self.depth -= 1;
    scanned?;
    Ok(LazyJsonValue::ArraySlice(&self.input[start..self.position]))
}

/// Consumes the elements of an array whose `[` has already been consumed, up
/// to and including the closing `]`.
fn scan_array_elements(&mut self) -> DomainResult<()> {
    self.skip_whitespace();
    let mut first = true;
    while self.position < self.input.len() && self.input[self.position] != b']' {
        if !first {
            self.expect_char(b',')?;
            self.skip_whitespace();
        }
        first = false;
        self.parse_value()?;
        self.skip_whitespace();
    }
    self.expect_char(b']')
}
/// Parses the literal `true` or `false` at the current position.
///
/// Only the literal itself is matched; the byte that follows it is not
/// inspected.
///
/// # Errors
///
/// Returns [`DomainError::InvalidInput`] when neither literal is present.
fn parse_boolean(&mut self) -> DomainResult<LazyJsonValue<'a>> {
    let rest = self.input.get(self.position..).unwrap_or(&[]);
    let (value, width) = if rest.starts_with(b"true") {
        (true, 4)
    } else if rest.starts_with(b"false") {
        (false, 5)
    } else {
        return Err(DomainError::InvalidInput(
            "Invalid boolean value".to_string(),
        ));
    };
    self.position += width;
    Ok(LazyJsonValue::Boolean(value))
}
/// Parses the literal `null` at the current position.
///
/// Only the literal itself is matched; the byte that follows it is not
/// inspected.
///
/// # Errors
///
/// Returns [`DomainError::InvalidInput`] when the literal is not present.
fn parse_null(&mut self) -> DomainResult<LazyJsonValue<'a>> {
    let rest = self.input.get(self.position..).unwrap_or(&[]);
    if !rest.starts_with(b"null") {
        return Err(DomainError::InvalidInput("Invalid null value".to_string()));
    }
    self.position += 4;
    Ok(LazyJsonValue::Null)
}
/// Scans a JSON number at the current position and returns its raw bytes as
/// [`LazyJsonValue::NumberSlice`]; no numeric conversion happens here.
///
/// Accepted shape: optional `-`, then either a single `0` or a run of digits,
/// then an optional `.digits` fraction, then an optional `e`/`E` exponent with
/// optional sign. The caller must ensure the current position is inside the
/// input (the first byte is read by direct indexing).
///
/// # Errors
///
/// Returns [`DomainError::InvalidInput`] when the integer part is missing or
/// when a fraction or exponent has no digits.
fn parse_number(&mut self) -> DomainResult<LazyJsonValue<'a>> {
    // Index one past the run of ASCII digits that begins at `from`.
    fn end_of_digits(bytes: &[u8], from: usize) -> usize {
        from + bytes
            .iter()
            .skip(from)
            .take_while(|b| b.is_ascii_digit())
            .count()
    }
    // Whether `index` is in bounds and holds an ASCII digit.
    fn at_digit(bytes: &[u8], index: usize) -> bool {
        matches!(bytes.get(index), Some(b) if b.is_ascii_digit())
    }

    let bytes = self.input;
    let begin = self.position;
    if bytes[begin] == b'-' {
        self.position += 1;
    }

    // Integer part: a lone zero, or a digit run starting with 1-9.
    match bytes.get(self.position) {
        Some(b'0') => self.position += 1,
        Some(b'1'..=b'9') => self.position = end_of_digits(bytes, self.position),
        _ => return Err(DomainError::InvalidInput("Invalid number".to_string())),
    }

    // Optional fraction.
    if bytes.get(self.position) == Some(&b'.') {
        self.position += 1;
        if !at_digit(bytes, self.position) {
            return Err(DomainError::InvalidInput(
                "Invalid number: missing digits after decimal".to_string(),
            ));
        }
        self.position = end_of_digits(bytes, self.position);
    }

    // Optional exponent.
    if matches!(bytes.get(self.position), Some(b'e' | b'E')) {
        self.position += 1;
        if matches!(bytes.get(self.position), Some(b'+' | b'-')) {
            self.position += 1;
        }
        if !at_digit(bytes, self.position) {
            return Err(DomainError::InvalidInput(
                "Invalid number: missing digits in exponent".to_string(),
            ));
        }
        self.position = end_of_digits(bytes, self.position);
    }

    Ok(LazyJsonValue::NumberSlice(&bytes[begin..self.position]))
}
/// Advances the position past any run of space, tab, line-feed or
/// carriage-return bytes.
fn skip_whitespace(&mut self) {
    while matches!(
        self.input.get(self.position),
        Some(b' ' | b'\t' | b'\n' | b'\r')
    ) {
        self.position += 1;
    }
}
/// Consumes the byte `ch` at the current position.
///
/// # Errors
///
/// Returns [`DomainError::InvalidInput`] without advancing when the input is
/// exhausted or the current byte differs from `ch`.
fn expect_char(&mut self, ch: u8) -> DomainResult<()> {
    if self.input.get(self.position) == Some(&ch) {
        self.position += 1;
        return Ok(());
    }
    let ch_char = ch as char;
    Err(DomainError::InvalidInput(format!("Expected '{ch_char}'")))
}
fn unescape_string(&self, input: &[u8]) -> DomainResult<String> {
let mut result = Vec::with_capacity(input.len());
let mut i = 0;
while i < input.len() {
if input[i] == b'\\' && i + 1 < input.len() {
match input[i + 1] {
b'"' => result.push(b'"'),
b'\\' => result.push(b'\\'),
b'/' => result.push(b'/'),
b'b' => result.push(b'\x08'),
b'f' => result.push(b'\x0C'),
b'n' => result.push(b'\n'),
b'r' => result.push(b'\r'),
b't' => result.push(b'\t'),
b'u' => {
if i + 5 < input.len() {
i += 6;
continue;
} else {
return Err(DomainError::InvalidInput(
"Invalid unicode escape".to_string(),
));
}
}
_ => {
return Err(DomainError::InvalidInput(
"Invalid escape sequence".to_string(),
));
}
}
i += 2;
} else {
result.push(input[i]);
i += 1;
}
}
String::from_utf8(result)
.map_err(|e| DomainError::InvalidInput(format!("Invalid UTF-8: {e}")))
}
}
/// [`LazyParser`] implementation: each `parse_lazy` call restarts the parser on
/// a fresh buffer and parses a single value from its beginning.
impl<'a> LazyParser<'a> for ZeroCopyParser<'a> {
    type Output = LazyJsonValue<'a>;
    type Error = DomainError;

    /// Validates the size of `input`, installs it as the current buffer with
    /// position and depth reset to zero, and parses one value.
    ///
    /// Bytes after that value are not examined; they are available through
    /// [`remaining`](LazyParser::remaining).
    ///
    /// # Errors
    ///
    /// Returns [`DomainError::SecurityViolation`] when the validator rejects
    /// the input length (the parser state is left untouched in that case), or
    /// whatever error `parse_value` produces.
    fn parse_lazy(&mut self, input: &'a [u8]) -> Result<Self::Output, Self::Error> {
        if let Err(violation) = self.validator.validate_input_size(input.len()) {
            return Err(DomainError::SecurityViolation(violation.to_string()));
        }
        self.depth = 0;
        self.position = 0;
        self.input = input;
        self.parse_value()
    }

    /// Returns the input from the current position to the end, or an empty
    /// slice when the position is at or beyond the end.
    fn remaining(&self) -> &'a [u8] {
        self.input.get(self.position..).unwrap_or(&[])
    }

    /// Returns `true` once the position has reached (or passed) the end of
    /// the input; this is also the case when no input is attached.
    fn is_complete(&self) -> bool {
        self.input.len() <= self.position
    }

    /// Detaches the input and zeroes the position and depth counters. The
    /// validator is kept.
    fn reset(&mut self) {
        self.depth = 0;
        self.position = 0;
        self.input = &[];
    }
}
/// A JSON value produced by `ZeroCopyParser`.
///
/// All slice-carrying variants borrow from the parser's input buffer for `'a`;
/// only [`StringOwned`](LazyJsonValue::StringOwned) holds its own allocation.
#[derive(Debug, Clone, PartialEq)]
pub enum LazyJsonValue<'a> {
/// String content between the quotes, for strings containing no backslash.
StringBorrowed(&'a [u8]),
/// String content after escape sequences were resolved.
StringOwned(String),
/// Raw bytes of a number literal; converted on demand by `as_number`.
NumberSlice(&'a [u8]),
/// The literal `true` or `false`.
Boolean(bool),
/// The literal `null`.
Null,
/// Raw bytes of an object, from `{` through the matching `}`.
ObjectSlice(&'a [u8]),
/// Raw bytes of an array, from `[` through the matching `]`.
ArraySlice(&'a [u8]),
}
impl<'a> LazyJsonValue<'a> {
pub fn value_type(&self) -> ValueType {
match self {
LazyJsonValue::StringBorrowed(_) | LazyJsonValue::StringOwned(_) => ValueType::String,
LazyJsonValue::NumberSlice(_) => ValueType::Number,
LazyJsonValue::Boolean(_) => ValueType::Boolean,
LazyJsonValue::Null => ValueType::Null,
LazyJsonValue::ObjectSlice(_) => ValueType::Object,
LazyJsonValue::ArraySlice(_) => ValueType::Array,
}
}
pub fn to_string_lossy(&self) -> String {
match self {
LazyJsonValue::StringBorrowed(bytes) => String::from_utf8_lossy(bytes).to_string(),
LazyJsonValue::StringOwned(s) => s.clone(),
LazyJsonValue::NumberSlice(bytes) => String::from_utf8_lossy(bytes).to_string(),
LazyJsonValue::Boolean(b) => b.to_string(),
LazyJsonValue::Null => "null".to_string(),
LazyJsonValue::ObjectSlice(bytes) => String::from_utf8_lossy(bytes).to_string(),
LazyJsonValue::ArraySlice(bytes) => String::from_utf8_lossy(bytes).to_string(),
}
}
pub fn as_str(&self) -> DomainResult<&str> {
match self {
LazyJsonValue::StringBorrowed(bytes) => from_utf8(bytes)
.map_err(|e| DomainError::InvalidInput(format!("Invalid UTF-8: {e}"))),
LazyJsonValue::StringOwned(s) => Ok(s.as_str()),
_ => Err(DomainError::InvalidInput(
"Value is not a string".to_string(),
)),
}
}
pub fn as_number(&self) -> DomainResult<f64> {
match self {
LazyJsonValue::NumberSlice(bytes) => {
let s = from_utf8(bytes)
.map_err(|e| DomainError::InvalidInput(format!("Invalid UTF-8: {e}")))?;
s.parse::<f64>()
.map_err(|e| DomainError::InvalidInput(format!("Invalid number: {e}")))
}
_ => Err(DomainError::InvalidInput(
"Value is not a number".to_string(),
)),
}
}
pub fn as_boolean(&self) -> DomainResult<bool> {
match self {
LazyJsonValue::Boolean(b) => Ok(*b),
_ => Err(DomainError::InvalidInput(
"Value is not a boolean".to_string(),
)),
}
}
pub fn is_null(&self) -> bool {
matches!(self, LazyJsonValue::Null)
}
pub fn as_bytes(&self) -> Option<&'a [u8]> {
match self {
LazyJsonValue::StringBorrowed(bytes) => Some(bytes),
LazyJsonValue::NumberSlice(bytes) => Some(bytes),
LazyJsonValue::ObjectSlice(bytes) => Some(bytes),
LazyJsonValue::ArraySlice(bytes) => Some(bytes),
_ => None,
}
}
pub fn memory_usage(&self) -> MemoryUsage {
match self {
LazyJsonValue::StringBorrowed(bytes) => MemoryUsage {
allocated_bytes: 0,
referenced_bytes: bytes.len(),
},
LazyJsonValue::StringOwned(s) => MemoryUsage {
allocated_bytes: s.len(),
referenced_bytes: 0,
},
LazyJsonValue::NumberSlice(bytes) => MemoryUsage {
allocated_bytes: 0,
referenced_bytes: bytes.len(),
},
LazyJsonValue::Boolean(val) => MemoryUsage {
allocated_bytes: 0,
referenced_bytes: if *val { 4 } else { 5 }, },
LazyJsonValue::Null => MemoryUsage {
allocated_bytes: 0,
referenced_bytes: 4, },
LazyJsonValue::ObjectSlice(bytes) => MemoryUsage {
allocated_bytes: 0,
referenced_bytes: bytes.len(),
},
LazyJsonValue::ArraySlice(bytes) => MemoryUsage {
allocated_bytes: 0,
referenced_bytes: bytes.len(),
},
}
}
}
/// Byte accounting for a parsed value, split into bytes the value owns and
/// bytes it merely references.
#[derive(Debug, Clone, PartialEq)]
pub struct MemoryUsage {
    /// Bytes held in storage owned by the value.
    pub allocated_bytes: usize,
    /// Bytes viewed in place without being copied.
    pub referenced_bytes: usize,
}

impl MemoryUsage {
    /// Sum of allocated and referenced bytes.
    pub fn total(&self) -> usize {
        let Self {
            allocated_bytes,
            referenced_bytes,
        } = *self;
        allocated_bytes + referenced_bytes
    }

    /// Fraction of the total that is referenced rather than allocated, in
    /// `0.0..=1.0`. A value with no bytes at all reports `1.0`.
    pub fn efficiency(&self) -> f64 {
        match self.total() {
            0 => 1.0,
            total => self.referenced_bytes as f64 / total as f64,
        }
    }
}
/// Accumulates input chunks in an owned buffer and attempts to parse the
/// buffered bytes on request.
///
/// NOTE(review): as written this is a placeholder — `parse_available` never
/// returns any parsed values (see its documentation).
pub struct IncrementalParser<'a> {
    /// Bytes fed so far and not yet discarded.
    buffer: Vec<u8>,
    /// Zero-sized marker carrying the `'a` lifetime.
    _phantom: std::marker::PhantomData<&'a ()>,
}

impl<'a> Default for IncrementalParser<'a> {
    /// Equivalent to [`IncrementalParser::new`].
    fn default() -> Self {
        IncrementalParser::new()
    }
}

impl<'a> IncrementalParser<'a> {
    /// Creates a parser with an empty buffer pre-sized to 8192 bytes.
    pub fn new() -> Self {
        const INITIAL_CAPACITY: usize = 8192;
        let buffer = Vec::with_capacity(INITIAL_CAPACITY);
        Self {
            buffer,
            _phantom: std::marker::PhantomData,
        }
    }

    /// Appends `data` to the internal buffer. Always returns `Ok(())`.
    pub fn feed(&mut self, data: &[u8]) -> DomainResult<()> {
        let buffered = &mut self.buffer;
        buffered.extend_from_slice(data);
        Ok(())
    }

    /// Tries to parse one value from the start of the buffer.
    ///
    /// When the parse succeeds the whole buffer is cleared; when it fails the
    /// buffer is left as it was. In every case the returned vector is empty
    /// and the result is `Ok`.
    ///
    /// NOTE(review): the parsed value and any parse error are both discarded,
    /// and bytes following the first value are dropped by the clear.
    pub fn parse_available(&mut self) -> DomainResult<Vec<LazyJsonValue<'_>>> {
        if self.buffer.is_empty() {
            return Ok(Vec::new());
        }
        let parsed_ok = ZeroCopyParser::new().parse_lazy(&self.buffer).is_ok();
        if parsed_ok {
            self.buffer.clear();
        }
        Ok(Vec::new())
    }

    /// Returns `true` when the buffer holds any bytes.
    ///
    /// NOTE(review): despite the name, this does not check that the buffered
    /// bytes form a complete JSON value.
    pub fn has_complete_value(&self) -> bool {
        let nothing_buffered = self.buffer.is_empty();
        !nothing_buffered
    }
}
impl<'a> Default for ZeroCopyParser<'a> {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
// An escape-free string is returned as a borrowed slice without its quotes.
#[test]
fn test_parse_string() {
    let input = br#""hello world""#;
    let parsed = ZeroCopyParser::new().parse_lazy(input).unwrap();
    if let LazyJsonValue::StringBorrowed(bytes) = parsed {
        assert_eq!(bytes, b"hello world");
    } else {
        panic!("Expected string");
    }
}
// A string containing escapes is unescaped into an owned `String`.
#[test]
fn test_parse_escaped_string() {
    let input = br#""hello \"world\"""#;
    let parsed = ZeroCopyParser::new().parse_lazy(input).unwrap();
    if let LazyJsonValue::StringOwned(text) = parsed {
        assert_eq!(text, "hello \"world\"");
    } else {
        panic!("Expected owned string due to escapes");
    }
}
// A decimal number is kept as its raw bytes and converts to the matching f64.
#[test]
fn test_parse_number() {
    let parsed = ZeroCopyParser::new().parse_lazy(b"123.45").unwrap();
    if let LazyJsonValue::NumberSlice(bytes) = parsed {
        assert_eq!(bytes, b"123.45");
        assert_eq!(parsed.as_number().unwrap(), 123.45);
    } else {
        panic!("Expected number");
    }
}
// Both boolean literals parse, with a reset between the two runs.
#[test]
fn test_parse_boolean() {
    let mut parser = ZeroCopyParser::new();
    assert_eq!(
        parser.parse_lazy(b"true").unwrap(),
        LazyJsonValue::Boolean(true)
    );
    parser.reset();
    assert_eq!(
        parser.parse_lazy(b"false").unwrap(),
        LazyJsonValue::Boolean(false)
    );
}
// The `null` literal parses to the `Null` variant.
#[test]
fn test_parse_null() {
    let parsed = ZeroCopyParser::new().parse_lazy(b"null").unwrap();
    assert!(parsed.is_null());
    assert_eq!(parsed, LazyJsonValue::Null);
}
// An empty object yields an object slice covering both braces.
#[test]
fn test_parse_empty_object() {
    let parsed = ZeroCopyParser::new().parse_lazy(b"{}").unwrap();
    if let LazyJsonValue::ObjectSlice(bytes) = parsed {
        assert_eq!(bytes, b"{}");
    } else {
        panic!("Expected object");
    }
}
// An empty array yields an array slice covering both brackets.
#[test]
fn test_parse_empty_array() {
    let parsed = ZeroCopyParser::new().parse_lazy(b"[]").unwrap();
    if let LazyJsonValue::ArraySlice(bytes) = parsed {
        assert_eq!(bytes, b"[]");
    } else {
        panic!("Expected array");
    }
}
// Borrowed strings report only referenced bytes; unescaped strings report
// only allocated bytes.
#[test]
fn test_memory_usage() {
    let mut parser = ZeroCopyParser::new();

    let borrowed_usage = parser.parse_lazy(br#""hello""#).unwrap().memory_usage();
    assert_eq!(borrowed_usage.allocated_bytes, 0);
    assert_eq!(borrowed_usage.referenced_bytes, 5);
    assert_eq!(borrowed_usage.efficiency(), 1.0);

    parser.reset();

    let owned_usage = parser.parse_lazy(br#""he\"llo""#).unwrap().memory_usage();
    assert!(owned_usage.allocated_bytes > 0);
    assert_eq!(owned_usage.referenced_bytes, 0);
    assert_eq!(owned_usage.efficiency(), 0.0);
}
// A multi-member object is returned as one slice spanning the whole input.
#[test]
fn test_complex_object() {
    let input = br#"{"name": "test", "value": 42, "active": true}"#;
    let parsed = ZeroCopyParser::new().parse_lazy(input).unwrap();
    if let LazyJsonValue::ObjectSlice(bytes) = parsed {
        assert_eq!(bytes.len(), input.len());
    } else {
        panic!("Expected object");
    }
}
// One parser instance can parse a second, unrelated input after `reset`.
#[test]
fn test_parser_reuse() {
    let mut parser = ZeroCopyParser::new();

    let first = parser.parse_lazy(b"123").unwrap();
    assert!(matches!(first, LazyJsonValue::NumberSlice(_)));

    parser.reset();

    let second = parser.parse_lazy(br#""hello""#).unwrap();
    assert!(matches!(second, LazyJsonValue::StringBorrowed(_)));
}
// `\/` unescapes to a plain slash.
#[test]
fn test_escape_sequence_slash() {
    let input = br#""path\/to\/file""#;
    let parsed = ZeroCopyParser::new().parse_lazy(input).unwrap();
    if let LazyJsonValue::StringOwned(text) = parsed {
        assert_eq!(text, "path/to/file");
    } else {
        panic!("Expected owned string due to escapes");
    }
}
// `\b` unescapes to the backspace control character (0x08).
#[test]
fn test_escape_sequence_backspace() {
    let input = br#""text\bwith\bbackspace""#;
    let parsed = ZeroCopyParser::new().parse_lazy(input).unwrap();
    if let LazyJsonValue::StringOwned(text) = parsed {
        assert_eq!(text, "text\x08with\x08backspace");
    } else {
        panic!("Expected owned string due to escapes");
    }
}
// `\f` unescapes to the form-feed control character (0x0C).
#[test]
fn test_escape_sequence_formfeed() {
    let input = br#""text\fwith\fformfeed""#;
    let parsed = ZeroCopyParser::new().parse_lazy(input).unwrap();
    if let LazyJsonValue::StringOwned(text) = parsed {
        assert_eq!(text, "text\x0Cwith\x0Cformfeed");
    } else {
        panic!("Expected owned string due to escapes");
    }
}
// A well-formed `\uXXXX` escape is accepted.
// NOTE(review): only acceptance is checked here; the decoded text is not asserted.
#[test]
fn test_escape_sequence_unicode_basic() {
    let input = br#""text\u0041""#;
    let outcome = ZeroCopyParser::new().parse_lazy(input);
    assert!(outcome.is_ok());
}
// A bare integer parses successfully as a number slice.
#[test]
fn test_number_parsing_partial() {
    let outcome = ZeroCopyParser::new().parse_lazy(b"123");
    assert!(outcome.is_ok());
    assert!(matches!(outcome, Ok(LazyJsonValue::NumberSlice(_))));
}
// A 50-digit integer exceeds every machine integer type, but the parser only
// scans digits and never converts, so the literal must come back verbatim.
// (The previous assertion, `is_ok() || is_err()`, could never fail.)
#[test]
fn test_number_parsing_error_overflow() {
    let mut parser = ZeroCopyParser::new();
    let input = b"99999999999999999999999999999999999999999999999999";
    let result = parser.parse_lazy(input).unwrap();
    assert_eq!(result, LazyJsonValue::NumberSlice(input));
    // The on-demand f64 conversion loses precision but stays finite.
    assert!(result.as_number().unwrap().is_finite());
}
// Feeding a document in two chunks succeeds for each chunk.
#[test]
fn test_incremental_parser_feed() {
    let mut parser = IncrementalParser::new();
    assert!(parser.feed(b"{\"key\":").is_ok());
    assert!(parser.feed(b"\"value\"}").is_ok());
}
// Three chunks that together form `[1,2,3]` are buffered, can be parsed, and
// are consumed by a successful parse. (The test previously asserted nothing.)
#[test]
fn test_incremental_parser_multiple_feeds() {
    let mut parser = IncrementalParser::new();
    parser.feed(b"[1,").unwrap();
    parser.feed(b"2,").unwrap();
    parser.feed(b"3]").unwrap();
    assert!(parser.has_complete_value());
    assert!(parser.parse_available().is_ok());
    // The buffered document parsed successfully, so nothing is left pending.
    assert!(!parser.has_complete_value());
}
// Variant pattern checks and `is_null` agree for number, null and boolean.
#[test]
fn test_lazy_json_value_matches() {
    let number = LazyJsonValue::NumberSlice(b"123");
    assert!(matches!(number, LazyJsonValue::NumberSlice(_)));
    assert!(!number.is_null());

    let null_value = LazyJsonValue::Null;
    assert!(null_value.is_null());
    assert!(!matches!(null_value, LazyJsonValue::NumberSlice(_)));

    let truthy = LazyJsonValue::Boolean(true);
    assert!(matches!(truthy, LazyJsonValue::Boolean(true)));
    assert!(!truthy.is_null());
}
// Efficiency is 1.0 for a borrowed string and 0.0 for an owned one.
#[test]
fn test_memory_usage_zero_copy_efficiency() {
    let borrowed_usage = LazyJsonValue::StringBorrowed(b"test").memory_usage();
    assert_eq!(borrowed_usage.efficiency(), 1.0);
    assert_eq!(borrowed_usage.allocated_bytes, 0);

    let owned_usage = LazyJsonValue::StringOwned("test".to_string()).memory_usage();
    assert_eq!(owned_usage.efficiency(), 0.0);
    assert!(owned_usage.allocated_bytes > 0);
}
}