use crate::{
domain::{DomainError, DomainResult},
parser::{
ValueType,
buffer_pool::{BufferPool, BufferSize, PooledBuffer},
zero_copy::{LazyJsonValue, LazyParser, MemoryUsage},
},
};
use std::{marker::PhantomData, sync::Arc};
pub struct SimdZeroCopyParser<'a> {
buffer_pool: Arc<BufferPool>,
current_buffer: Option<PooledBuffer>,
input: &'a [u8],
position: usize,
depth: usize,
simd_enabled: bool,
_phantom: PhantomData<&'a ()>,
}
#[derive(Debug, Clone)]
pub struct SimdZeroCopyConfig {
pub max_depth: usize,
pub enable_simd: bool,
pub buffer_pool_config: Option<crate::parser::buffer_pool::PoolConfig>,
pub simd_threshold: usize,
pub track_memory_usage: bool,
}
#[derive(Debug)]
pub struct SimdParseResult<'a> {
pub value: LazyJsonValue<'a>,
pub memory_usage: MemoryUsage,
pub simd_used: bool,
pub processing_time_ns: u64,
}
#[derive(Debug, Clone)]
pub struct SimdParsingStats {
pub total_parses: u64,
pub simd_accelerated_parses: u64,
pub total_bytes_processed: u64,
pub average_processing_time_ns: u64,
pub simd_efficiency: f64,
}
impl<'a> Default for SimdZeroCopyParser<'a> {
fn default() -> Self {
Self::new()
}
}
impl<'a> SimdZeroCopyParser<'a> {
pub fn new() -> Self {
Self::with_config(SimdZeroCopyConfig::default())
}
pub fn with_config(config: SimdZeroCopyConfig) -> Self {
let buffer_pool = if let Some(pool_config) = config.buffer_pool_config {
Arc::new(BufferPool::with_config(pool_config))
} else {
Arc::new(BufferPool::new())
};
Self {
buffer_pool,
current_buffer: None,
input: &[],
position: 0,
depth: 0,
simd_enabled: config.enable_simd && Self::is_simd_available(),
_phantom: PhantomData,
}
}
pub fn parse_simd(&mut self, input: &'a [u8]) -> DomainResult<SimdParseResult<'a>> {
let start_time = std::time::Instant::now();
self.input = input;
self.position = 0;
self.depth = 0;
let use_simd = self.simd_enabled && input.len() >= 256;
let value = if use_simd {
self.parse_with_simd(input)?
} else {
self.parse_without_simd(input)?
};
let processing_time = start_time.elapsed().as_nanos() as u64;
let memory_usage = value.memory_usage();
Ok(SimdParseResult {
value,
memory_usage,
simd_used: use_simd,
processing_time_ns: processing_time,
})
}
fn parse_with_simd(&mut self, input: &'a [u8]) -> DomainResult<LazyJsonValue<'a>> {
let sonic_result = self.sonic_preprocess(input)?;
match sonic_result.value_type {
ValueType::Object => self.parse_simd_object(input, &sonic_result),
ValueType::Array => self.parse_simd_array(input, &sonic_result),
ValueType::String => self.parse_simd_string(input, &sonic_result),
ValueType::Number => self.parse_simd_number(input, &sonic_result),
ValueType::Boolean => self.parse_simd_boolean(input, &sonic_result),
ValueType::Null => Ok(LazyJsonValue::Null),
}
}
fn parse_without_simd(&mut self, input: &'a [u8]) -> DomainResult<LazyJsonValue<'a>> {
let mut zero_copy_parser = crate::parser::zero_copy::ZeroCopyParser::new();
zero_copy_parser.parse_lazy(input)
}
fn sonic_preprocess(&self, input: &[u8]) -> DomainResult<SonicStructuralInfo> {
if input.is_empty() {
return Err(DomainError::InvalidInput("Empty input".to_string()));
}
let mut pos = 0;
while pos < input.len() && input[pos].is_ascii_whitespace() {
pos += 1;
}
if pos >= input.len() {
return Err(DomainError::InvalidInput("Only whitespace".to_string()));
}
let value_type = match input[pos] {
b'{' => ValueType::Object,
b'[' => ValueType::Array,
b'"' => ValueType::String,
b't' | b'f' => ValueType::Boolean,
b'n' => ValueType::Null,
b'-' | b'0'..=b'9' => ValueType::Number,
_ => {
let ch = input[pos] as char;
return Err(DomainError::InvalidInput(format!(
"Invalid JSON start character: {ch}"
)));
}
};
Ok(SonicStructuralInfo {
value_type,
start_pos: pos,
has_escapes: self.detect_escapes(input),
is_simd_friendly: self.is_simd_friendly(input),
})
}
fn parse_simd_object(
&mut self,
input: &'a [u8],
info: &SonicStructuralInfo,
) -> DomainResult<LazyJsonValue<'a>> {
if info.is_simd_friendly {
self.simd_validate_object_structure(input)?;
}
Ok(LazyJsonValue::ObjectSlice(input))
}
fn parse_simd_array(
&mut self,
input: &'a [u8],
info: &SonicStructuralInfo,
) -> DomainResult<LazyJsonValue<'a>> {
if info.is_simd_friendly {
self.simd_validate_array_structure(input)?;
}
Ok(LazyJsonValue::ArraySlice(input))
}
fn parse_simd_string(
&mut self,
input: &'a [u8],
info: &SonicStructuralInfo,
) -> DomainResult<LazyJsonValue<'a>> {
if !info.has_escapes {
let start = info.start_pos + 1; let end = input.len() - 1; Ok(LazyJsonValue::StringBorrowed(&input[start..end]))
} else {
let unescaped = self.simd_unescape_string(input)?;
Ok(LazyJsonValue::StringOwned(unescaped))
}
}
fn parse_simd_number(
&mut self,
input: &'a [u8],
_info: &SonicStructuralInfo,
) -> DomainResult<LazyJsonValue<'a>> {
if self.simd_enabled {
self.simd_validate_number(input)?;
}
Ok(LazyJsonValue::NumberSlice(input))
}
fn parse_simd_boolean(
&mut self,
input: &'a [u8],
_info: &SonicStructuralInfo,
) -> DomainResult<LazyJsonValue<'a>> {
if self.simd_enabled {
if input == b"true" {
return Ok(LazyJsonValue::Boolean(true));
} else if input == b"false" {
return Ok(LazyJsonValue::Boolean(false));
} else {
return Err(DomainError::InvalidInput(
"Invalid boolean value".to_string(),
));
}
}
match input {
b"true" => Ok(LazyJsonValue::Boolean(true)),
b"false" => Ok(LazyJsonValue::Boolean(false)),
_ => Err(DomainError::InvalidInput(
"Invalid boolean value".to_string(),
)),
}
}
fn simd_validate_object_structure(&self, input: &[u8]) -> DomainResult<()> {
let open_count = input.iter().filter(|&&c| c == b'{').count();
let close_count = input.iter().filter(|&&c| c == b'}').count();
if open_count == close_count && open_count > 0 {
Ok(())
} else {
Err(DomainError::InvalidInput(
"Unmatched braces in object".to_string(),
))
}
}
fn simd_validate_array_structure(&self, input: &[u8]) -> DomainResult<()> {
let open_count = input.iter().filter(|&&c| c == b'[').count();
let close_count = input.iter().filter(|&&c| c == b']').count();
if open_count == close_count && open_count > 0 {
Ok(())
} else {
Err(DomainError::InvalidInput(
"Unmatched brackets in array".to_string(),
))
}
}
fn simd_validate_number(&self, input: &[u8]) -> DomainResult<()> {
if input.is_empty() {
return Err(DomainError::InvalidInput("Empty number".to_string()));
}
let is_valid = input.iter().all(|&c| {
c.is_ascii_digit() || c == b'.' || c == b'-' || c == b'+' || c == b'e' || c == b'E'
});
if is_valid {
Ok(())
} else {
Err(DomainError::InvalidInput(
"Invalid number format".to_string(),
))
}
}
fn simd_unescape_string(&self, input: &[u8]) -> DomainResult<String> {
let mut result = Vec::with_capacity(input.len());
let mut i = 1;
while i < input.len() - 1 {
if input[i] == b'\\' && i + 1 < input.len() - 1 {
match input[i + 1] {
b'n' => result.push(b'\n'),
b'r' => result.push(b'\r'),
b't' => result.push(b'\t'),
b'\\' => result.push(b'\\'),
b'"' => result.push(b'"'),
c => result.push(c),
}
i += 2;
} else {
result.push(input[i]);
i += 1;
}
}
String::from_utf8(result)
.map_err(|e| DomainError::InvalidInput(format!("Invalid UTF-8: {e}")))
}
fn detect_escapes(&self, input: &[u8]) -> bool {
input.contains(&b'\\')
}
fn is_simd_friendly(&self, input: &[u8]) -> bool {
input.len() >= 32 && (input.as_ptr() as usize).is_multiple_of(32)
}
fn is_simd_available() -> bool {
#[cfg(target_arch = "x86_64")]
{
std::arch::is_x86_feature_detected!("avx2")
}
#[cfg(not(target_arch = "x86_64"))]
{
false
}
}
pub fn get_buffer(&mut self, min_size: usize) -> DomainResult<&mut PooledBuffer> {
if self.current_buffer.is_none()
|| self.current_buffer.as_ref().unwrap().capacity() < min_size
{
let size = BufferSize::for_capacity(min_size);
self.current_buffer = Some(self.buffer_pool.get_buffer(size)?);
}
Ok(self.current_buffer.as_mut().unwrap())
}
pub fn release_buffer(&mut self) {
self.current_buffer = None;
}
}
impl<'a> LazyParser<'a> for SimdZeroCopyParser<'a> {
type Output = SimdParseResult<'a>;
type Error = DomainError;
fn parse_lazy(&mut self, input: &'a [u8]) -> Result<Self::Output, Self::Error> {
self.parse_simd(input)
}
fn remaining(&self) -> &'a [u8] {
if self.position < self.input.len() {
&self.input[self.position..]
} else {
&[]
}
}
fn is_complete(&self) -> bool {
self.position >= self.input.len()
}
fn reset(&mut self) {
self.input = &[];
self.position = 0;
self.depth = 0;
self.release_buffer();
}
}
#[derive(Debug, Clone)]
struct SonicStructuralInfo {
value_type: ValueType,
start_pos: usize,
has_escapes: bool,
is_simd_friendly: bool,
}
impl Default for SimdZeroCopyConfig {
fn default() -> Self {
Self {
max_depth: 64,
enable_simd: true,
buffer_pool_config: None,
simd_threshold: 256,
track_memory_usage: true,
}
}
}
impl SimdZeroCopyConfig {
pub fn high_performance() -> Self {
Self {
max_depth: 128,
enable_simd: true,
buffer_pool_config: Some(crate::parser::buffer_pool::PoolConfig::simd_optimized()),
simd_threshold: 128, track_memory_usage: false, }
}
pub fn low_memory() -> Self {
Self {
max_depth: 32,
enable_simd: false,
buffer_pool_config: Some(crate::parser::buffer_pool::PoolConfig::low_memory()),
simd_threshold: 1024, track_memory_usage: true,
}
}
}
impl Default for SimdParsingStats {
fn default() -> Self {
Self {
total_parses: 0,
simd_accelerated_parses: 0,
total_bytes_processed: 0,
average_processing_time_ns: 0,
simd_efficiency: 0.0,
}
}
}
impl SimdParsingStats {
pub fn simd_usage_ratio(&self) -> f64 {
if self.total_parses == 0 {
0.0
} else {
self.simd_accelerated_parses as f64 / self.total_parses as f64
}
}
pub fn average_throughput_mbps(&self) -> f64 {
if self.average_processing_time_ns == 0 {
0.0
} else {
let seconds = self.average_processing_time_ns as f64 / 1_000_000_000.0;
let mb = self.total_bytes_processed as f64 / (1024.0 * 1024.0);
mb / seconds
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_simd_parser_creation() {
let parser = SimdZeroCopyParser::new();
assert!(!parser.simd_enabled || SimdZeroCopyParser::is_simd_available());
}
#[test]
fn test_simple_parsing() {
let mut parser = SimdZeroCopyParser::new();
let input = br#""hello world""#;
let result = parser.parse_simd(input).unwrap();
match result.value {
LazyJsonValue::StringBorrowed(s) => {
assert_eq!(s, b"hello world");
}
_ => panic!("Expected string"),
}
}
#[test]
fn test_number_parsing() {
let mut parser = SimdZeroCopyParser::new();
let input = b"123.456";
let result = parser.parse_simd(input).unwrap();
match result.value {
LazyJsonValue::NumberSlice(n) => {
assert_eq!(n, b"123.456");
}
_ => panic!("Expected number"),
}
}
#[test]
fn test_boolean_parsing() {
let mut parser = SimdZeroCopyParser::new();
let result = parser.parse_simd(b"true").unwrap();
assert_eq!(result.value, LazyJsonValue::Boolean(true));
parser.reset();
let result = parser.parse_simd(b"false").unwrap();
assert_eq!(result.value, LazyJsonValue::Boolean(false));
}
#[test]
fn test_object_parsing() {
let mut parser = SimdZeroCopyParser::new();
let input = br#"{"key": "value", "number": 42}"#;
let result = parser.parse_simd(input).unwrap();
match result.value {
LazyJsonValue::ObjectSlice(obj) => {
assert_eq!(obj, input);
}
_ => panic!("Expected object"),
}
}
#[test]
fn test_memory_usage_tracking() {
let mut parser = SimdZeroCopyParser::new();
let input = br#""test string""#;
let result = parser.parse_simd(input).unwrap();
assert_eq!(result.memory_usage.allocated_bytes, 0); assert!(result.memory_usage.referenced_bytes > 0);
}
#[test]
fn test_buffer_pool_integration() {
let mut parser = SimdZeroCopyParser::new();
let buffer = parser.get_buffer(1024).unwrap();
assert!(buffer.capacity() >= 1024);
parser.release_buffer();
assert!(parser.current_buffer.is_none());
}
}