use crate::{Error, Result, SemanticMeta, Frame, FrameFlags};
use crate::semantic::{SemanticType, ProcessingStrategy};
use crate::security::SecurityValidator;
use crate::config::SecurityConfig;
use serde_json::{self, Value};
use std::time::Instant;
/// Tunable options for semantic-aware processing.
///
/// NOTE(review): none of these fields are read anywhere in this file, so the
/// per-field descriptions below are inferred from the names only — confirm
/// against the code that consumes this config.
#[derive(Clone, Debug)]
pub struct SemanticConfig {
    /// Presumably toggles semantic detection on the input.
    pub detect_semantics: bool,
    /// Presumably the lower bound on chunk size (unit not shown here).
    pub min_chunk_size: usize,
    /// Presumably the upper bound on chunk size (unit not shown here).
    pub max_chunk_size: usize,
    /// Presumably toggles streaming mode.
    pub enable_streaming: bool,
}
/// Coarse structural classification of a JSON input.
///
/// Produced by the parser's complexity estimator in this file, which inspects
/// only a bounded prefix of the input and never fully parses it.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Complexity {
    /// Short input, or input with only a handful of `{` / `[` openers.
    Simple,
    /// Input with several openers that fits none of the other categories.
    Complex,
    /// Input whose observed nesting depth exceeds the estimator's depth limit.
    Nested,
    /// Input containing arrays where digit bytes heavily outnumber `[` openers.
    LargeArray,
}
/// Running performance counters maintained by the hybrid parser.
///
/// All fields start at zero via `Default`.
#[derive(Default, Debug)]
pub struct ParserMetrics {
    /// Smoothed (moving-average) parse throughput, in megabytes per second.
    pub throughput_mbps: f64,
    /// Smoothed per-parse latency in microseconds.
    ///
    /// Despite the name, the parser in this file maintains it as a moving
    /// average of individual parse durations, not as a true 99th percentile.
    pub latency_p99_us: f64,
    /// Never written in this file; presumably allocated memory in megabytes.
    pub memory_allocated_mb: f64,
    /// Never written in this file; presumably a cache-miss counter.
    pub cache_misses: u64,
    /// Number of successful parse operations recorded so far.
    pub operations_count: u64,
}
/// JSON parser that chooses between a serde-based backend and an optional
/// SIMD backend for each input.
pub struct HybridParser {
    /// Running performance counters, refreshed after each successful parse.
    metrics: ParserMetrics,
    /// Always-available backend; also the fallback when no SIMD backend exists.
    serde_parser: SerdeBackend,
    /// Optional SIMD backend. The constructors in this file leave it as `None`.
    simd_parser: Option<SimdBackend>,
    /// Size and throughput limits consulted when picking a backend.
    thresholds: BackendThresholds,
    /// Checks input size before validation-only processing.
    validator: SecurityValidator,
}
/// Limits that steer the choice between the serde and SIMD backends.
#[derive(Debug)]
pub struct BackendThresholds {
    /// Inputs shorter than this many bytes are always routed to serde.
    pub simd_min_size: usize,
    /// Not read anywhere in this file; presumably a cap on the structural
    /// complexity the SIMD backend should accept — confirm with its consumer.
    pub simd_max_complexity: u8,
    /// Throughput level (same unit as `ParserMetrics::throughput_mbps`) below
    /// which the parser reports that SIMD is worth using.
    pub simd_throughput_threshold: f64,
}
/// Stateless backend that checks input with `serde_json`.
pub struct SerdeBackend;
/// Stand-in for a SIMD-accelerated backend.
///
/// It carries no state yet; the private unit field only prevents construction
/// from outside this module.
pub struct SimdBackend {
    /// Zero-sized placeholder until a real implementation is added.
    _placeholder: (),
}
impl HybridParser {
pub fn new() -> Self {
Self {
metrics: ParserMetrics::default(),
serde_parser: SerdeBackend,
simd_parser: None, thresholds: BackendThresholds::default(),
validator: SecurityValidator::default(),
}
}
pub fn with_security_config(security_config: SecurityConfig) -> Self {
Self {
metrics: ParserMetrics::default(),
serde_parser: SerdeBackend,
simd_parser: None,
thresholds: BackendThresholds::default(),
validator: SecurityValidator::new(security_config),
}
}
pub fn parse(&mut self, input: &[u8]) -> Result<JsonValue<'_>> {
let start_time = Instant::now();
let backend = self.select_backend(input);
let result = match backend {
ParserBackend::Serde => self.parse_with_serde(input),
ParserBackend::Simd => {
if let Some(ref simd) = self.simd_parser {
simd.parse(input)
} else {
self.parse_with_serde(input)
}
}
ParserBackend::Validator => {
self.validate_only(input)?;
Ok(JsonValue::Raw(input))
}
};
let duration = start_time.elapsed();
self.update_metrics(input.len(), duration, result.is_ok());
result
}
pub fn parse_with_semantics(&mut self, input: &[u8], semantics: &SemanticMeta) -> Result<JsonValue<'_>> {
let backend = self.select_backend_with_semantics(input, semantics);
match backend {
ParserBackend::Simd => {
self.parse_semantic_simd(input, semantics)
}
_ => {
let mut value = self.parse(input)?;
self.apply_semantic_hints(&mut value, semantics);
Ok(value)
}
}
}
fn select_backend(&self, input: &[u8]) -> ParserBackend {
let complexity = self.estimate_complexity(input);
let size = input.len();
if size < self.thresholds.simd_min_size {
return ParserBackend::Serde;
}
match complexity {
Complexity::Simple | Complexity::LargeArray if self.simd_parser.is_some() => {
ParserBackend::Simd
}
_ => ParserBackend::Serde,
}
}
fn select_backend_with_semantics(&self, input: &[u8], semantics: &SemanticMeta) -> ParserBackend {
match semantics.processing_strategy() {
ProcessingStrategy::Simd if self.simd_parser.is_some() => {
ParserBackend::Simd
}
ProcessingStrategy::Generic => {
self.select_backend(input)
}
_ => ParserBackend::Serde,
}
}
fn estimate_complexity(&self, input: &[u8]) -> Complexity {
if input.len() < 100 {
return Complexity::Simple;
}
let mut brace_count = 0;
let mut bracket_count = 0;
let mut max_depth = 0;
let mut current_depth = 0;
let mut number_count = 0;
for &byte in input.iter().take(std::cmp::min(input.len(), 1024)) {
match byte {
b'{' => {
brace_count += 1;
current_depth += 1;
max_depth = max_depth.max(current_depth);
}
b'}' => {
current_depth = current_depth.saturating_sub(1);
}
b'[' => {
bracket_count += 1;
current_depth += 1;
max_depth = max_depth.max(current_depth);
}
b']' => {
current_depth = current_depth.saturating_sub(1);
}
b'0'..=b'9' => {
number_count += 1;
}
_ => {}
}
}
if max_depth > 5 {
Complexity::Nested
} else if bracket_count > 0 && number_count > bracket_count * 10 {
Complexity::LargeArray
} else if brace_count + bracket_count < 5 {
Complexity::Simple
} else {
Complexity::Complex
}
}
fn parse_with_serde(&self, input: &[u8]) -> Result<JsonValue<'_>> {
self.serde_parser.parse(input)
}
fn parse_semantic_simd(&self, input: &[u8], semantics: &SemanticMeta) -> Result<JsonValue<'_>> {
match &semantics.semantic_type {
SemanticType::NumericArray { .. } => {
self.parse_with_serde(input)
}
_ => {
self.parse_with_serde(input)
}
}
}
fn apply_semantic_hints(&self, _value: &mut JsonValue<'_>, _semantics: &SemanticMeta) {
}
fn validate_only(&self, input: &[u8]) -> Result<()> {
self.validator.validate_input_size(input.len())?;
serde_json::from_slice::<serde_json::Value>(input)
.map(|_| ())
.map_err(|e| Error::invalid_json(0, e.to_string()))
}
fn update_metrics(&mut self, input_size: usize, duration: std::time::Duration, success: bool) {
if !success {
return;
}
let duration_us = duration.as_micros() as f64;
let throughput = (input_size as f64) / (duration_us / 1_000_000.0) / 1_024_000.0;
let alpha = 0.1;
self.metrics.throughput_mbps = alpha * throughput + (1.0 - alpha) * self.metrics.throughput_mbps;
self.metrics.latency_p99_us = alpha * duration_us + (1.0 - alpha) * self.metrics.latency_p99_us;
self.metrics.operations_count += 1;
}
pub fn metrics(&self) -> &ParserMetrics {
&self.metrics
}
pub fn should_use_simd(&self, input_size: usize) -> bool {
input_size > self.thresholds.simd_min_size
&& self.metrics.throughput_mbps < self.thresholds.simd_throughput_threshold
&& self.simd_parser.is_some()
}
}
impl SerdeBackend {
pub fn parse(&self, input: &[u8]) -> Result<JsonValue<'_>> {
let value: serde_json::Value = serde_json::from_slice(input)
.map_err(|e| Error::invalid_json(0, e.to_string()))?;
match value {
serde_json::Value::String(_) => Ok(JsonValue::Raw(input)), serde_json::Value::Number(_) => Ok(JsonValue::Raw(input)),
serde_json::Value::Bool(b) => Ok(JsonValue::Bool(b)),
serde_json::Value::Null => Ok(JsonValue::Null),
_ => Ok(JsonValue::Raw(input)), }
}
}
impl SimdBackend {
pub fn parse(&self, input: &[u8]) -> Result<JsonValue<'_>> {
Ok(JsonValue::Raw(input))
}
}
impl Default for HybridParser {
fn default() -> Self {
Self::new()
}
}
impl Default for BackendThresholds {
fn default() -> Self {
Self {
simd_min_size: 4096, simd_max_complexity: 3, simd_throughput_threshold: 1000.0, }
}
}
impl ParserMetrics {
    /// Reports whether these metrics suggest SIMD for an input of
    /// `input_size` bytes.
    ///
    /// True only when the input is larger than 4096 bytes, the recorded
    /// throughput is below 1000.0, and the recorded latency is above 100.0.
    pub fn should_use_simd(&self, input_size: usize) -> bool {
        const MIN_INPUT_BYTES: usize = 4096;
        const THROUGHPUT_CEILING_MBPS: f64 = 1000.0;
        const LATENCY_FLOOR_US: f64 = 100.0;

        let large_enough = input_size > MIN_INPUT_BYTES;
        let throughput_lagging = self.throughput_mbps < THROUGHPUT_CEILING_MBPS;
        let latency_elevated = self.latency_p99_us > LATENCY_FLOOR_US;

        large_enough && throughput_lagging && latency_elevated
    }

    /// Renders throughput, latency and operation count as one line of text.
    pub fn summary(&self) -> String {
        let Self {
            throughput_mbps,
            latency_p99_us,
            operations_count,
            ..
        } = self;

        format!(
            "Throughput: {:.1} MB/s, Latency P99: {:.1}μs, Operations: {}",
            throughput_mbps, latency_p99_us, operations_count
        )
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed parser has recorded no operations.
    #[test]
    fn test_hybrid_parser_creation() {
        let fresh = HybridParser::new();
        assert_eq!(fresh.metrics.operations_count, 0);
    }

    /// Short inputs are classified as simple; a short numeric array may be
    /// reported as either simple or a large array.
    #[test]
    fn test_complexity_estimation() {
        let parser = HybridParser::new();

        let object_doc = br#"{"key": "value"}"#;
        assert_eq!(parser.estimate_complexity(object_doc), Complexity::Simple);

        let numeric_array = b"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]";
        assert!(matches!(
            parser.estimate_complexity(numeric_array),
            Complexity::Simple | Complexity::LargeArray
        ));
    }

    /// The serde backend accepts a well-formed object.
    #[test]
    fn test_serde_backend() {
        let document = br#"{"hello": "world"}"#;
        let outcome = SerdeBackend.parse(document);
        assert!(outcome.is_ok());
    }

    /// Without a SIMD backend installed, both tiny and large inputs are
    /// routed to serde.
    #[test]
    fn test_backend_selection() {
        let parser = HybridParser::new();

        let tiny = b"{}";
        assert_eq!(parser.select_backend(tiny), ParserBackend::Serde);

        let oversized = vec![b'{'; 5000];
        assert_eq!(parser.select_backend(&oversized), ParserBackend::Serde);
    }
}