use crate::{Result, semantic::NumericDType};
use smallvec::SmallVec;
/// Interface for scanners that locate JSON tokens in a raw byte buffer.
///
/// Only signatures are declared here; the behaviour of each method is defined
/// by the implementing type.
pub trait JsonScanner {
/// Scans `input` and returns a [`ScanResult`], or an error.
fn scan(&self, input: &[u8]) -> Result<ScanResult>;
/// Reports whether this scanner supports SIMD.
// NOTE(review): whether this reflects a compile-time or a runtime capability
// is not visible from this trait — confirm against the implementors.
fn supports_simd(&self) -> bool;
/// Parses `input` as a numeric array whose element type is `dtype`.
///
/// NOTE(review): `length` is presumably an optional expected element count —
/// confirm against the implementors.
///
/// The elided `'_` in the return type is bound to `&self` (lifetime elision
/// prefers the receiver), so the returned value borrows from the scanner and
/// not from `input`.
fn parse_numeric_array(
&self,
input: &[u8],
dtype: NumericDType,
length: Option<usize>,
) -> Result<crate::parser::JsonValue<'_>>;
/// Returns a list of [`StringLocation`]s for `input`
/// (presumably one per JSON string found — confirm against the implementors).
fn find_strings(&self, input: &[u8]) -> Result<Vec<StringLocation>>;
/// Returns a list of `usize` values describing the structural characters of `input`.
// NOTE(review): presumably byte offsets into `input` — confirm.
fn find_structural_chars(&self, input: &[u8]) -> Result<Vec<usize>>;
}
/// Data collected by a scan: structural characters, token ranges and an
/// optional root value type.
#[derive(Debug, Clone)]
pub struct ScanResult {
/// One `usize` per structural character.
// NOTE(review): presumably byte offsets into the input, yet `is_numeric_array`,
// `is_table_like` and `count_object_starts` cast each entry to `u8` and compare
// it with `b'['` / `b'{'` — confirm which representation is intended.
pub structural_chars: Vec<usize>,
/// Ranges recorded for strings (up to 16 are stored inline before spilling to the heap).
pub string_bounds: SmallVec<[Range; 16]>,
/// Ranges recorded for numbers (up to 16 are stored inline before spilling to the heap).
pub number_bounds: SmallVec<[Range; 16]>,
/// Ranges recorded for literals (up to 8 are stored inline before spilling to the heap).
pub literal_bounds: SmallVec<[Range; 8]>,
/// Root value type when known; `determine_root_type` falls back to a
/// heuristic when this is `None`.
pub root_type: Option<crate::parser::ValueType>,
}
/// A pair of `usize` bounds.
///
/// `Range::len` computes `end - start` (saturating at zero), so `end` behaves
/// as an exclusive bound.
#[derive(Debug, Clone, Copy)]
pub struct Range {
/// Lower bound.
pub start: usize,
/// Upper bound; a value at or below `start` makes the range empty.
pub end: usize,
}
/// Location of a string together with escape-related metadata.
#[derive(Debug, Clone)]
pub struct StringLocation {
/// Lower bound of the string.
// NOTE(review): whether the bounds include the surrounding quotes is not
// visible here — confirm against the scanner implementations.
pub start: usize,
/// Upper bound of the string; `len` computes `end - start`, saturating at zero.
pub end: usize,
/// Flag recording whether the string has escapes; `new` sets it to `false`.
pub has_escapes: bool,
/// Length after unescaping, when known. Both constructors in this file
/// leave it as `None`.
pub unescaped_len: Option<usize>,
}
impl ScanResult {
    /// Creates a result with every collection empty and no root type recorded.
    pub fn new() -> Self {
        Self {
            root_type: None,
            structural_chars: Vec::new(),
            string_bounds: SmallVec::new(),
            number_bounds: SmallVec::new(),
            literal_bounds: SmallVec::new(),
        }
    }

    /// Returns the root value type.
    ///
    /// A recorded `root_type` always wins. Otherwise the first non-empty
    /// collection decides, checked in the order strings, numbers, literals
    /// (reported as `String`, `Number` and `Boolean` respectively); when all
    /// three are empty the answer is `Object`.
    pub fn determine_root_type(&self) -> crate::parser::ValueType {
        match self.root_type {
            Some(known) => known,
            None if !self.string_bounds.is_empty() => crate::parser::ValueType::String,
            None if !self.number_bounds.is_empty() => crate::parser::ValueType::Number,
            None if !self.literal_bounds.is_empty() => crate::parser::ValueType::Boolean,
            None => crate::parser::ValueType::Object,
        }
    }

    /// Heuristic: the first structural entry matches `[`, more than four
    /// numbers were recorded, and fewer than two strings.
    pub fn is_numeric_array(&self) -> bool {
        // NOTE(review): the entry is a `usize` truncated to `u8` before the
        // comparison; if entries are offsets rather than byte values this
        // check does not mean "starts with '['" — confirm the representation.
        let starts_like_array =
            matches!(self.structural_chars.first(), Some(&head) if head as u8 == b'[');
        let enough_numbers = self.number_bounds.len() > 4;
        let few_strings = self.string_bounds.len() < 2;
        starts_like_array && enough_numbers && few_strings
    }

    /// Heuristic: the first structural entry matches `[`, more than two
    /// entries match `{`, and strings outnumber numbers.
    pub fn is_table_like(&self) -> bool {
        // NOTE(review): same `usize as u8` comparison as in `is_numeric_array`.
        let starts_like_array =
            matches!(self.structural_chars.first(), Some(&head) if head as u8 == b'[');
        starts_like_array
            && self.count_object_starts() > 2
            && self.string_bounds.len() > self.number_bounds.len()
    }

    /// Counts the structural entries whose low byte equals `{`.
    fn count_object_starts(&self) -> usize {
        let mut total = 0;
        for &entry in &self.structural_chars {
            // Truncating cast kept deliberately to mirror the existing behaviour.
            if entry as u8 == b'{' {
                total += 1;
            }
        }
        total
    }
}
impl Default for ScanResult {
fn default() -> Self {
Self::new()
}
}
impl Range {
pub fn new(start: usize, end: usize) -> Self {
Self { start, end }
}
pub fn len(&self) -> usize {
self.end.saturating_sub(self.start)
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
}
impl StringLocation {
pub fn new(start: usize, end: usize) -> Self {
Self {
start,
end,
has_escapes: false,
unescaped_len: None,
}
}
pub fn with_escapes(start: usize, end: usize, has_escapes: bool) -> Self {
Self {
start,
end,
has_escapes,
unescaped_len: None,
}
}
pub fn len(&self) -> usize {
self.end.saturating_sub(self.start)
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed result holds no structural characters or strings.
    #[test]
    fn test_scan_result_creation() {
        let fresh = ScanResult::new();
        assert!(fresh.structural_chars.is_empty());
        assert!(fresh.string_bounds.is_empty());
    }

    /// A range with `end` above `start` reports their difference and is not empty.
    #[test]
    fn test_range_operations() {
        let span = Range::new(10, 20);
        assert_eq!(span.len(), 10);
        assert!(!span.is_empty());
    }

    /// `StringLocation::new` yields the expected length and no escape flag.
    #[test]
    fn test_string_location() {
        let location = StringLocation::new(5, 15);
        assert_eq!(location.len(), 10);
        assert!(!location.has_escapes);
    }
}