pjson_rs/parser/
scanner.rs

1//! JSON scanning interface and common types
2
3use crate::{Result, semantic::NumericDType};
4use smallvec::SmallVec;
5
6/// Main scanning interface implemented by SIMD and scalar scanners
7pub trait JsonScanner {
8    /// Scan JSON input and return structural information
9    fn scan(&self, input: &[u8]) -> Result<ScanResult>;
10
11    /// Check if this scanner supports SIMD operations
12    fn supports_simd(&self) -> bool;
13
14    /// Parse numeric array with SIMD optimization if available
15    fn parse_numeric_array(
16        &self,
17        input: &[u8],
18        dtype: NumericDType,
19        length: Option<usize>,
20    ) -> Result<crate::parser::JsonValue<'_>>;
21
22    /// Find all string boundaries in the input
23    fn find_strings(&self, input: &[u8]) -> Result<Vec<StringLocation>>;
24
25    /// Find structural characters ({}[],:) positions
26    fn find_structural_chars(&self, input: &[u8]) -> Result<Vec<usize>>;
27}
28
29/// Result of scanning JSON input
30#[derive(Debug, Clone)]
31pub struct ScanResult {
32    /// Positions of structural characters
33    pub structural_chars: Vec<usize>,
34    /// String boundary positions
35    pub string_bounds: SmallVec<[Range; 16]>,
36    /// Number boundary positions  
37    pub number_bounds: SmallVec<[Range; 16]>,
38    /// Literal boundary positions (true/false/null)
39    pub literal_bounds: SmallVec<[Range; 8]>,
40    /// Detected root value type
41    pub root_type: Option<crate::parser::ValueType>,
42}
43
/// A pair of start and end positions.
///
/// `Range::len` is computed as `end - start` (saturating at zero), so `end`
/// acts as an exclusive bound for length purposes.
///
/// The type is a plain value: it is `Copy`, and it can be compared and hashed
/// so that ranges can be checked with `assert_eq!` or stored in sets and maps.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Range {
    /// Position where the range begins.
    pub start: usize,
    /// Position where the range ends.
    pub end: usize,
}
50
/// Location of a string together with metadata about its contents.
///
/// Implements equality and hashing so that scanner output can be compared
/// directly (for example with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct StringLocation {
    /// Start position of the string (after the opening quote).
    pub start: usize,
    /// End position of the string (before the closing quote).
    pub end: usize,
    /// Whether the string contains escape sequences.
    pub has_escapes: bool,
    /// Estimated length after unescaping, when one has been computed.
    pub unescaped_len: Option<usize>,
}
63
64impl ScanResult {
65    /// Create new empty scan result
66    pub fn new() -> Self {
67        Self {
68            structural_chars: Vec::new(),
69            string_bounds: SmallVec::new(),
70            number_bounds: SmallVec::new(),
71            literal_bounds: SmallVec::new(),
72            root_type: None,
73        }
74    }
75
76    /// Determine the root JSON value type
77    pub fn determine_root_type(&self) -> crate::parser::ValueType {
78        if let Some(root_type) = self.root_type {
79            return root_type;
80        }
81
82        // Simplified type detection
83        if !self.string_bounds.is_empty() {
84            crate::parser::ValueType::String
85        } else if !self.number_bounds.is_empty() {
86            crate::parser::ValueType::Number
87        } else if !self.literal_bounds.is_empty() {
88            crate::parser::ValueType::Boolean // or Null
89        } else {
90            crate::parser::ValueType::Object // Default
91        }
92    }
93
94    /// Check if this appears to be a numeric array
95    pub fn is_numeric_array(&self) -> bool {
96        // Heuristic: starts with '[', has many numbers, few strings
97        self.structural_chars
98            .first()
99            .map_or(false, |&c| c as u8 == b'[')
100            && self.number_bounds.len() > 4
101            && self.string_bounds.len() < 2
102    }
103
104    /// Check if this appears to be a table/object array
105    pub fn is_table_like(&self) -> bool {
106        // Heuristic: starts with '[', has balanced objects and strings
107        self.structural_chars
108            .first()
109            .map_or(false, |&c| c as u8 == b'[')
110            && self.count_object_starts() > 2
111            && self.string_bounds.len() > self.number_bounds.len()
112    }
113
114    /// Count opening braces to estimate object count
115    fn count_object_starts(&self) -> usize {
116        self.structural_chars
117            .iter()
118            .filter(|&&pos| pos as u8 == b'{')
119            .count()
120    }
121}
122
123impl Default for ScanResult {
124    fn default() -> Self {
125        Self::new()
126    }
127}
128
129impl Range {
130    /// Create new range
131    pub fn new(start: usize, end: usize) -> Self {
132        Self { start, end }
133    }
134
135    /// Get range length
136    pub fn len(&self) -> usize {
137        self.end.saturating_sub(self.start)
138    }
139
140    /// Check if range is empty
141    pub fn is_empty(&self) -> bool {
142        self.len() == 0
143    }
144}
145
146impl StringLocation {
147    /// Create new string location
148    pub fn new(start: usize, end: usize) -> Self {
149        Self {
150            start,
151            end,
152            has_escapes: false,
153            unescaped_len: None,
154        }
155    }
156
157    /// Create with escape information
158    pub fn with_escapes(start: usize, end: usize, has_escapes: bool) -> Self {
159        Self {
160            start,
161            end,
162            has_escapes,
163            unescaped_len: None,
164        }
165    }
166
167    /// Get string length in bytes
168    pub fn len(&self) -> usize {
169        self.end.saturating_sub(self.start)
170    }
171
172    /// Check if empty
173    pub fn is_empty(&self) -> bool {
174        self.len() == 0
175    }
176}
177
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created result has no entries and no recorded root type.
    #[test]
    fn test_scan_result_creation() {
        let result = ScanResult::new();
        assert!(result.structural_chars.is_empty());
        assert!(result.string_bounds.is_empty());
        assert!(result.number_bounds.is_empty());
        assert!(result.literal_bounds.is_empty());
        assert!(result.root_type.is_none());
    }

    /// `Default` delegates to `new`, so it must also be empty.
    #[test]
    fn test_scan_result_default_is_empty() {
        let result = ScanResult::default();
        assert!(result.structural_chars.is_empty());
        assert!(result.string_bounds.is_empty());
        assert!(result.number_bounds.is_empty());
        assert!(result.literal_bounds.is_empty());
        assert!(result.root_type.is_none());
    }

    /// With nothing scanned, the heuristics are negative and the root type
    /// falls back to `Object`.
    #[test]
    fn test_scan_result_empty_heuristics() {
        let result = ScanResult::new();
        assert!(!result.is_numeric_array());
        assert!(!result.is_table_like());
        assert!(matches!(
            result.determine_root_type(),
            crate::parser::ValueType::Object
        ));
    }

    #[test]
    fn test_range_operations() {
        let range = Range::new(10, 20);
        assert_eq!(range.len(), 10);
        assert!(!range.is_empty());
    }

    /// Zero-width and inverted ranges are both empty; the length saturates
    /// at zero instead of underflowing.
    #[test]
    fn test_range_empty_and_inverted() {
        let zero_width = Range::new(7, 7);
        assert_eq!(zero_width.len(), 0);
        assert!(zero_width.is_empty());

        let inverted = Range::new(20, 10);
        assert_eq!(inverted.len(), 0);
        assert!(inverted.is_empty());
    }

    #[test]
    fn test_string_location() {
        let loc = StringLocation::new(5, 15);
        assert_eq!(loc.len(), 10);
        assert!(!loc.has_escapes);
        assert!(loc.unescaped_len.is_none());
        assert!(!loc.is_empty());
    }

    /// `with_escapes` records the flag and leaves the unescaped length unset.
    #[test]
    fn test_string_location_with_escapes() {
        let loc = StringLocation::with_escapes(3, 3, true);
        assert!(loc.has_escapes);
        assert!(loc.unescaped_len.is_none());
        assert_eq!(loc.len(), 0);
        assert!(loc.is_empty());
    }
}