// pjson_rs/parser/scanner.rs
use crate::{Result, semantic::NumericDType};
use smallvec::SmallVec;

6pub trait JsonScanner {
8 fn scan(&self, input: &[u8]) -> Result<ScanResult>;
10
11 fn supports_simd(&self) -> bool;
13
14 fn parse_numeric_array(
16 &self,
17 input: &[u8],
18 dtype: NumericDType,
19 length: Option<usize>,
20 ) -> Result<crate::parser::JsonValue<'_>>;
21
22 fn find_strings(&self, input: &[u8]) -> Result<Vec<StringLocation>>;
24
25 fn find_structural_chars(&self, input: &[u8]) -> Result<Vec<usize>>;
27}
28
29#[derive(Debug, Clone)]
31pub struct ScanResult {
32 pub structural_chars: Vec<usize>,
34 pub string_bounds: SmallVec<[Range; 16]>,
36 pub number_bounds: SmallVec<[Range; 16]>,
38 pub literal_bounds: SmallVec<[Range; 8]>,
40 pub root_type: Option<crate::parser::ValueType>,
42}
43
/// A `start..end` pair of `usize` offsets.
///
/// `Range::len` computes `end - start`, so `end` is treated as exclusive.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Range {
    /// First offset covered by the range.
    pub start: usize,
    /// Offset one past the last covered position.
    pub end: usize,
}

/// Location of one string found in the input (see
/// `JsonScanner::find_strings`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StringLocation {
    /// Start offset of the string.
    pub start: usize,
    /// End offset of the string; `StringLocation::len` computes
    /// `end - start`.
    pub end: usize,
    /// Flag recording whether the string has escapes.
    pub has_escapes: bool,
    /// Length after unescaping, when known. Both constructors in this file
    /// leave it as `None`.
    pub unescaped_len: Option<usize>,
}

66impl ScanResult {
67 pub fn new() -> Self {
69 Self {
70 structural_chars: Vec::new(),
71 string_bounds: SmallVec::new(),
72 number_bounds: SmallVec::new(),
73 literal_bounds: SmallVec::new(),
74 root_type: None,
75 }
76 }
77
78 pub fn determine_root_type(&self) -> crate::parser::ValueType {
80 if let Some(root_type) = self.root_type {
81 return root_type;
82 }
83
84 if !self.string_bounds.is_empty() {
86 crate::parser::ValueType::String
87 } else if !self.number_bounds.is_empty() {
88 crate::parser::ValueType::Number
89 } else if !self.literal_bounds.is_empty() {
90 crate::parser::ValueType::Boolean } else {
92 crate::parser::ValueType::Object }
94 }
95
96 pub fn is_numeric_array(&self) -> bool {
98 self.structural_chars
100 .first()
101 .is_some_and(|&c| c as u8 == b'[')
102 && self.number_bounds.len() > 4
103 && self.string_bounds.len() < 2
104 }
105
106 pub fn is_table_like(&self) -> bool {
108 self.structural_chars
110 .first()
111 .is_some_and(|&c| c as u8 == b'[')
112 && self.count_object_starts() > 2
113 && self.string_bounds.len() > self.number_bounds.len()
114 }
115
116 fn count_object_starts(&self) -> usize {
118 self.structural_chars
119 .iter()
120 .filter(|&&pos| pos as u8 == b'{')
121 .count()
122 }
123}
124
125impl Default for ScanResult {
126 fn default() -> Self {
127 Self::new()
128 }
129}
130
131impl Range {
132 pub fn new(start: usize, end: usize) -> Self {
134 Self { start, end }
135 }
136
137 pub fn len(&self) -> usize {
139 self.end.saturating_sub(self.start)
140 }
141
142 pub fn is_empty(&self) -> bool {
144 self.len() == 0
145 }
146}
147
148impl StringLocation {
149 pub fn new(start: usize, end: usize) -> Self {
151 Self {
152 start,
153 end,
154 has_escapes: false,
155 unescaped_len: None,
156 }
157 }
158
159 pub fn with_escapes(start: usize, end: usize, has_escapes: bool) -> Self {
161 Self {
162 start,
163 end,
164 has_escapes,
165 unescaped_len: None,
166 }
167 }
168
169 pub fn len(&self) -> usize {
171 self.end.saturating_sub(self.start)
172 }
173
174 pub fn is_empty(&self) -> bool {
176 self.len() == 0
177 }
178}
179
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh result has nothing recorded and no root type.
    #[test]
    fn test_scan_result_creation() {
        let result = ScanResult::new();
        assert!(result.structural_chars.is_empty());
        assert!(result.string_bounds.is_empty());
        assert!(result.number_bounds.is_empty());
        assert!(result.literal_bounds.is_empty());
        assert!(result.root_type.is_none());
    }

    /// `Default` yields the same empty state as `new`.
    #[test]
    fn test_scan_result_default() {
        let result = ScanResult::default();
        assert!(result.structural_chars.is_empty());
        assert!(result.root_type.is_none());
    }

    #[test]
    fn test_range_operations() {
        let range = Range::new(10, 20);
        assert_eq!(range.len(), 10);
        assert!(!range.is_empty());
    }

    /// `len` saturates, so empty and inverted ranges both report zero.
    #[test]
    fn test_range_empty_and_inverted() {
        assert!(Range::new(7, 7).is_empty());
        assert_eq!(Range::new(9, 3).len(), 0);
        assert!(Range::new(9, 3).is_empty());
    }

    #[test]
    fn test_string_location() {
        let loc = StringLocation::new(5, 15);
        assert_eq!(loc.len(), 10);
        assert!(!loc.has_escapes);
    }

    /// `with_escapes` stores the flag and leaves the unescaped length unset.
    #[test]
    fn test_string_location_with_escapes() {
        let loc = StringLocation::with_escapes(2, 6, true);
        assert_eq!(loc.len(), 4);
        assert!(loc.has_escapes);
        assert!(loc.unescaped_len.is_none());
    }
}