pjson_rs/parser/
scanner.rs1use crate::{Result, semantic::NumericDType};
4use smallvec::SmallVec;
5
6pub trait JsonScanner {
8 fn scan(&self, input: &[u8]) -> Result<ScanResult>;
10
11 fn supports_simd(&self) -> bool;
13
14 fn parse_numeric_array(
16 &self,
17 input: &[u8],
18 dtype: NumericDType,
19 length: Option<usize>,
20 ) -> Result<crate::parser::JsonValue<'_>>;
21
22 fn find_strings(&self, input: &[u8]) -> Result<Vec<StringLocation>>;
24
25 fn find_structural_chars(&self, input: &[u8]) -> Result<Vec<usize>>;
27}
28
29#[derive(Debug, Clone)]
31pub struct ScanResult {
32 pub structural_chars: Vec<usize>,
34 pub string_bounds: SmallVec<[Range; 16]>,
36 pub number_bounds: SmallVec<[Range; 16]>,
38 pub literal_bounds: SmallVec<[Range; 8]>,
40 pub root_type: Option<crate::parser::ValueType>,
42}
43
/// A `start..end` pair of positions.
///
/// Nothing enforces `start <= end`; see `Range::len` for how an inverted
/// pair is treated.
#[derive(Debug, Clone, Copy)]
pub struct Range {
    /// Position where the range begins.
    pub start: usize,
    /// Position where the range ends.
    pub end: usize,
}
50
/// Location of one string, plus what is known about its escapes.
#[derive(Debug, Clone)]
pub struct StringLocation {
    /// Position where the string begins.
    pub start: usize,
    /// Position where the string ends.
    pub end: usize,
    /// Whether the string contains escape sequences.
    pub has_escapes: bool,
    /// Length of the string after unescaping, when it has been computed.
    pub unescaped_len: Option<usize>,
}
63
64impl ScanResult {
65 pub fn new() -> Self {
67 Self {
68 structural_chars: Vec::new(),
69 string_bounds: SmallVec::new(),
70 number_bounds: SmallVec::new(),
71 literal_bounds: SmallVec::new(),
72 root_type: None,
73 }
74 }
75
76 pub fn determine_root_type(&self) -> crate::parser::ValueType {
78 if let Some(root_type) = self.root_type {
79 return root_type;
80 }
81
82 if !self.string_bounds.is_empty() {
84 crate::parser::ValueType::String
85 } else if !self.number_bounds.is_empty() {
86 crate::parser::ValueType::Number
87 } else if !self.literal_bounds.is_empty() {
88 crate::parser::ValueType::Boolean } else {
90 crate::parser::ValueType::Object }
92 }
93
94 pub fn is_numeric_array(&self) -> bool {
96 self.structural_chars
98 .first()
99 .map_or(false, |&c| c as u8 == b'[')
100 && self.number_bounds.len() > 4
101 && self.string_bounds.len() < 2
102 }
103
104 pub fn is_table_like(&self) -> bool {
106 self.structural_chars
108 .first()
109 .map_or(false, |&c| c as u8 == b'[')
110 && self.count_object_starts() > 2
111 && self.string_bounds.len() > self.number_bounds.len()
112 }
113
114 fn count_object_starts(&self) -> usize {
116 self.structural_chars
117 .iter()
118 .filter(|&&pos| pos as u8 == b'{')
119 .count()
120 }
121}
122
123impl Default for ScanResult {
124 fn default() -> Self {
125 Self::new()
126 }
127}
128
129impl Range {
130 pub fn new(start: usize, end: usize) -> Self {
132 Self { start, end }
133 }
134
135 pub fn len(&self) -> usize {
137 self.end.saturating_sub(self.start)
138 }
139
140 pub fn is_empty(&self) -> bool {
142 self.len() == 0
143 }
144}
145
146impl StringLocation {
147 pub fn new(start: usize, end: usize) -> Self {
149 Self {
150 start,
151 end,
152 has_escapes: false,
153 unescaped_len: None,
154 }
155 }
156
157 pub fn with_escapes(start: usize, end: usize, has_escapes: bool) -> Self {
159 Self {
160 start,
161 end,
162 has_escapes,
163 unescaped_len: None,
164 }
165 }
166
167 pub fn len(&self) -> usize {
169 self.end.saturating_sub(self.start)
170 }
171
172 pub fn is_empty(&self) -> bool {
174 self.len() == 0
175 }
176}
177
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created result has nothing recorded.
    #[test]
    fn test_scan_result_creation() {
        let result = ScanResult::new();
        assert!(result.structural_chars.is_empty());
        assert!(result.string_bounds.is_empty());
    }

    /// `len` is the distance between the bounds and a non-zero length is
    /// not empty.
    #[test]
    fn test_range_operations() {
        let range = Range::new(10, 20);
        assert_eq!(range.len(), 10);
        assert!(!range.is_empty());
    }

    /// `StringLocation::new` produces a location without escapes.
    #[test]
    fn test_string_location() {
        let loc = StringLocation::new(5, 15);
        assert_eq!(loc.len(), 10);
        assert!(!loc.has_escapes);
    }
}