1use std::fmt::{Debug};
2use std::hash::{Hash, Hasher};
3
4use crate::lexer::Lexer;
5use crate::parser::Parser;
6use crate::serializer::{serialize_to_json, Value};
7
8pub mod parser;
9pub mod lexer;
10pub mod serializer;
11
/// Entry point of the flat JSON parser.
///
/// The type holds no data: parsing, depth-change and serialization helpers are exposed as
/// associated functions in its `impl` block.
pub struct JSONParser {}
13
/// Options controlling how a JSON document is parsed into flat values.
///
/// Start from `ParseOptions::default()` and adjust individual settings with the chained
/// setter methods of the same names as the fields.
#[derive(Debug, Clone)]
pub struct ParseOptions {
    /// Whether arrays are expanded. `change_depth` only re-parses array entries when this is
    /// `true`; presumably the parser applies the same rule during the initial parse.
    pub parse_array: bool,
    /// Presumably keeps the raw text of objects next to their flattened children; the flag is
    /// interpreted by the parser module — TODO confirm.
    pub keep_object_raw_data: bool,
    /// Maximum depth to parse down to. `change_depth` only re-parses entries when this value is
    /// greater than the depth used by the previous parse.
    pub max_depth: u8,
    /// Optional pointer at which parsing should start (interpreted by the parser module).
    pub start_parse_at: Option<String>,
    /// Depth value handed to the parser by `JSONParser::parse_bytes` as the starting depth.
    pub start_depth: u8,
    /// Optional pointer prefix. `change_depth` sets it to the pointer of the entry being
    /// re-parsed before invoking the parser.
    pub prefix: Option<String>,
}
23
24impl Default for ParseOptions {
25 fn default() -> Self {
26 Self {
27 parse_array: true,
28 keep_object_raw_data: true,
29 max_depth: 10,
30 start_parse_at: None,
31 start_depth: 1,
32 prefix: None,
33 }
34 }
35}
36
37impl ParseOptions {
38 pub fn parse_array(mut self, parse_array: bool) -> Self {
39 self.parse_array = parse_array;
40 self
41 }
42
43 pub fn start_parse_at(mut self, pointer: String) -> Self {
44 self.start_parse_at = Some(pointer);
45 self
46 }
47 pub fn start_depth(mut self, depth: u8) -> Self {
48 self.start_depth = depth;
49 self
50 }
51 pub fn max_depth(mut self, max_depth: u8) -> Self {
52 self.max_depth = max_depth;
53 self
54 }
55 pub fn prefix(mut self, prefix: String) -> Self {
56 self.prefix = Some(prefix);
57 self
58 }
59 pub fn keep_object_raw_data(mut self, keep_object_raw_data: bool) -> Self {
60 self.keep_object_raw_data = keep_object_raw_data;
61 self
62 }
63}
64
/// Gives byte-level access to the content of a value.
///
/// Implemented in this file for `String` and `&str`, where the bytes are those of the string.
pub trait GetBytes {
    /// Returns the content of `self` as a byte slice borrowed from `self`.
    fn get_bytes(&self) -> &[u8];
}
68
69impl GetBytes for String {
70 fn get_bytes(&self) -> &[u8] {
71 self.as_bytes()
72 }
73}
74
75impl GetBytes for &str {
76 fn get_bytes(&self) -> &[u8] {
77 self.as_bytes()
78 }
79}
80
81#[derive(Debug, Clone)]
82pub struct JsonArrayEntries<V: Debug + Clone + AsRef<str> + GetBytes> {
83 pub entries: Vec<FlatJsonValue<V>>,
84 pub index: usize,
85}
86
87impl<V: Debug + Clone + AsRef<str> + GetBytes> JsonArrayEntries<V> {
88 pub fn entries(&self) -> &Vec<FlatJsonValue<V>> {
89 &self.entries
90 }
91 pub fn index(&self) -> usize {
92 self.index
93 }
94
95 pub fn find_node_at(&self, pointer: &str) -> Option<&FlatJsonValue<V>> {
96 self.entries().iter().find(|v| v.pointer.pointer.eq(pointer))
97 }
98}
99
100impl<V: Debug + Clone + AsRef<str> + GetBytes> Hash for JsonArrayEntries<V> {
101 fn hash<H: Hasher>(&self, state: &mut H) {
102 self.index.hash(state);
103 self.entries.len().hash(state);
104 }
105}
106
107
108#[derive(Debug, Default, Clone)]
109pub struct PointerKey {
110 pub pointer: String,
111 pub value_type: ValueType,
112 pub depth: u8, pub position: usize, pub column_id: usize, }
116
117impl PartialEq<Self> for PointerKey {
118 fn eq(&self, other: &Self) -> bool {
119 self.pointer.eq(&other.pointer)
120 }
121}
122
// Marker impl: `PointerKey` equality compares the `pointer` strings, which is a total
// equivalence relation, so the type can be used where `Eq` is required.
impl Eq for PointerKey {}
124
125impl Hash for PointerKey {
126 fn hash<H: Hasher>(&self, state: &mut H) {
127 self.pointer.hash(state);
128 }
129}
130
131impl PointerKey {
132 pub fn parent(&self) -> &str {
133 let index = self.pointer.rfind('/').unwrap_or(0);
134
135 (if index == 0 {
136 "/"
137 } else {
138 &self.pointer[0..index]
139 }) as _
140 }
141}
/// Concatenates any number of string-like values (anything implementing `AsRef<str>`) into a
/// new `String`.
///
/// Every argument expression is evaluated exactly once and only borrowed, so owned arguments
/// remain usable afterwards. The result is built by the standard library's slice `concat`,
/// which sizes the output from the total length of the parts. A trailing comma is accepted,
/// and an empty invocation yields an empty `String`.
#[macro_export]
macro_rules! concat_string {
    () => { ::std::string::String::new() };
    ($($s:expr),+ $(,)?) => {
        // A single expression keeps temporaries produced by the arguments alive until the
        // concatenation has been performed.
        [$(::std::convert::AsRef::<str>::as_ref(&$s)),+].concat()
    };
}
// Generates the "change depth" associated functions.
//
// Each `(type, name, conversion)` triple expands to a function `name` operating on a
// `ParseResult<type>`; `conversion` turns the `ParseResult<&str>` returned by the parser into a
// `ParseResult<type>`.
macro_rules! change_depth {
    ($($t:ty, $func:ident, $to_owned:expr),+) => {$(
        /// Deepens a previous parse result in place, up to `parse_options.max_depth`.
        ///
        /// `previous_parse_result.parsing_max_depth` is always set to `parse_options.max_depth`.
        /// When the new depth is greater than the previous one, the stored values of the
        /// selected array and object entries are parsed again: the new entries are appended to
        /// `previous_parse_result.json` and the `value_type` of the re-parsed entry is refreshed.
        /// `parse_options.prefix` is overwritten with the pointer of each re-parsed entry.
        ///
        /// # Errors
        ///
        /// Returns the parser's error when the stored value of an entry cannot be parsed.
        /// Updates made for entries processed before the failing one are kept.
        pub fn $func<'json>(previous_parse_result: &mut ParseResult<$t>, mut parse_options: ParseOptions) -> Result<(), String> {
            let previous_parse_depth = previous_parse_result.parsing_max_depth;
            let previous_max_json_depth = previous_parse_result.max_json_depth;
            previous_parse_result.parsing_max_depth = parse_options.max_depth;
            if previous_parse_depth >= parse_options.max_depth {
                // Nothing to expand: the previous parse already went at least as deep.
                return Ok(());
            }

            // Only entries present before this call are visited; entries appended below are not.
            let previous_len = previous_parse_result.json.len();
            for i in 0..previous_len {
                let entry = &previous_parse_result.json[i];
                // NOTE(review): `depth - depth_after_start_at` is plain `u8` arithmetic; this
                // assumes an entry's depth is never smaller than `depth_after_start_at`.
                let (should_parse, is_object, new_depth) = match entry.pointer.value_type {
                    // Arrays: only those sitting exactly at the previous maximum depth, and only
                    // when array parsing is enabled.
                    ValueType::Array(_) => (
                        parse_options.parse_array && entry.pointer.depth - previous_parse_result.depth_after_start_at == previous_parse_depth,
                        false,
                        entry.pointer.depth + 1,
                    ),
                    // Objects: those not flagged as parsed, at or above the previous maximum depth.
                    ValueType::Object(parsed, _) => (
                        !parsed && entry.pointer.depth - previous_parse_result.depth_after_start_at <= previous_parse_depth,
                        true,
                        entry.pointer.depth + 1,
                    ),
                    _ => (false, false, entry.pointer.depth),
                };

                if should_parse {
                    if let Some(ref v) = entry.value {
                        let mut lexer = Lexer::new(v.as_bytes());
                        let mut parser = Parser::new_for_change_depth(&mut lexer, previous_parse_result.depth_after_start_at, previous_max_json_depth);
                        parse_options.prefix = Some(entry.pointer.pointer.clone());
                        // Propagate parse failures to the caller instead of panicking.
                        let res = parser.parse(&parse_options, new_depth)?;
                        let mut res = $to_owned(res);
                        if previous_parse_result.max_json_depth < res.max_json_depth {
                            previous_parse_result.max_json_depth = res.max_json_depth;
                        }

                        if let Some(first) = res.json.first() {
                            if let ValueType::Array(size) = first.pointer.value_type {
                                // The first new entry describes an array: copy its size onto the
                                // entry that was re-parsed.
                                previous_parse_result.json[i].pointer.value_type = ValueType::Array(size);
                                if first.pointer.pointer.is_empty() {
                                    // Drop the entry with an empty pointer. `swap_remove` moves
                                    // the last new entry into its slot.
                                    res.json.swap_remove(0);
                                }
                            }
                        }

                        if is_object {
                            // Flag the object as parsed and record how many of the new entries
                            // sit one level below it.
                            let root_depth = previous_parse_result.json[i].pointer.depth + 1;
                            let elements_count = res.json.iter().filter(|e| e.pointer.depth == root_depth).count();
                            previous_parse_result.json[i].pointer.value_type = ValueType::Object(true, elements_count);
                        }

                        previous_parse_result.json.extend(res.json);
                    }
                }
            }
            Ok(())
        }
    )+};
}
223
224impl PointerKey {
225 pub fn from_pointer(pointer: String, value_type: ValueType, depth: u8, position: usize) -> Self {
226 Self {
227 pointer,
228 value_type,
229 depth,
230 position,
231 column_id: 0,
232 }
233 }
234}
235
/// Kind of JSON value a flattened entry holds.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ValueType {
    /// An array, with its size.
    Array(usize),
    /// An object: whether it has already been parsed into child entries, and the number of
    /// elements counted for it.
    Object(bool, usize),
    /// A number.
    Number,
    /// A string.
    String,
    /// A boolean.
    Bool,
    /// A JSON `null`.
    Null,
    /// No value type; this is the default.
    #[default]
    None,
}
248
249
// A list of owned string fragments. Not referenced anywhere else in this file; presumably the
// segments of a pointer — TODO confirm whether the child modules use it before removing it.
type PointerFragment = Vec<String>;
251
252
253#[derive(Debug, Clone, Default)]
254pub struct FlatJsonValue<V: Debug + Clone + AsRef<str> + GetBytes> {
255 pub pointer: PointerKey,
256 pub value: Option<V>,
257}
258
259
260impl<V: Debug + Clone + AsRef<str> + GetBytes> Hash for FlatJsonValue<V> {
261 fn hash<H: Hasher>(&self, state: &mut H) {
262 self.pointer.hash(state);
263 }
264}
265
266
267#[derive(Debug, Clone)]
268pub struct ParseResult<V: Debug + Clone + AsRef<str> + GetBytes> {
269 pub json: Vec<FlatJsonValue<V>>,
270 pub max_json_depth: usize,
271 pub parsing_max_depth: u8,
272 pub started_parsing_at: Option<String>,
273 pub started_parsing_at_index_start: usize,
274 pub started_parsing_at_index_end: usize,
275 pub parsing_prefix: Option<String>,
276 pub depth_after_start_at: u8,
277}
278
279impl ParseResult<String> {
280 pub fn clone_except_json(&self) -> Self {
281 Self {
282 json: Default::default(),
283 max_json_depth: self.max_json_depth,
284 parsing_max_depth: self.parsing_max_depth,
285 started_parsing_at: self.started_parsing_at.clone(),
286 started_parsing_at_index_start: self.started_parsing_at_index_start,
287 started_parsing_at_index_end: self.started_parsing_at_index_end,
288 parsing_prefix: self.parsing_prefix.clone(),
289 depth_after_start_at: self.depth_after_start_at,
290 }
291 }
292
293 pub fn to_owned(self) -> ParseResult<String> {
294 self
295 }
296
297}
298impl ParseResult<&str> {
299 pub fn clone_except_json(&self) -> Self {
300 Self {
301 json: Default::default(),
302 max_json_depth: self.max_json_depth,
303 parsing_max_depth: self.parsing_max_depth,
304 started_parsing_at_index_start: self.started_parsing_at_index_start,
305 started_parsing_at_index_end: self.started_parsing_at_index_end,
306 started_parsing_at: self.started_parsing_at.clone(),
307 parsing_prefix: self.parsing_prefix.clone(),
308 depth_after_start_at: self.depth_after_start_at,
309 }
310 }
311 pub fn to_owned(self) -> ParseResult<String> {
312 let mut transformed_vec: Vec<FlatJsonValue<String>> = Vec::with_capacity(self.json.len());
313
314 for entry in self.json {
315 transformed_vec.push(FlatJsonValue { pointer: entry.pointer, value: entry.value.map(|s| s.to_owned()) });
316 }
317 ParseResult::<String> {
318 json: transformed_vec,
319 max_json_depth: self.max_json_depth,
320 parsing_max_depth: self.parsing_max_depth,
321 started_parsing_at_index_start: self.started_parsing_at_index_start,
322 started_parsing_at_index_end: self.started_parsing_at_index_end,
323 started_parsing_at: self.started_parsing_at.clone(),
324 parsing_prefix: self.parsing_prefix.clone(),
325 depth_after_start_at: self.depth_after_start_at,
326 }
327 }
328
329}
330
331
impl JSONParser {
    /// Parses a JSON string into a `ParseResult` of flattened values.
    ///
    /// Thin wrapper that forwards the UTF-8 bytes of `input` to `JSONParser::parse_bytes`.
    ///
    /// # Errors
    ///
    /// Returns the error reported by `JSONParser::parse_bytes`.
    pub fn parse(input: &str, options: ParseOptions) -> Result<ParseResult<&str>, String> {
        JSONParser::parse_bytes(input.as_bytes(), options)
    }
    /// Parses JSON bytes into a `ParseResult` of flattened values.
    ///
    /// Builds a `Lexer` over `input`, a `Parser` over that lexer, and runs the parser with
    /// `options`, passing `options.start_depth` as the depth argument.
    ///
    /// # Errors
    ///
    /// Returns the `String` error reported by `Parser::parse`.
    pub fn parse_bytes(input: &[u8], options: ParseOptions) -> Result<ParseResult<&str>, String> {
        let mut lexer = Lexer::new(input);
        let mut parser = Parser::new(&mut lexer);
        parser.parse(&options, options.start_depth)
    }


    // Generates `change_depth`, which works on results holding borrowed `&str` values; the
    // parser output is used as is.
    change_depth!(&'json str, change_depth, |r: ParseResult<&'json str>| r);
    // Generates `change_depth_owned`, which works on results holding `String` values; the
    // parser output is converted with `to_owned` first.
    change_depth!(String, change_depth_owned, |r: ParseResult<&str>| r.to_owned());


    /// Serializes flattened values holding borrowed strings by delegating to
    /// `serialize_to_json`. Whether `data` is modified depends on that function, which is not
    /// visible here.
    pub fn serialize<'a>(data: &mut Vec<FlatJsonValue<&'a str>>) -> Value<&'a str> {
        serialize_to_json(data)
    }

    /// Serializes flattened values holding owned strings by delegating to
    /// `serialize_to_json`. Whether `data` is modified depends on that function, which is not
    /// visible here.
    pub fn serialize_owned(data: &mut Vec<FlatJsonValue<String>>) -> Value<String> {
        serialize_to_json(data)
    }

}
356
357
/// Interprets `bytes` as UTF-8 text, returning `None` when they are not valid UTF-8.
///
/// Validation uses `simdutf8` when the `simdutf8` feature is enabled and the standard
/// library otherwise.
#[inline]
pub fn string_from_bytes(bytes: &[u8]) -> Option<&str> {
    #[cfg(feature = "simdutf8")]
    let decoded = simdutf8::basic::from_utf8(bytes);
    #[cfg(not(feature = "simdutf8"))]
    let decoded = std::str::from_utf8(bytes);

    decoded.ok()
}