json_flat_parser/
lib.rs

1use std::fmt::{Debug};
2use std::hash::{Hash, Hasher};
3
4use crate::lexer::Lexer;
5use crate::parser::Parser;
6use crate::serializer::{serialize_to_json, Value};
7
8pub mod parser;
9pub mod lexer;
10pub mod serializer;
11
/// Field-less handle type: it holds no state and only serves as the namespace for the crate's
/// parsing and serialization functions.
pub struct JSONParser {}
13
/// Settings handed to the parser.
///
/// Start from [`ParseOptions::default`] and chain the by-value setters below to override
/// individual fields.
#[derive(Clone)]
pub struct ParseOptions {
    /// Whether array values are expanded (the depth-change functions in this file consult it
    /// before re-parsing an array). Defaults to `true`.
    pub parse_array: bool,
    /// Defaults to `true`. NOTE(review): presumably keeps the raw text of objects alongside
    /// their flattened children — confirm against the parser.
    pub keep_object_raw_data: bool,
    /// Depth limit for parsing. Defaults to `10`.
    pub max_depth: u8,
    /// Optional pointer at which parsing should begin. Defaults to `None`.
    pub start_parse_at: Option<String>,
    /// Depth value handed to the parser as the starting depth. Defaults to `1`.
    pub start_depth: u8,
    /// Optional prefix. NOTE(review): presumably prepended to every generated pointer —
    /// confirm against the parser. Defaults to `None`.
    pub prefix: Option<String>,
}

impl Default for ParseOptions {
    /// Arrays parsed, raw object data kept, `max_depth` 10, `start_depth` 1, no start pointer
    /// and no prefix.
    fn default() -> Self {
        ParseOptions {
            start_parse_at: None,
            prefix: None,
            start_depth: 1,
            max_depth: 10,
            parse_array: true,
            keep_object_raw_data: true,
        }
    }
}

impl ParseOptions {
    /// Returns the options with `parse_array` replaced.
    pub fn parse_array(self, parse_array: bool) -> Self {
        Self { parse_array, ..self }
    }

    /// Returns the options with `start_parse_at` set to `Some(pointer)`.
    pub fn start_parse_at(self, pointer: String) -> Self {
        Self { start_parse_at: Some(pointer), ..self }
    }

    /// Returns the options with `start_depth` replaced by `depth`.
    pub fn start_depth(self, depth: u8) -> Self {
        Self { start_depth: depth, ..self }
    }

    /// Returns the options with `max_depth` replaced.
    pub fn max_depth(self, max_depth: u8) -> Self {
        Self { max_depth, ..self }
    }

    /// Returns the options with `prefix` set to `Some(prefix)`.
    pub fn prefix(self, prefix: String) -> Self {
        Self { prefix: Some(prefix), ..self }
    }

    /// Returns the options with `keep_object_raw_data` replaced.
    pub fn keep_object_raw_data(self, keep_object_raw_data: bool) -> Self {
        Self { keep_object_raw_data, ..self }
    }
}
64
/// Uniform access to the underlying bytes of a string-like value, whether owned or borrowed.
pub trait GetBytes {
    /// Borrows the value's contents as a byte slice.
    fn get_bytes(&self) -> &[u8];
}

/// Owned strings expose their UTF-8 buffer.
impl GetBytes for String {
    fn get_bytes(&self) -> &[u8] {
        self.as_str().as_bytes()
    }
}

/// Borrowed string slices expose the bytes they point at.
impl GetBytes for &str {
    fn get_bytes(&self) -> &[u8] {
        (*self).as_bytes()
    }
}
80
81#[derive(Debug, Clone)]
82pub struct JsonArrayEntries<V: Debug + Clone + AsRef<str> + GetBytes> {
83    pub entries: Vec<FlatJsonValue<V>>,
84    pub index: usize,
85}
86
87impl<V: Debug + Clone + AsRef<str> + GetBytes> JsonArrayEntries<V> {
88    pub fn entries(&self) -> &Vec<FlatJsonValue<V>> {
89        &self.entries
90    }
91    pub fn index(&self) -> usize {
92        self.index
93    }
94
95    pub fn find_node_at(&self, pointer: &str) -> Option<&FlatJsonValue<V>> {
96        self.entries().iter().find(|v| v.pointer.pointer.eq(pointer))
97    }
98}
99
100impl<V: Debug + Clone + AsRef<str> + GetBytes> Hash for JsonArrayEntries<V> {
101    fn hash<H: Hasher>(&self, state: &mut H) {
102        self.index.hash(state);
103        self.entries.len().hash(state);
104    }
105}
106
107
108#[derive(Debug, Default, Clone)]
109pub struct PointerKey {
110    pub pointer: String,
111    pub value_type: ValueType,
112    pub depth: u8,    // depth of the pointed value in the json
113    pub position: usize, // position on the original json
114    pub column_id: usize, // can be used to map to external object
115}
116
117impl PartialEq<Self> for PointerKey {
118    fn eq(&self, other: &Self) -> bool {
119        self.pointer.eq(&other.pointer)
120    }
121}
122
123impl Eq for PointerKey {}
124
125impl Hash for PointerKey {
126    fn hash<H: Hasher>(&self, state: &mut H) {
127        self.pointer.hash(state);
128    }
129}
130
131impl PointerKey {
132    pub fn parent(&self) -> &str {
133        let index = self.pointer.rfind('/').unwrap_or(0);
134        
135        (if index == 0 {
136            "/"
137        } else {
138            &self.pointer[0..index]
139        }) as _
140    }
141}
/// Concatenates any number of string-like expressions into one newly allocated `String`.
///
/// Every argument must implement `AsRef<str>`. Each argument expression is evaluated exactly
/// once; the lengths are summed first so the buffer is allocated a single time with the exact
/// capacity. A trailing comma is accepted, and an empty invocation yields an empty `String`.
#[macro_export]
macro_rules! concat_string {
    () => { String::with_capacity(0) };
    ($($s:expr),+ $(,)?) => {
        // Using the array as a `match` scrutinee keeps temporaries produced by the argument
        // expressions alive for the whole arm, so the borrowed `&str` views remain valid while
        // they are measured and then copied.
        match [$(AsRef::<str>::as_ref(&$s)),+] {
            parts => {
                let len: usize = parts.iter().map(|part| part.len()).sum();
                let mut buf = String::with_capacity(len);
                for part in parts.iter() {
                    buf.push_str(part);
                }
                buf
            }
        }
    };
}
/// Generates associated functions that deepen an existing `ParseResult` in place.
///
/// Each `type, name, conversion` triple expands to
/// `pub fn name(previous_parse_result: &mut ParseResult<type>, parse_options: ParseOptions) -> Result<(), String>`.
/// `conversion` is an expression (typically a closure) that is called on the value returned by
/// the parser and must yield a `ParseResult<type>` so its entries can be appended.
macro_rules! change_depth {
    ($($t:ty, $func:ident, $to_owned:expr),+) => {$(
    /// Re-parses the raw values of not-yet-expanded containers so the result reflects
    /// `parse_options.max_depth`.
    ///
    /// `parsing_max_depth` is always overwritten with the new `max_depth`. Work is only done
    /// when the new depth is greater than the previous one: every entry that existed before the
    /// call is inspected, eligible arrays/objects with a stored raw value are parsed again with
    /// their own pointer as prefix, and the resulting entries are appended to `json`.
    ///
    /// # Errors
    ///
    /// Returns the parser's error message if a stored raw value cannot be parsed. Entries
    /// appended before the failure are kept, so the result may be partially updated.
    pub fn $func<'json>(previous_parse_result: &mut ParseResult<$t>, mut parse_options: ParseOptions) -> Result<(), String> {
        let previous_parse_depth = previous_parse_result.parsing_max_depth;
        let previous_max_json_depth = previous_parse_result.max_json_depth;
        previous_parse_result.parsing_max_depth = parse_options.max_depth;
        if previous_parse_depth >= parse_options.max_depth {
            // The requested depth is not deeper than what was already parsed.
            return Ok(());
        }

        // Only entries present before this call are visited; entries appended below are not.
        let previous_len = previous_parse_result.json.len();
        for i in 0..previous_len {
            let entry = &previous_parse_result.json[i];
            let (should_parse, is_object) = match entry.pointer.value_type {
                // Arrays are expanded only when sitting exactly at the previous depth limit.
                ValueType::Array(_) => (
                    parse_options.parse_array
                        && entry.pointer.depth - previous_parse_result.depth_after_start_at == previous_parse_depth,
                    false,
                ),
                // Objects are expanded when still unparsed and within the previous depth limit.
                ValueType::Object(parsed, _) => (
                    !parsed
                        && entry.pointer.depth - previous_parse_result.depth_after_start_at <= previous_parse_depth,
                    true,
                ),
                _ => (false, false),
            };
            if !should_parse {
                continue;
            }

            // Children of the container live one level below it.
            let new_depth = entry.pointer.depth + 1;
            if let Some(ref v) = entry.value {
                let mut lexer = Lexer::new(v.as_bytes());
                let mut parser = Parser::new_for_change_depth(&mut lexer, previous_parse_result.depth_after_start_at, previous_max_json_depth);
                // New pointers are generated relative to the container being expanded.
                parse_options.prefix = Some(entry.pointer.pointer.clone());
                // Propagate parse failures to the caller instead of panicking.
                let res = parser.parse(&parse_options, new_depth)?;
                let mut res = $to_owned(res);
                if previous_parse_result.max_json_depth < res.max_json_depth {
                    previous_parse_result.max_json_depth = res.max_json_depth;
                }

                let first_type = res.json.first().map(|first| first.pointer.value_type);
                if let Some(ValueType::Array(size)) = first_type {
                    previous_parse_result.json[i].pointer.value_type = ValueType::Array(size);
                    if res.json[0].pointer.pointer.is_empty() {
                        // Remove the array entry that has an empty pointer (order is not preserved).
                        res.json.swap_remove(0);
                    }
                }

                if is_object {
                    // Mark the object as parsed and record how many direct children it has.
                    let root_depth = previous_parse_result.json[i].pointer.depth + 1;
                    let elements_count = res.json.iter().filter(|e| e.pointer.depth == root_depth).count();
                    previous_parse_result.json[i].pointer.value_type = ValueType::Object(true, elements_count);
                }

                previous_parse_result.json.extend(res.json);
            }
        }
        Ok(())
    }
    )+};
}
223
224impl PointerKey {
225    pub fn from_pointer(pointer: String, value_type: ValueType, depth: u8, position: usize) -> Self {
226        Self {
227            pointer,
228            value_type,
229            depth,
230            position,
231            column_id: 0,
232        }
233    }
234}
235
/// Kind of JSON value an entry points at. The default is [`ValueType::None`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ValueType {
    /// An array, carrying a size.
    Array(usize),
    /// An object: whether it has been parsed or not, and its number of elements.
    Object(bool, usize),
    /// A numeric value.
    Number,
    /// A string value.
    String,
    /// A boolean value.
    Bool,
    /// A JSON `null`.
    Null,
    /// No type recorded; this is the `Default` variant.
    #[default]
    None,
}
248
249
/// A list of owned strings.
/// NOTE(review): presumably the individual segments of a pointer; it is not referenced in this
/// file, so confirm against the modules that use it.
type PointerFragment = Vec<String>;
251
252
253#[derive(Debug, Clone, Default)]
254pub struct FlatJsonValue<V: Debug + Clone + AsRef<str> + GetBytes> {
255    pub pointer: PointerKey,
256    pub value: Option<V>,
257}
258
259
260impl<V: Debug + Clone + AsRef<str> + GetBytes>  Hash for FlatJsonValue<V> {
261    fn hash<H: Hasher>(&self, state: &mut H) {
262        self.pointer.hash(state);
263    }
264}
265
266
267#[derive(Debug, Clone)]
268pub struct ParseResult<V: Debug + Clone + AsRef<str> + GetBytes> {
269    pub json: Vec<FlatJsonValue<V>>,
270    pub max_json_depth: usize,
271    pub parsing_max_depth: u8,
272    pub started_parsing_at: Option<String>,
273    pub started_parsing_at_index_start: usize,
274    pub started_parsing_at_index_end: usize,
275    pub parsing_prefix: Option<String>,
276    pub depth_after_start_at: u8,
277}
278
279impl ParseResult<String> {
280    pub fn clone_except_json(&self) -> Self {
281        Self {
282            json: Default::default(),
283            max_json_depth: self.max_json_depth,
284            parsing_max_depth: self.parsing_max_depth,
285            started_parsing_at: self.started_parsing_at.clone(),
286            started_parsing_at_index_start: self.started_parsing_at_index_start,
287            started_parsing_at_index_end: self.started_parsing_at_index_end,
288            parsing_prefix: self.parsing_prefix.clone(),
289            depth_after_start_at: self.depth_after_start_at,
290        }
291    }
292
293    pub fn to_owned(self) -> ParseResult<String> {
294        self
295    }
296
297}
298impl ParseResult<&str> {
299    pub fn clone_except_json(&self) -> Self {
300        Self {
301            json: Default::default(),
302            max_json_depth: self.max_json_depth,
303            parsing_max_depth: self.parsing_max_depth,
304            started_parsing_at_index_start: self.started_parsing_at_index_start,
305            started_parsing_at_index_end: self.started_parsing_at_index_end,
306            started_parsing_at: self.started_parsing_at.clone(),
307            parsing_prefix: self.parsing_prefix.clone(),
308            depth_after_start_at: self.depth_after_start_at,
309        }
310    }
311    pub fn to_owned(self) -> ParseResult<String> {
312        let mut transformed_vec: Vec<FlatJsonValue<String>> = Vec::with_capacity(self.json.len());
313
314        for entry in self.json {
315            transformed_vec.push(FlatJsonValue { pointer: entry.pointer, value: entry.value.map(|s| s.to_owned()) });
316        }
317        ParseResult::<String> {
318            json: transformed_vec,
319            max_json_depth: self.max_json_depth,
320            parsing_max_depth: self.parsing_max_depth,
321            started_parsing_at_index_start: self.started_parsing_at_index_start,
322            started_parsing_at_index_end: self.started_parsing_at_index_end,
323            started_parsing_at: self.started_parsing_at.clone(),
324            parsing_prefix: self.parsing_prefix.clone(),
325            depth_after_start_at: self.depth_after_start_at,
326        }
327    }
328
329}
330
331
332impl JSONParser {
333    pub fn parse(input: &str, options: ParseOptions) -> Result<ParseResult<&str>, String> {
334        JSONParser::parse_bytes(input.as_bytes(), options)
335    }
336    pub fn parse_bytes(input: &[u8], options: ParseOptions) -> Result<ParseResult<&str>, String> {
337        let mut lexer = Lexer::new(input);
338        let mut parser = Parser::new(&mut lexer);
339        parser.parse(&options, options.start_depth)
340    }
341
342
343    change_depth!(&'json str, change_depth, |r: ParseResult<&'json str>| r);
344    change_depth!(String, change_depth_owned, |r: ParseResult<&str>| r.to_owned());
345
346
347    pub fn serialize<'a>(data: &mut Vec<FlatJsonValue<&'a str>>) -> Value<&'a str> {
348        serialize_to_json(data)
349    }
350
351    pub fn serialize_owned(data: &mut Vec<FlatJsonValue<String>>) -> Value<String> {
352        serialize_to_json(data)
353    }
354
355}
356
357
/// Interprets `bytes` as UTF-8 and returns the borrowed `&str`, or `None` when the bytes are
/// not valid UTF-8.
///
/// Validation uses `simdutf8` when the `simdutf8` feature is enabled and the standard library
/// otherwise.
#[inline]
pub fn string_from_bytes(bytes: &[u8]) -> Option<&str> {
    #[cfg(feature = "simdutf8")]
    let decoded = simdutf8::basic::from_utf8(bytes);
    #[cfg(not(feature = "simdutf8"))]
    let decoded = std::str::from_utf8(bytes);
    decoded.ok()
}