Skip to main content

sciforge_parser/json/
parser.rs

1use super::error::{JsonError, JsonErrorKind};
2use super::lexer::Cursor;
3use super::number::parse_number;
4use super::string::parse_string;
5use super::value::JsonValue;
6
/// Capacity of the fixed-size buffer `JsonParser::parse_object` uses to
/// remember the byte ranges of keys it has already seen in one object.
///
/// The buffer is only consulted when the duplicate-key policy is
/// `DuplicateKeyPolicy::Reject`; under that policy an object that needs more
/// tracked keys than this is rejected with `MaxObjectLengthExceeded`.
const DUP_TRACK_LIMIT: usize = 256;
8
/// Append-only, fixed-capacity vector that stores its elements in a slice
/// supplied by the caller instead of allocating.
struct FixedSliceVec<'a, T> {
    /// Backing storage; its length is the capacity.
    buf: &'a mut [T],
    /// Number of leading slots that have been written through `push`.
    len: usize,
}

impl<'a, T: Copy> FixedSliceVec<'a, T> {
    /// Wraps `buf` as an empty vector whose capacity is `buf.len()`.
    fn new(buf: &'a mut [T]) -> Self {
        FixedSliceVec { len: 0, buf }
    }

    /// Appends `val`, or hands it back as `Err(val)` when no slot is free.
    fn push(&mut self, val: T) -> Result<(), T> {
        match self.buf.get_mut(self.len) {
            Some(slot) => {
                *slot = val;
                self.len += 1;
                Ok(())
            }
            // `len` already reached the capacity: nothing is stored.
            None => Err(val),
        }
    }

    /// Returns the elements pushed so far, in insertion order.
    fn as_slice(&self) -> &[T] {
        let (used, _spare) = self.buf.split_at(self.len);
        used
    }

    /// Returns `true` once every slot of the backing slice is in use.
    fn is_full(&self) -> bool {
        self.buf.len() <= self.len
    }
}
36
/// Nesting-depth limit used by `DEFAULT_LIMITS` (and therefore by
/// `JsonParser::new`) unless the caller overrides it.
pub const DEFAULT_MAX_DEPTH: usize = 64;
38
/// How the parser treats an object key that repeats an earlier key of the
/// same object.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DuplicateKeyPolicy {
    /// Keys are not compared; repeated keys are accepted.
    Allow,
    /// Parsing fails with `DuplicateObjectKey` when the source bytes of a key
    /// equal the source bytes of an earlier key in the same object.
    ///
    /// Keys are tracked in a buffer of `DUP_TRACK_LIMIT` entries; an object
    /// that would need more entries fails with `MaxObjectLengthExceeded`.
    // NOTE(review): the comparison is on source byte ranges, so keys that only
    // differ in escaping are presumably treated as distinct - confirm against
    // `parse_string`.
    Reject,
}
44
/// Resource limits enforced by `JsonParser` while it walks the input.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct JsonLimits {
    /// Largest depth at which a value may appear; the top-level value is at
    /// depth 0 and every enclosing array or object adds one.
    pub max_depth: usize,
    /// Largest accepted `raw.len()` of a parsed string value or object key.
    pub max_string_len: usize,
    /// Largest accepted number of elements in a single array.
    pub max_array_len: usize,
    /// Largest accepted number of members in a single object.
    pub max_object_len: usize,
    /// Largest accepted number of values in the whole document (object keys
    /// are not counted).
    pub max_node_count: usize,
    /// Whether repeated keys inside one object are accepted or rejected.
    pub duplicate_key_policy: DuplicateKeyPolicy,
}
54
/// Limits applied by `JsonParser::new` until the caller replaces them.
pub const DEFAULT_LIMITS: JsonLimits = JsonLimits {
    max_depth: DEFAULT_MAX_DEPTH,
    max_string_len: 64 * 1024,
    max_array_len: 16 * 1024,
    max_object_len: 16 * 1024,
    max_node_count: 128 * 1024,
    duplicate_key_policy: DuplicateKeyPolicy::Allow,
};
63
64pub struct JsonParser<'a> {
65    cursor: Cursor<'a>,
66    limits: JsonLimits,
67    nodes_seen: usize,
68}
69
70impl<'a> JsonParser<'a> {
71    pub const fn new(bytes: &'a [u8]) -> Self {
72        Self {
73            cursor: Cursor::new(bytes),
74            limits: DEFAULT_LIMITS,
75            nodes_seen: 0,
76        }
77    }
78
79    pub const fn with_max_depth(mut self, max_depth: usize) -> Self {
80        self.limits.max_depth = max_depth;
81        self
82    }
83
84    pub const fn with_limits(mut self, limits: JsonLimits) -> Self {
85        self.limits = limits;
86        self
87    }
88
89    pub fn parse(mut self) -> Result<JsonValue<'a>, JsonError> {
90        self.cursor.skip_ws();
91        let value = self.parse_value(0)?;
92        self.cursor.skip_ws();
93        if self.cursor.position() != self.cursor.len() {
94            return Err(JsonError::new(
95                JsonErrorKind::TrailingCharacters,
96                self.cursor.position(),
97            ));
98        }
99        Ok(value)
100    }
101
102    pub fn validate(mut self) -> Result<(), JsonError> {
103        self.cursor.skip_ws();
104        self.parse_value(0)?;
105        self.cursor.skip_ws();
106        if self.cursor.position() != self.cursor.len() {
107            return Err(JsonError::new(
108                JsonErrorKind::TrailingCharacters,
109                self.cursor.position(),
110            ));
111        }
112        Ok(())
113    }
114
115    fn parse_value(&mut self, depth: usize) -> Result<JsonValue<'a>, JsonError> {
116        if depth > self.limits.max_depth {
117            return Err(JsonError::new(
118                JsonErrorKind::MaxDepthExceeded,
119                self.cursor.position(),
120            ));
121        }
122
123        self.nodes_seen = self.nodes_seen.saturating_add(1);
124        if self.nodes_seen > self.limits.max_node_count {
125            return Err(JsonError::new(
126                JsonErrorKind::MaxNodeCountExceeded,
127                self.cursor.position(),
128            ));
129        }
130
131        let b = self
132            .cursor
133            .peek()
134            .ok_or(JsonError::new(JsonErrorKind::Eof, self.cursor.position()))?;
135
136        match b {
137            b'{' => self.parse_object(depth + 1),
138            b'[' => self.parse_array(depth + 1),
139            b'"' => {
140                let s = parse_string(&mut self.cursor)?;
141                if s.raw.len() > self.limits.max_string_len {
142                    return Err(JsonError::new(
143                        JsonErrorKind::MaxStringLengthExceeded,
144                        self.cursor.position(),
145                    ));
146                }
147                Ok(JsonValue::String(s.raw))
148            }
149            b't' => self.parse_true(),
150            b'f' => self.parse_false(),
151            b'n' => self.parse_null(),
152            b'-' | b'0'..=b'9' => parse_number(&mut self.cursor).map(JsonValue::Number),
153            _ => Err(JsonError::new(
154                JsonErrorKind::UnexpectedToken,
155                self.cursor.position(),
156            )),
157        }
158    }
159
160    fn parse_object(&mut self, depth: usize) -> Result<JsonValue<'a>, JsonError> {
161        self.cursor.consume(b'{')?;
162        self.cursor.skip_ws();
163
164        if self.cursor.try_consume(b'}') {
165            return Ok(JsonValue::Object);
166        }
167
168        let mut object_len = 0usize;
169        let mut seen_key_ranges_buf = [(0usize, 0usize); DUP_TRACK_LIMIT];
170        let mut seen_key_ranges = FixedSliceVec::new(&mut seen_key_ranges_buf);
171
172        loop {
173            self.cursor.skip_ws();
174            let key = parse_string(&mut self.cursor)?;
175            if key.raw.len() > self.limits.max_string_len {
176                return Err(JsonError::new(
177                    JsonErrorKind::MaxStringLengthExceeded,
178                    self.cursor.position(),
179                ));
180            }
181
182            if self.limits.duplicate_key_policy == DuplicateKeyPolicy::Reject {
183                let key_bytes = &self.cursor.bytes()[key.start..key.end];
184                let ranges = seen_key_ranges.as_slice();
185                let mut i = 0usize;
186                while i < ranges.len() {
187                    let (start, end) = ranges[i];
188                    if &self.cursor.bytes()[start..end] == key_bytes {
189                        return Err(JsonError::new(
190                            JsonErrorKind::DuplicateObjectKey,
191                            self.cursor.position(),
192                        ));
193                    }
194                    i += 1;
195                }
196
197                if seen_key_ranges.is_full() {
198                    return Err(JsonError::new(
199                        JsonErrorKind::MaxObjectLengthExceeded,
200                        self.cursor.position(),
201                    ));
202                }
203
204                if seen_key_ranges.push((key.start, key.end)).is_err() {
205                    return Err(JsonError::new(
206                        JsonErrorKind::MaxObjectLengthExceeded,
207                        self.cursor.position(),
208                    ));
209                }
210            }
211
212            self.cursor.skip_ws();
213            self.cursor.consume(b':')?;
214            self.cursor.skip_ws();
215            self.parse_value(depth)?;
216            object_len = object_len.saturating_add(1);
217            if object_len > self.limits.max_object_len {
218                return Err(JsonError::new(
219                    JsonErrorKind::MaxObjectLengthExceeded,
220                    self.cursor.position(),
221                ));
222            }
223            self.cursor.skip_ws();
224
225            if self.cursor.try_consume(b',') {
226                self.cursor.skip_ws();
227                continue;
228            }
229            self.cursor.consume(b'}')?;
230            return Ok(JsonValue::Object);
231        }
232    }
233
234    fn parse_array(&mut self, depth: usize) -> Result<JsonValue<'a>, JsonError> {
235        self.cursor.consume(b'[')?;
236        self.cursor.skip_ws();
237
238        if self.cursor.try_consume(b']') {
239            return Ok(JsonValue::Array);
240        }
241
242        let mut array_len = 0usize;
243
244        loop {
245            self.cursor.skip_ws();
246            self.parse_value(depth)?;
247            array_len = array_len.saturating_add(1);
248            if array_len > self.limits.max_array_len {
249                return Err(JsonError::new(
250                    JsonErrorKind::MaxArrayLengthExceeded,
251                    self.cursor.position(),
252                ));
253            }
254            self.cursor.skip_ws();
255
256            if self.cursor.try_consume(b',') {
257                self.cursor.skip_ws();
258                continue;
259            }
260            self.cursor.consume(b']')?;
261            return Ok(JsonValue::Array);
262        }
263    }
264
265    fn parse_true(&mut self) -> Result<JsonValue<'a>, JsonError> {
266        self.cursor.expect_bytes(b"true")?;
267        Ok(JsonValue::Bool(true))
268    }
269
270    fn parse_false(&mut self) -> Result<JsonValue<'a>, JsonError> {
271        self.cursor.expect_bytes(b"false")?;
272        Ok(JsonValue::Bool(false))
273    }
274
275    fn parse_null(&mut self) -> Result<JsonValue<'a>, JsonError> {
276        self.cursor.expect_bytes(b"null")?;
277        Ok(JsonValue::Null)
278    }
279}
280
281pub fn parse_json(bytes: &[u8]) -> Result<JsonValue<'_>, JsonError> {
282    JsonParser::new(bytes).parse()
283}
284
285pub fn parse_json_with_max_depth(
286    bytes: &[u8],
287    max_depth: usize,
288) -> Result<JsonValue<'_>, JsonError> {
289    JsonParser::new(bytes).with_max_depth(max_depth).parse()
290}
291
292pub fn parse_json_with_limits(
293    bytes: &[u8],
294    limits: JsonLimits,
295) -> Result<JsonValue<'_>, JsonError> {
296    JsonParser::new(bytes).with_limits(limits).parse()
297}
298
299pub fn validate_json(bytes: &[u8]) -> Result<(), JsonError> {
300    JsonParser::new(bytes).validate()
301}