rill_json/
lib.rs

1//! # rill-json
2//!
3//! `rill-json` is a fast, 100% safe, and RFC 8259-compliant streaming JSON parser
4//! and serializer, built from scratch in pure Rust.
5//!
6//! This library is designed for performance, safety, and correctness.
7//!
8//! ## Key Features
9//!
10//! * **100% Safe Rust:** Contains no `unsafe` code.
11//! * **Streaming Parser:** An `Iterator` that emits `ParserEvent`s, ideal
12//!   for parsing large files with minimal memory.
13//! * **Optimized Performance:** Uses a byte-slice-based tokenizer with a
14//!   branchless Lookup Table (LUT) and `memchr` for high-performance,
15//!   safe-SIMD-accelerated string parsing.
16//! * **Serializer Included:** Comes with `stringify()` and `stringify_pretty()`
17//!   to serialize your Rust data back to JSON.
18//! * **RFC 8259 Compliant:** Passes a full test suite for specification compliance.
19//!
20//! ## Quick Start: Parsing (Streaming)
21//!
22//! The `parse_streaming` function is the primary entry point. It's the most
23//! efficient way to parse JSON, especially large files.
24//!
//! ```no_run
26//! use rill_json::{parse_streaming, ParserEvent};
27//!
28//! fn main() {
29//!     let json_data = r#"{ "name": "Babbage", "id": 1815 }"#;
30//!     let mut parser = parse_streaming(json_data).unwrap();
31//!     let mut found_name_key = false;
32//!
33//!     while let Some(event) = parser.next() {
34//!         match event.unwrap() {
35//!             ParserEvent::Key(key) if key == "name" => found_name_key = true,
36//!             ParserEvent::String(value) if found_name_key => {
37//!                 println!("Found name: {}", value);
38//!                 break;
39//!             }
40//!             _ => found_name_key = false,
41//!         }
42//!     }
43//! }
44//! ```
45//!
//! ## Quick Start: Serializing
//!
//! You can also create JSON strings from your own Rust data using the `JsonValue` enum.
49//!
50//! ```no_run
51//! use rill_json::JsonValue;
52//! use std::collections::HashMap;
53//!
54//! let mut user = HashMap::new();
55//! user.insert("username".to_string(), JsonValue::String("ada_l".to_string()));
56//! user.insert("id".to_string(), JsonValue::Number(1815.0));
57//!
58//! let json_object = JsonValue::Object(user);
59//!
60//! // Get the compact string
61//! let json_string = json_object.stringify();
62//! assert_eq!(json_string, r#"{"id":1815,"username":"ada_l"}"#);
63//! ```
64
65// 1. Declare all the new modules.
66/// Contains the primary `ParseError` type for the library.
67pub mod error;
68/// Contains the streaming `Parser` and its `ParserEvent` enum.
69pub mod parser;
70/// Contains the `Token` and `TokenType` enums used internally.
71pub mod token;
72/// Contains the `JsonValue` enum and the serialization (stringify) logic.
73pub mod value;
74
75/// The internal, high-performance, byte-based tokenizer (lexer).
76/// This module is private to the crate.
77mod tokenizer;
78
79// 2. Re-export the public-facing types.
80// This creates the clean, top-level API for users.
81pub use error::ParseError;
82pub use parser::{ParserEvent, StreamingParser};
83pub use value::JsonValue;
84
85// --- Constants ---
86/// The default maximum nesting depth (e.g., `[[[]]]`) to prevent stack overflows.
87const DEFAULT_MAX_DEPTH: usize = 100;
/// The maximum allowed size of an input JSON (10 MiB) to prevent DoS attacks.
89const MAX_JSON_SIZE_BYTES: usize = 10 * 1024 * 1024;
90
91// --- Public-facing helper function ---
92
93/// Parses a JSON string slice into a `StreamingParser`.
94///
95/// This is the main entry point for the streaming parser. It's fast,
96/// low-allocation, and operates as an `Iterator` over `ParserEvent`s.
97///
98/// # Arguments
99/// * `input` - A string slice containing the JSON data to be parsed.
100///
101/// # Errors
102/// Returns a `ParseError` if the input exceeds the `MAX_JSON_SIZE_BYTES`
103/// limit (10MB) *before* parsing begins.
104///
105/// # Examples
106/// ```
107/// use rill_json::{parse_streaming, ParserEvent};
108///
109/// let json_data = r#"[1, "hello"]"#;
110/// let mut parser = parse_streaming(json_data).unwrap();
111///
112/// assert_eq!(parser.next().unwrap().unwrap(), ParserEvent::StartArray);
113/// assert_eq!(parser.next().unwrap().unwrap(), ParserEvent::Number(1.0));
114/// assert_eq!(parser.next().unwrap().unwrap(), ParserEvent::String("hello".to_string()));
115/// assert_eq!(parser.next().unwrap().unwrap(), ParserEvent::EndArray);
116/// assert!(parser.next().is_none());
117/// ```
118pub fn parse_streaming(input: &'_ str) -> Result<StreamingParser<'_>, ParseError> {
119    if input.len() > MAX_JSON_SIZE_BYTES {
120        return Err(ParseError {
121            message: "Input exceeds maximum size limit".to_string(),
122            line: 1,
123            column: 1,
124        });
125    }
126    // We can call StreamingParser::new because it's public in parser.rs
127    Ok(StreamingParser::new(input, DEFAULT_MAX_DEPTH))
128}
129
// --- Test Module ---
// Unit tests live in lib.rs and exercise the crate through its top-level re-exports.
132#[cfg(test)]
133mod tests {
134    use std::collections::HashMap;
135    // Use the public API we just defined
136    use super::{parse_streaming, JsonValue, ParseError, ParserEvent, StreamingParser};
137
138    fn collect_events(input: &str) -> Result<Vec<ParserEvent>, ParseError> {
139        parse_streaming(input)?.collect()
140    }
141
142    fn collect_events_with_depth(
143        input: &str,
144        depth: usize,
145    ) -> Result<Vec<ParserEvent>, ParseError> {
146        // We can call this because `StreamingParser` and its `new` are public
147        StreamingParser::new(input, depth).collect()
148    }
149
150    #[test]
151    fn test_streaming_parser_simple() {
152        let input = "{ \"key\": [1, null, true, \"hello\"] }";
153        let events = collect_events(input).unwrap();
154
155        assert_eq!(
156            events,
157            vec![
158                ParserEvent::StartObject,
159                ParserEvent::Key("key".to_string()),
160                ParserEvent::StartArray,
161                ParserEvent::Number(1.0),
162                ParserEvent::Null,
163                ParserEvent::Boolean(true),
164                ParserEvent::String("hello".to_string()),
165                ParserEvent::EndArray,
166                ParserEvent::EndObject
167            ]
168        );
169    }
170
171    #[test]
172    fn test_streaming_empty_array_object() {
173        let input = "[ { } ]";
174        let events = collect_events(input).unwrap();
175
176        assert_eq!(
177            events,
178            vec![
179                ParserEvent::StartArray,
180                ParserEvent::StartObject,
181                ParserEvent::EndObject,
182                ParserEvent::EndArray
183            ]
184        );
185    }
186
187    #[test]
188    fn test_streaming_errors() {
189        let input = "[1 true]";
190        let err = collect_events(input).unwrap_err();
191        assert_eq!(err.message, "Expected ',' or ']'");
192        assert_eq!(err.line, 1);
193        assert_eq!(err.column, 4);
194    }
195
196    #[test]
197    fn test_streaming_object_errors() {
198        let input = "{ : 1 }";
199        let err = collect_events(input).unwrap_err();
200        assert_eq!(err.message, "Expected '}' or a string key");
201        assert_eq!(err.line, 1);
202        assert_eq!(err.column, 3);
203
204        let input = "{\"key\" 1}";
205        let err = collect_events(input).unwrap_err();
206        assert_eq!(err.message, "Expected ':'");
207        assert_eq!(err.line, 1);
208        assert_eq!(err.column, 8);
209    }
210
211    #[test]
212    fn test_streaming_tokenizer_errors() {
213        let input = "[1, ?]";
214        let err = collect_events(input).unwrap_err();
215        assert_eq!(err.message, "Unexpected character '?'");
216        assert_eq!(err.line, 1);
217        assert_eq!(err.column, 5);
218
219        let input = "[1] [2]";
220        let err = collect_events(input).unwrap_err();
221        assert_eq!(err.message, "Unexpected trailing token");
222        assert_eq!(err.line, 1);
223        assert_eq!(err.column, 5);
224    }
225
226    #[test]
227    fn test_streaming_rfc_8259_compliance() {
228        // Trailing Commas
229        let err = collect_events("[1, 2,]").unwrap_err();
230        assert_eq!(err.message, "Unexpected ']', expected a value");
231        assert_eq!(err.line, 1);
232        assert_eq!(err.column, 7);
233
234        let err = collect_events("{\"key\": 1,}").unwrap_err();
235        assert_eq!(err.message, "Unexpected '}', expected a string key");
236        assert_eq!(err.line, 1);
237        assert_eq!(err.column, 11);
238
239        let err = collect_events("// a comment\n[1, 2]").unwrap_err();
240        assert_eq!(err.message, "Unexpected character '/'");
241
242        let err = collect_events("0123").unwrap_err();
243        assert_eq!(err.message, "Invalid number: leading zeros not allowed");
244
245        let err = collect_events("1.").unwrap_err();
246        assert_eq!(
247            err.message,
248            "Invalid number: cannot end with a decimal point"
249        );
250
251        let err = collect_events(".5").unwrap_err();
252        assert_eq!(err.message, "Unexpected character '.'");
253
254        let err = collect_events("\"\n\"").unwrap_err();
255        assert_eq!(err.message, "Unescaped control character in string");
256        assert_eq!(err.line, 2);
257        assert_eq!(err.column, 1);
258    }
259
260    #[test]
261    fn test_streaming_security_limits() {
262        let evil_input = "[".repeat(101) + &"]".repeat(101);
263        let err = collect_events_with_depth(&evil_input, 100).unwrap_err();
264        assert_eq!(err.message, "Maximum nesting depth exceeded");
265        assert_eq!(err.line, 1);
266        assert_eq!(err.column, 101);
267
268        let ok_input = "[".repeat(100) + &"]".repeat(100);
269        assert!(collect_events_with_depth(&ok_input, 100).is_ok());
270
271        // Test size limit
272        let small_input = "[1]";
273        let err = parse_streaming(small_input);
274        assert!(err.is_ok());
275    }
276
277    // --- Stage 16 Tests ---
278
279    #[test]
280    fn test_stringify_stage_16_examples() {
281        // Test case from challenge:
282        // Input: A native map {"key": "value", "items": [1, None]}
283        // Output: The string {"key":"value","items":[1,null]}
284        let mut items = HashMap::new();
285        items.insert("key".to_string(), JsonValue::String("value".to_string()));
286        items.insert(
287            "items".to_string(),
288            JsonValue::Array(vec![JsonValue::Number(1.0), JsonValue::Null]),
289        );
290        let obj = JsonValue::Object(items);
291
292        // We must check both key orders since HashMap order is not guaranteed
293        let output = obj.stringify();
294        let expected1 = r#"{"key":"value","items":[1,null]}"#;
295        let expected2 = r#"{"items":[1,null],"key":"value"}"#;
296
297        assert!(
298            output == expected1 || output == expected2,
299            "Stringify output was: {}",
300            output
301        );
302
303        // Test case from challenge:
304        // Input: A native string a "quoted" \ string
305        // Output: The string "a \"quoted\" \\ string"
306        let s = JsonValue::String("a \"quoted\" \\ string".to_string());
307        assert_eq!(s.stringify(), r#""a \"quoted\" \\ string""#);
308    }
309
310    #[test]
311    fn test_stringify_all_types() {
312        // Primitives
313        assert_eq!(JsonValue::Null.stringify(), "null");
314        assert_eq!(JsonValue::Boolean(true).stringify(), "true");
315        assert_eq!(JsonValue::Boolean(false).stringify(), "false");
316        assert_eq!(JsonValue::Number(123.45).stringify(), "123.45");
317        assert_eq!(JsonValue::Number(-0.5).stringify(), "-0.5");
318        assert_eq!(JsonValue::Number(1e+3).stringify(), "1000");
319
320        // Empty Structures
321        assert_eq!(JsonValue::Array(vec![]).stringify(), "[]");
322        assert_eq!(JsonValue::Object(HashMap::new()).stringify(), "{}");
323
324        // Complex Array
325        let arr = JsonValue::Array(vec![
326            JsonValue::Number(1.0),
327            JsonValue::String("test".to_string()),
328            JsonValue::Boolean(true),
329            JsonValue::Null,
330            JsonValue::Object(HashMap::new()),
331        ]);
332        assert_eq!(arr.stringify(), r#"[1,"test",true,null,{}]"#);
333    }
334
335    #[test]
336    fn test_stringify_string_escapes() {
337        // Test all escapes from Stage 8
338        let s = JsonValue::String("\" \\ / \u{0008} \u{000C} \n \r \t".to_string());
339        assert_eq!(s.stringify(), r#""\" \\ \/ \b \f \n \r \t""#);
340
341        // Test control character escape
342        let s_control = JsonValue::String("hello\u{0001}world".to_string());
343        assert_eq!(s_control.stringify(), r#""hello\u0001world""#);
344    }
345
346    #[test]
347    fn test_stringify_pretty_print() {
348        let mut sub_obj = HashMap::new();
349        sub_obj.insert("sub_key".to_string(), JsonValue::Number(2.0));
350
351        let mut items = HashMap::new();
352        items.insert("key".to_string(), JsonValue::String("value".to_string()));
353        items.insert(
354            "items".to_string(),
355            JsonValue::Array(vec![
356                JsonValue::Number(1.0),
357                JsonValue::Null,
358                JsonValue::Object(sub_obj),
359            ]),
360        );
361        items.insert("admin".to_string(), JsonValue::Boolean(true));
362        let obj = JsonValue::Object(items);
363
364        let pretty_string = obj.stringify_pretty();
365
366        // We can't test for an exact string match because HashMap
367        // iteration order is not guaranteed.
368
369        assert!(pretty_string.starts_with("{\n"));
370        assert!(pretty_string.ends_with("\n}"));
371
372        // --- FIXED LINES ---
373        // Check for the content of the lines, but NOT the trailing comma,
374        // because any of them could be the last item.
375        assert!(pretty_string.contains("\n  \"key\": \"value\""));
376        assert!(pretty_string.contains("\n  \"admin\": true"));
377        // --- END FIX ---
378
379        // This assertion is still correct because the value itself contains newlines
380        assert!(pretty_string.contains("\n  \"items\": [\n"));
381
382        // These assertions are also fine
383        assert!(pretty_string.contains("\n    1,"));
384        assert!(pretty_string.contains("\n    null,"));
385        assert!(pretty_string.contains("\n    {\n"));
386        assert!(pretty_string.contains("\n      \"sub_key\": 2\n"));
387        assert!(pretty_string.contains("\n    }\n"));
388        assert!(pretty_string.contains("\n  ]\n"));
389
390        // Test empty object and array
391        assert_eq!(JsonValue::Object(HashMap::new()).stringify_pretty(), "{}");
392        assert_eq!(JsonValue::Array(vec![]).stringify_pretty(), "[]");
393    }
394}