rill_json/
lib.rs

1//! # rill-json
2//!
3//! `rill-json` is a fast, 100% safe, and RFC 8259-compliant streaming JSON parser
4//! and serializer, built from scratch in pure Rust.
5//!
6//! This library is designed for performance, safety, and correctness.
7//!
8//! ## Key Features
9//!
10//! * **100% Safe Rust:** Contains `#![forbid(unsafe_code)]` to guarantee no `unsafe` keyword.
11//! * **Streaming Parser:** An `Iterator` that emits `ParserEvent`s, ideal
12//!   for parsing large files with minimal memory.
13//! * **Optimized Performance:** Uses a byte-slice-based tokenizer with a
14//!   branchless Lookup Table (LUT) and `memchr` for high-performance,
15//!   safe-SIMD-accelerated string parsing.
16//! * **Zero-Allocation String Parsing:** Returns borrowed string slices (`&str`)
17//!   when no JSON escapes are present, avoiding allocations.
18//! * **In-Memory DOM:** Provides a `JsonValue` enum for convenience, with a
19//!   `JsonValue::parse()` function to build an in-memory tree.
20//! * **Serializer Included:** Comes with `stringify()` and `stringify_pretty()`
21//!   to serialize your Rust data back to JSON.
22//! * **RFC 8259 Compliant:** Passes a full test suite for specification compliance.
23//!
24//! ## Quick Start: 3 Ways to Use `rill-json`
25//!
26//! ### 1. Streaming Parser (Lowest Memory)
27//!
28//! The `parse_streaming` function is the most efficient way to parse JSON,
29//! especially large files.
30//!
31//! ```no_run
32//! use rill_json::{parse_streaming, ParserEvent};
33//!
34//! let json_data = r#"{ "name": "Babbage", "id": 1815 }"#;
35//! let mut parser = parse_streaming(json_data).unwrap();
36//! let mut found_name_key = false;
37//!
38//! while let Some(event) = parser.next() {
39//!     match event.unwrap() {
40//!         ParserEvent::Key(key) if key == "name" => found_name_key = true,
41//!         ParserEvent::String(value) if found_name_key => {
42//!             println!("Found name: {}", value);
43//!             break;
44//!         }
45//!         _ => found_name_key = false,
46//!     }
47//! }
48//! ```
49//!
50//! ### 2. In-Memory Parsing (Most Convenient)
51//!
52//! For convenience, you can also parse directly to an in-memory `JsonValue`.
53//!
54//! ```no_run
55//! use rill_json::{JsonValue, JsonNumber};
56//! use std::collections::BTreeMap;
57//!
58//! let json_data = r#"{ "id": 1815 }"#;
59//! let parsed = JsonValue::parse(json_data).unwrap();
60//!
61//! let mut expected_map = BTreeMap::new();
62//! expected_map.insert("id".to_string(), JsonValue::Number(JsonNumber::I64(1815)));
63//! let expected_val = JsonValue::Object(expected_map);
64//!
65//! assert_eq!(parsed, expected_val);
66//! ```
67//!
68//! ### 3. Serializing (Writing JSON)
69//!
70//! You can also create JSON strings from your own Rust data using the `JsonValue` enum.
71//!
72//! ```no_run
73//! use rill_json::{JsonValue, JsonNumber};
74//! use std::collections::BTreeMap;
75//!
76//! let mut user = BTreeMap::new();
77//! user.insert("username".to_string(), JsonValue::String("ada_l".to_string()));
78//! user.insert("id".to_string(), JsonValue::Number(JsonNumber::I64(1815)));
79//!
80//! let json_object = JsonValue::Object(user);
81//!
82//! // Get the compact string
83//! let json_string = json_object.stringify().unwrap();
84//! assert_eq!(json_string, r#"{"id":1815,"username":"ada_l"}"#);
85//!
86//! // Or get the pretty-printed version
87//! let pretty_string = json_object.stringify_pretty().unwrap();
88//! println!("{}", pretty_string);
89//! ```
90
91// 1. Declare all the new modules.
92/// Contains the primary `ParseError` type for the library.
93pub mod error;
94/// Contains the streaming `Parser` and its `ParserEvent` enum.
95pub mod parser;
96/// Contains the `Token` and `TokenType` enums used internally.
97pub mod token;
98/// Contains the `JsonValue` enum and the serialization (stringify) logic.
99pub mod value;
100
101/// The internal, high-performance, byte-based tokenizer (lexer).
102/// This module is private to the crate.
103mod tokenizer;
104
105// 2. Re-export the public-facing types.
106// This creates the clean, top-level API for users.
107pub use error::ParseError;
108pub use parser::{ParserEvent, StreamingParser};
109pub use value::{JsonNumber, JsonValue}; // <-- Added JsonNumber
110
// --- Constants ---
/// The default maximum nesting depth (e.g., `[[[]]]`) that `parse_streaming`
/// passes to `StreamingParser::new`. Intended to prevent stack overflows on
/// deeply nested input.
const DEFAULT_MAX_DEPTH: usize = 100;
/// The maximum input length, in bytes, accepted by `parse_streaming`:
/// 10 MiB (10 * 1024 * 1024 = 10,485,760 bytes). Longer inputs are rejected
/// before any parsing starts, to limit DoS exposure.
const MAX_JSON_SIZE_BYTES: usize = 10 * 1024 * 1024;
116
117// --- Public-facing helper function ---
118
119/// Parses a JSON string slice into a `StreamingParser`.
120///
121/// This is the main entry point for the streaming parser. It's fast,
122/// low-allocation, and operates as an `Iterator` over `ParserEvent`s.
123///
124/// # Arguments
125/// * `input` - A string slice containing the JSON data to be parsed.
126///
127/// # Errors
128/// Returns a `ParseError` if the input exceeds the `MAX_JSON_SIZE_BYTES`
129/// limit (10MB) *before* parsing begins.
130///
131/// # Examples
132/// ```
133/// use rill_json::{parse_streaming, ParserEvent, JsonNumber};
134///
135/// let json_data = r#"[1, "hello"]"#;
136/// let mut parser = parse_streaming(json_data).unwrap();
137///
138/// assert_eq!(parser.next().unwrap().unwrap(), ParserEvent::StartArray);
139/// assert_eq!(parser.next().unwrap().unwrap(), ParserEvent::Number(JsonNumber::I64(1)));
140/// assert_eq!(parser.next().unwrap().unwrap(), ParserEvent::String("hello".into()));
141/// assert_eq!(parser.next().unwrap().unwrap(), ParserEvent::EndArray);
142/// assert!(parser.next().is_none());
143/// ```
144pub fn parse_streaming(input: &str) -> Result<StreamingParser<'_>, ParseError> {
145    if input.len() > MAX_JSON_SIZE_BYTES {
146        return Err(ParseError {
147            message: "Input exceeds maximum size limit".to_string(),
148            line: 1,
149            column: 1,
150        });
151    }
152    // We can call StreamingParser::new because it's public in parser.rs
153    Ok(StreamingParser::new(input, DEFAULT_MAX_DEPTH))
154}
155
156#[cfg(test)]
157mod tests {
158    use std::collections::BTreeMap;
159    // Use the public API we just defined
160    use super::{parse_streaming, JsonNumber, JsonValue, ParseError, ParserEvent, StreamingParser};
161    use serde_json::{self, Value as SerdeValue};
162    use std::borrow::Cow;
163
164    fn collect_events(input: &str) -> Result<Vec<ParserEvent<'_>>, ParseError> {
165        parse_streaming(input)?.collect()
166    }
167
168    fn collect_events_with_depth(
169        input: &str,
170        depth: usize,
171    ) -> Result<Vec<ParserEvent<'_>>, ParseError> {
172        // We can call this because `StreamingParser` and its `new` are public
173        StreamingParser::new(input, depth).collect()
174    }
175
176    #[test]
177    fn test_streaming_parser_simple() {
178        let input = "{ \"key\": [1, null, true, \"hello\"] }";
179        let events = collect_events(input).unwrap();
180
181        assert_eq!(
182            events,
183            vec![
184                ParserEvent::StartObject,
185                ParserEvent::Key(Cow::Borrowed("key")),
186                ParserEvent::StartArray,
187                ParserEvent::Number(JsonNumber::I64(1)),
188                ParserEvent::Null,
189                ParserEvent::Boolean(true),
190                ParserEvent::String(Cow::Borrowed("hello")),
191                ParserEvent::EndArray,
192                ParserEvent::EndObject
193            ]
194        );
195    }
196
197    #[test]
198    fn test_streaming_parser_escaped_string() {
199        let input = r#"["a\n\"b"]"#;
200        let events = collect_events(input).unwrap();
201        assert_eq!(
202            events,
203            vec![
204                ParserEvent::StartArray,
205                ParserEvent::String(Cow::Owned("a\n\"b".to_string())),
206                ParserEvent::EndArray
207            ]
208        );
209    }
210
211    #[test]
212    fn test_streaming_empty_array_object() {
213        let input = "[ { } ]";
214        let events = collect_events(input).unwrap();
215
216        assert_eq!(
217            events,
218            vec![
219                ParserEvent::StartArray,
220                ParserEvent::StartObject,
221                ParserEvent::EndObject,
222                ParserEvent::EndArray
223            ]
224        );
225    }
226
227    #[test]
228    fn test_streaming_parser_top_level_values() {
229        assert_eq!(
230            collect_events(r#""hello""#).unwrap(),
231            vec![ParserEvent::String("hello".into())]
232        );
233        assert_eq!(
234            collect_events("123.5").unwrap(),
235            vec![ParserEvent::Number(JsonNumber::F64(123.5))]
236        );
237        assert_eq!(
238            collect_events("9007199254740993").unwrap(),
239            vec![ParserEvent::Number(JsonNumber::I64(9007199254740993))]
240        );
241        assert_eq!(
242            collect_events("9223372036854775808").unwrap(),
243            vec![ParserEvent::Number(JsonNumber::U64(9223372036854775808))]
244        );
245        assert_eq!(
246            collect_events("-1234567890").unwrap(),
247            vec![ParserEvent::Number(JsonNumber::I64(-1234567890))]
248        );
249        assert_eq!(
250            collect_events("true").unwrap(),
251            vec![ParserEvent::Boolean(true)]
252        );
253        assert_eq!(collect_events("null").unwrap(), vec![ParserEvent::Null]);
254    }
255
256    #[test]
257    fn test_streaming_parser_complex_nesting() {
258        let input = r#"[{"a": 1, "b": [null, {"c": {}}]}]"#;
259        let events = collect_events(input).unwrap();
260        assert_eq!(
261            events,
262            vec![
263                ParserEvent::StartArray,
264                ParserEvent::StartObject,
265                ParserEvent::Key("a".into()),
266                ParserEvent::Number(JsonNumber::I64(1)),
267                ParserEvent::Key("b".into()),
268                ParserEvent::StartArray,
269                ParserEvent::Null,
270                ParserEvent::StartObject,
271                ParserEvent::Key("c".into()),
272                ParserEvent::StartObject,
273                ParserEvent::EndObject,
274                ParserEvent::EndObject,
275                ParserEvent::EndArray,
276                ParserEvent::EndObject,
277                ParserEvent::EndArray,
278            ]
279        );
280    }
281
282    #[test]
283    fn test_streaming_errors() {
284        let input = "[1 true]";
285        let err = collect_events(input).unwrap_err();
286        assert_eq!(err.message, "Expected ',' or ']'");
287        assert_eq!(err.line, 1);
288        assert_eq!(err.column, 4);
289    }
290
291    #[test]
292    fn test_streaming_object_errors() {
293        let input = "{ : 1 }";
294        let err = collect_events(input).unwrap_err();
295        assert_eq!(err.message, "Expected '}' or a string key");
296        assert_eq!(err.line, 1);
297        assert_eq!(err.column, 3);
298
299        let input = "{\"key\" 1}";
300        let err = collect_events(input).unwrap_err();
301        assert_eq!(err.message, "Expected ':'");
302        assert_eq!(err.line, 1);
303        assert_eq!(err.column, 8);
304    }
305
306    #[test]
307    fn test_streaming_tokenizer_errors() {
308        let input = "[1, ?]";
309        let err = collect_events(input).unwrap_err();
310        assert_eq!(err.message, "Unexpected character '?'");
311        assert_eq!(err.line, 1);
312        assert_eq!(err.column, 5);
313
314        let input = "[1] [2]";
315        let err = collect_events(input).unwrap_err();
316        assert_eq!(err.message, "Unexpected trailing token");
317        assert_eq!(err.line, 1);
318        assert_eq!(err.column, 5);
319    }
320
321    #[test]
322    fn test_streaming_rfc_8259_compliance() {
323        // Trailing Commas
324        let err = collect_events("[1, 2,]").unwrap_err();
325        assert_eq!(err.message, "Unexpected ']', expected a value");
326        assert_eq!(err.line, 1);
327        assert_eq!(err.column, 7);
328
329        let err = collect_events("{\"key\": 1,}").unwrap_err();
330        assert_eq!(err.message, "Unexpected '}', expected a string key");
331        assert_eq!(err.line, 1);
332        assert_eq!(err.column, 11);
333
334        let err = collect_events("// a comment\n[1, 2]").unwrap_err();
335        assert_eq!(err.message, "Unexpected character '/'");
336
337        let err = collect_events("0123").unwrap_err();
338        assert_eq!(err.message, "Invalid number: leading zeros not allowed");
339
340        let err = collect_events("1.").unwrap_err();
341        assert_eq!(
342            err.message,
343            "Invalid number: cannot end with a decimal point"
344        );
345
346        let err = collect_events(".5").unwrap_err();
347        assert_eq!(err.message, "Unexpected character '.'");
348
349        let err = collect_events("\"\n\"").unwrap_err();
350        assert_eq!(err.message, "Unescaped control character in string");
351        assert_eq!(err.line, 2);
352        assert_eq!(err.column, 1);
353    }
354
355    #[test]
356    fn test_streaming_security_limits() {
357        let evil_input = "[".repeat(101) + &"]".repeat(101);
358        let err = collect_events_with_depth(&evil_input, 100).unwrap_err();
359        assert_eq!(err.message, "Maximum nesting depth exceeded");
360        assert_eq!(err.line, 1);
361        assert_eq!(err.column, 101);
362
363        let ok_input = "[".repeat(100) + &"]".repeat(100);
364        assert!(collect_events_with_depth(&ok_input, 100).is_ok());
365
366        // Test size limit
367        let small_input = "[1]";
368        let err = parse_streaming(small_input);
369        assert!(err.is_ok());
370    }
371
372    #[test]
373    fn test_stringify_basic() {
374        // Input: A native map {"key": "value", "items": [1, None]}
375        // Output: The string {"items":[1,null],"key":"value"}
376        let mut items = BTreeMap::new();
377        items.insert("key".to_string(), JsonValue::String("value".to_string()));
378        items.insert(
379            "items".to_string(),
380            JsonValue::Array(vec![JsonValue::Number(JsonNumber::I64(1)), JsonValue::Null]),
381        );
382        let obj = JsonValue::Object(items);
383
384        // Parse the string output back into a serde_json::Value
385        let output_str = obj.stringify().unwrap();
386        let parsed_value: SerdeValue =
387            serde_json::from_str(&output_str).expect("Stringify output should be valid JSON");
388
389        // Create the expected structure
390        let expected_value = serde_json::json!({
391            "key": "value",
392            "items": [1, null]
393        });
394
395        assert_eq!(parsed_value, expected_value);
396        // BTreeMap guarantees key order, so we can also test the string directly
397        assert_eq!(output_str, r#"{"items":[1,null],"key":"value"}"#);
398
399        // Test case from challenge:
400        // Input: A native string a "quoted" \ string
401        // Output: The string "a \"quoted\" \\ string"
402        let s = JsonValue::String("a \"quoted\" \\ string".to_string());
403        assert_eq!(s.stringify().unwrap(), r#""a \"quoted\" \\ string""#);
404    }
405
406    #[test]
407    fn test_stringify_all_types() {
408        assert_eq!(JsonValue::Null.stringify().unwrap(), "null");
409        assert_eq!(JsonValue::Boolean(true).stringify().unwrap(), "true");
410        assert_eq!(JsonValue::Boolean(false).stringify().unwrap(), "false");
411        assert_eq!(
412            JsonValue::Number(JsonNumber::F64(123.45))
413                .stringify()
414                .unwrap(),
415            "123.45"
416        );
417        assert_eq!(
418            JsonValue::Number(JsonNumber::F64(-0.5))
419                .stringify()
420                .unwrap(),
421            "-0.5"
422        );
423        assert_eq!(
424            JsonValue::Number(JsonNumber::I64(1000))
425                .stringify()
426                .unwrap(),
427            "1000"
428        );
429        assert_eq!(
430            JsonValue::Number(JsonNumber::U64(123456789012345))
431                .stringify()
432                .unwrap(),
433            "123456789012345"
434        );
435
436        // Empty Structures
437        assert_eq!(JsonValue::Array(vec![]).stringify().unwrap(), "[]");
438        assert_eq!(
439            JsonValue::Object(BTreeMap::new()).stringify().unwrap(),
440            "{}"
441        );
442
443        // Complex Array
444        let arr = JsonValue::Array(vec![
445            JsonValue::Number(JsonNumber::I64(1)),
446            JsonValue::String("test".to_string()),
447            JsonValue::Boolean(true),
448            JsonValue::Null,
449            JsonValue::Object(BTreeMap::new()),
450        ]);
451        assert_eq!(arr.stringify().unwrap(), r#"[1,"test",true,null,{}]"#);
452    }
453
454    #[test]
455    fn test_stringify_string_escapes() {
456        // Test all escapes from Stage 8
457        let s = JsonValue::String("\" \\ / \u{0008} \u{000C} \n \r \t".to_string());
458        assert_eq!(s.stringify().unwrap(), r#""\" \\ \/ \b \f \n \r \t""#);
459
460        // Test control character escape
461        let s_control = JsonValue::String("hello\u{0001}world".to_string());
462        assert_eq!(s_control.stringify().unwrap(), r#""hello\u0001world""#);
463    }
464
465    #[test]
466    fn test_stringify_pretty_print() {
467        let mut sub_obj = BTreeMap::new();
468        sub_obj.insert("sub_key".to_string(), JsonValue::Number(JsonNumber::I64(2)));
469
470        let mut items = BTreeMap::new();
471        items.insert("key".to_string(), JsonValue::String("value".to_string()));
472        items.insert(
473            "items".to_string(),
474            JsonValue::Array(vec![
475                JsonValue::Number(JsonNumber::I64(1)),
476                JsonValue::Null,
477                JsonValue::Object(sub_obj),
478            ]),
479        );
480        items.insert("admin".to_string(), JsonValue::Boolean(true));
481        let obj = JsonValue::Object(items);
482
483        let pretty_string = obj.stringify_pretty().unwrap();
484
485        // Parse the output string with serde_json
486        let parsed_value: SerdeValue =
487            serde_json::from_str(&pretty_string).expect("Pretty-printed JSON should be valid");
488
489        // Define the expected JSON value
490        let expected_value = serde_json::json!({
491            "key": "value",
492            "admin": true,
493            "items": [
494                1,
495                null,
496                {
497                    "sub_key": 2
498                }
499            ]
500        });
501
502        // Assert that the *parsed value* matches the expected value.
503        assert_eq!(parsed_value, expected_value);
504
505        // With BTreeMap, we can also test the exact string output
506        let expected_string = r#"{
507  "admin": true,
508  "items": [
509    1,
510    null,
511    {
512      "sub_key": 2
513    }
514  ],
515  "key": "value"
516}"#;
517        assert_eq!(pretty_string, expected_string);
518    }
519
520    #[test]
521    fn test_stringify_pretty_empty() {
522        // Test empty object and array
523        assert_eq!(
524            JsonValue::Object(BTreeMap::new())
525                .stringify_pretty()
526                .unwrap(),
527            "{}"
528        );
529        assert_eq!(JsonValue::Array(vec![]).stringify_pretty().unwrap(), "[]");
530    }
531
532    #[test]
533    fn test_stringify_nan_inf() {
534        let val_nan = JsonValue::Number(JsonNumber::F64(f64::NAN));
535        let val_inf = JsonValue::Number(JsonNumber::F64(f64::INFINITY));
536        let val_neg_inf = JsonValue::Number(JsonNumber::F64(f64::NEG_INFINITY));
537
538        // We defined this as a hard error
539        assert!(val_nan.stringify().is_err());
540        assert!(val_inf.stringify().is_err());
541        assert!(val_neg_inf.stringify().is_err());
542
543        // Test pretty print
544        assert!(val_nan.stringify_pretty().is_err());
545        assert!(val_inf.stringify_pretty().is_err());
546    }
547
548    #[test]
549    fn test_value_parse_simple() {
550        let input = r#"{ "key": [1, null, true, "hello"] }"#;
551        let value = JsonValue::parse(input).unwrap();
552
553        let mut obj = BTreeMap::new();
554        obj.insert(
555            "key".to_string(),
556            JsonValue::Array(vec![
557                JsonValue::Number(JsonNumber::I64(1)),
558                JsonValue::Null,
559                JsonValue::Boolean(true),
560                JsonValue::String("hello".to_string()),
561            ]),
562        );
563        let expected = JsonValue::Object(obj);
564
565        assert_eq!(value, expected);
566    }
567
568    #[test]
569    fn test_value_parse_primitives() {
570        assert_eq!(JsonValue::parse("null").unwrap(), JsonValue::Null);
571        assert_eq!(
572            JsonValue::parse("123").unwrap(),
573            JsonValue::Number(JsonNumber::I64(123))
574        );
575        assert_eq!(
576            JsonValue::parse("-456").unwrap(),
577            JsonValue::Number(JsonNumber::I64(-456))
578        );
579        assert_eq!(
580            JsonValue::parse("123.5").unwrap(),
581            JsonValue::Number(JsonNumber::F64(123.5))
582        );
583        assert_eq!(
584            JsonValue::parse("1e5").unwrap(),
585            JsonValue::Number(JsonNumber::F64(100000.0))
586        );
587        assert_eq!(
588            JsonValue::parse("9007199254740993").unwrap(),
589            JsonValue::Number(JsonNumber::I64(9007199254740993))
590        );
591        assert_eq!(
592            JsonValue::parse("9223372036854775808").unwrap(),
593            JsonValue::Number(JsonNumber::U64(9223372036854775808))
594        );
595        assert_eq!(
596            JsonValue::parse(r#""hi""#).unwrap(),
597            JsonValue::String("hi".to_string())
598        );
599        assert_eq!(
600            JsonValue::parse(r#""esc\n""#).unwrap(),
601            JsonValue::String("esc\n".to_string())
602        );
603    }
604
605    #[test]
606    fn test_value_parse_errors() {
607        assert_eq!(JsonValue::parse("").unwrap_err().message, "Empty input");
608        assert_eq!(
609            JsonValue::parse("{").unwrap_err().message,
610            "Unclosed object"
611        );
612        assert_eq!(
613            JsonValue::parse("[1, 2").unwrap_err().message,
614            "Unclosed array"
615        );
616        assert!(JsonValue::parse("[1, 2]").is_ok());
617        assert_eq!(
618            JsonValue::parse("[1, 2] extra").unwrap_err().message,
619            "Unexpected character 'e'"
620        );
621        assert_eq!(
622            JsonValue::parse(r#"{ "key": 1, }"#).unwrap_err().message,
623            "Unexpected '}', expected a string key"
624        );
625    }
626}