rson_core/
parser.rs

1//! RSON parser implementation using nom.
2//!
3//! This module provides functions to parse RSON text into `RsonValue` structures.
4//! The parser is built with nom for efficient, zero-copy parsing where possible.
5
6use crate::{RsonValue, RsonError, RsonResult};
7use indexmap::IndexMap;
8use nom::{
9    branch::alt,
10    bytes::complete::{tag, take_while, take_while1},
11    character::complete::{
12        char, multispace1, none_of, 
13        alpha1, digit1
14    },
15    combinator::{map, map_res, opt, recognize, value},
16    multi::separated_list0,
17    number::complete::recognize_float,
18    sequence::{delimited, pair, preceded, separated_pair, terminated, tuple},
19    IResult,
20};
21
22#[cfg(not(feature = "std"))]
23use alloc::{
24    string::{String, ToString},
25    vec::Vec,
26};
27
28/// Parse a complete RSON document.
29pub fn parse_rson(input: &str) -> RsonResult<RsonValue> {
30    let result = terminated(
31        preceded(ws_and_comments, parse_value),
32        preceded(ws_and_comments, nom::combinator::eof)
33    )(input);
34    
35    match result {
36        Ok((_, value)) => Ok(value),
37        Err(e) => Err(RsonError::from(e)),
38    }
39}
40
41/// Parse a single RSON value (may not consume entire input).
42pub fn parse_rson_value(input: &str) -> RsonResult<RsonValue> {
43    let result = preceded(ws_and_comments, parse_value)(input);
44    
45    match result {
46        Ok((_, value)) => Ok(value),
47        Err(e) => Err(RsonError::from(e)),
48    }
49}
50
51/// Parse any RSON value.
52fn parse_value(input: &str) -> IResult<&str, RsonValue> {
53    alt((
54        parse_null,
55        parse_bool,
56        parse_option,
57        parse_enum,
58        parse_struct,
59        parse_tuple,
60        parse_number,
61        parse_char,
62        parse_string,
63        parse_array,
64        parse_map,
65    ))(input)
66}
67
68/// Parse whitespace and comments.
69fn ws_and_comments(input: &str) -> IResult<&str, ()> {
70    value(
71        (),
72        nom::multi::many0(alt((
73            value((), multispace1),
74            parse_line_comment,
75            parse_block_comment,
76        )))
77    )(input)
78}
79
80/// Parse a line comment: // comment text
81fn parse_line_comment(input: &str) -> IResult<&str, ()> {
82    value(
83        (),
84        tuple((
85            tag("//"),
86            take_while(|c| c != '\n'),
87            opt(char('\n')),
88        ))
89    )(input)
90}
91
92/// Parse a block comment: /* comment text */
93fn parse_block_comment(input: &str) -> IResult<&str, ()> {
94    value(
95        (),
96        tuple((
97            tag("/*"),
98            take_while(|c| c != '*'),
99            tag("*/"),
100        ))
101    )(input)
102}
103
104/// Parse null value.
105fn parse_null(input: &str) -> IResult<&str, RsonValue> {
106    value(RsonValue::Null, tag("null"))(input)
107}
108
109/// Parse boolean value.
110fn parse_bool(input: &str) -> IResult<&str, RsonValue> {
111    alt((
112        value(RsonValue::Bool(true), tag("true")),
113        value(RsonValue::Bool(false), tag("false")),
114    ))(input)
115}
116
117/// Parse Option value: Some(value) or None
118fn parse_option(input: &str) -> IResult<&str, RsonValue> {
119    alt((
120        map(
121            preceded(
122                tag("Some"),
123                delimited(
124                    preceded(ws_and_comments, char('(')),
125                    preceded(ws_and_comments, parse_value),
126                    preceded(ws_and_comments, char(')')),
127                )
128            ),
129            |v| RsonValue::Option(Some(Box::new(v)))
130        ),
131        value(RsonValue::Option(None), tag("None")),
132    ))(input)
133}
134
135/// Parse a number (integer or float).
136fn parse_number(input: &str) -> IResult<&str, RsonValue> {
137    // First try to parse as float
138    let float_result: IResult<&str, &str> = recognize_float(input);
139    if let Ok((remaining, number_str)) = float_result {
140        if number_str.contains('.') || number_str.contains('e') || number_str.contains('E') {
141            if let Ok(f) = number_str.parse::<f64>() {
142                return Ok((remaining, RsonValue::Float(f)));
143            }
144        }
145    }
146    
147    // Try to parse as integer
148    map_res(
149        recognize(pair(opt(char('-')), digit1)),
150        |s: &str| s.parse::<i64>().map(RsonValue::Int)
151    )(input)
152}
153
154/// Parse a character literal: 'c'
155fn parse_char(input: &str) -> IResult<&str, RsonValue> {
156    map(
157        delimited(char('\''), parse_char_content, char('\'')),
158        RsonValue::Char
159    )(input)
160}
161
162/// Parse the content of a character literal.
163fn parse_char_content(input: &str) -> IResult<&str, char> {
164    alt((
165        none_of("'\\"),
166        preceded(char('\\'), parse_escape_char),
167    ))(input)
168}
169
170/// Parse an escape character.
171fn parse_escape_char(input: &str) -> IResult<&str, char> {
172    alt((
173        value('\n', char('n')),
174        value('\r', char('r')),
175        value('\t', char('t')),
176        value('\\', char('\\')),
177        value('\'', char('\'')),
178        value('\"', char('\"')),
179        value('\0', char('0')),
180        parse_unicode_escape,
181    ))(input)
182}
183
184/// Parse Unicode escape: \uXXXX
185fn parse_unicode_escape(input: &str) -> IResult<&str, char> {
186    map_res(
187        preceded(char('u'), take_while1(|c: char| c.is_ascii_hexdigit())),
188        |hex_str: &str| {
189            u32::from_str_radix(hex_str, 16)
190                .ok()
191                .and_then(char::from_u32)
192                .ok_or("Invalid Unicode codepoint")
193        }
194    )(input)
195}
196
197/// Parse a string literal: "string content"
198fn parse_string(input: &str) -> IResult<&str, RsonValue> {
199    map(
200        delimited(char('"'), parse_string_content, char('"')),
201        |s| RsonValue::String(s.to_string())
202    )(input)
203}
204
205/// Parse the content of a string literal.
206fn parse_string_content(input: &str) -> IResult<&str, String> {
207    let mut result = String::new();
208    let mut remaining = input;
209    
210    while !remaining.is_empty() {
211        if remaining.starts_with('"') {
212            break;
213        } else if remaining.starts_with('\\') {
214            let (rest, escaped_char) = preceded(char('\\'), parse_escape_char)(remaining)?;
215            result.push(escaped_char);
216            remaining = rest;
217        } else {
218            let (rest, ch) = nom::character::complete::anychar(remaining)?;
219            result.push(ch);
220            remaining = rest;
221        }
222    }
223    
224    Ok((remaining, result))
225}
226
227/// Parse an identifier (unquoted key or type name).
228fn parse_identifier(input: &str) -> IResult<&str, String> {
229    map(
230        recognize(pair(
231            alt((alpha1, tag("_"))),
232            take_while(|c: char| c.is_alphanumeric() || c == '_')
233        )),
234        |s: &str| s.to_string()
235    )(input)
236}
237
238/// Parse an array: [value1, value2, ...]
239fn parse_array(input: &str) -> IResult<&str, RsonValue> {
240    map(
241        delimited(
242            char('['),
243            terminated(
244                separated_list0(
245                    preceded(ws_and_comments, char(',')),
246                    preceded(ws_and_comments, parse_value)
247                ),
248                opt(preceded(ws_and_comments, char(',')))
249            ),
250            preceded(ws_and_comments, char(']'))
251        ),
252        RsonValue::Array
253    )(input)
254}
255
256/// Parse a map: { key: value, ... }
257fn parse_map(input: &str) -> IResult<&str, RsonValue> {
258    map(
259        delimited(
260            char('{'),
261            terminated(
262                separated_list0(
263                    preceded(ws_and_comments, char(',')),
264                    preceded(
265                        ws_and_comments,
266                        separated_pair(
267                            parse_map_key,
268                            preceded(ws_and_comments, char(':')),
269                            preceded(ws_and_comments, parse_value)
270                        )
271                    )
272                ),
273                opt(preceded(ws_and_comments, char(',')))
274            ),
275            preceded(ws_and_comments, char('}'))
276        ),
277        |pairs| {
278            let mut map = IndexMap::new();
279            for (key, value) in pairs {
280                map.insert(key, value);
281            }
282            RsonValue::Map(map)
283        }
284    )(input)
285}
286
287/// Parse a map key (identifier or string).
288fn parse_map_key(input: &str) -> IResult<&str, String> {
289    alt((
290        parse_identifier,
291        map(
292            delimited(char('"'), parse_string_content, char('"')),
293            |s| s
294        ),
295    ))(input)
296}
297
298/// Parse a struct: StructName(field: value, ...)
299fn parse_struct(input: &str) -> IResult<&str, RsonValue> {
300    map(
301        pair(
302            parse_identifier,
303            delimited(
304                char('('),
305                terminated(
306                    separated_list0(
307                        preceded(ws_and_comments, char(',')),
308                        preceded(
309                            ws_and_comments,
310                            separated_pair(
311                                parse_identifier,
312                                preceded(ws_and_comments, char(':')),
313                                preceded(ws_and_comments, parse_value)
314                            )
315                        )
316                    ),
317                    opt(preceded(ws_and_comments, char(',')))
318                ),
319                preceded(ws_and_comments, char(')'))
320            )
321        ),
322        |(name, fields)| {
323            let mut field_map = IndexMap::new();
324            for (field_name, value) in fields {
325                field_map.insert(field_name, value);
326            }
327            RsonValue::Struct {
328                name,
329                fields: field_map,
330            }
331        }
332    )(input)
333}
334
335/// Parse a tuple: (value1, value2, ...)
336fn parse_tuple(input: &str) -> IResult<&str, RsonValue> {
337    // Only parse as tuple if it doesn't look like a struct
338    let lookahead = preceded(
339        ws_and_comments,
340        alt((
341            // Check if next thing after '(' is an identifier followed by ':'
342            value(false, tuple((parse_identifier, preceded(ws_and_comments, char(':'))))),
343            // Otherwise it could be a tuple
344            value(true, take_while(|_| true))
345        ))
346    );
347    
348    // If lookahead suggests this might be a struct field, don't parse as tuple
349    if let Ok((_, false)) = preceded(char('('), lookahead)(input) {
350        return Err(nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Alt)));
351    }
352    
353    map(
354        delimited(
355            char('('),
356            terminated(
357                separated_list0(
358                    preceded(ws_and_comments, char(',')),
359                    preceded(ws_and_comments, parse_value)
360                ),
361                opt(preceded(ws_and_comments, char(',')))
362            ),
363            preceded(ws_and_comments, char(')'))
364        ),
365        RsonValue::Tuple
366    )(input)
367}
368
369/// Parse an enum: EnumName::Variant or EnumName::Variant(value)
370fn parse_enum(input: &str) -> IResult<&str, RsonValue> {
371    map(
372        tuple((
373            parse_identifier,
374            preceded(tag("::"), parse_identifier),
375            opt(delimited(
376                char('('),
377                preceded(ws_and_comments, parse_value),
378                preceded(ws_and_comments, char(')'))
379            ))
380        )),
381        |(enum_name, variant, value)| RsonValue::Enum {
382            name: enum_name,
383            variant,
384            value: value.map(Box::new),
385        }
386    )(input)
387}
388
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` as a complete document, panicking if parsing fails.
    fn doc(src: &str) -> RsonValue {
        parse_rson(src).unwrap()
    }

    /// Shorthand for an integer value.
    fn int(n: i64) -> RsonValue {
        RsonValue::Int(n)
    }

    /// Shorthand for a string value.
    fn text(s: &str) -> RsonValue {
        RsonValue::String(s.to_string())
    }

    /// Builds an ordered field map from `(name, value)` pairs.
    fn fields(entries: Vec<(&str, RsonValue)>) -> IndexMap<String, RsonValue> {
        entries
            .into_iter()
            .map(|(key, value)| (key.to_string(), value))
            .collect()
    }

    #[test]
    fn test_parse_null() {
        assert_eq!(doc("null"), RsonValue::Null);
    }

    #[test]
    fn test_parse_bool() {
        assert_eq!(doc("true"), RsonValue::Bool(true));
        assert_eq!(doc("false"), RsonValue::Bool(false));
    }

    #[test]
    fn test_parse_numbers() {
        assert_eq!(doc("42"), int(42));
        assert_eq!(doc("-17"), int(-17));
        assert_eq!(doc("3.14"), RsonValue::Float(3.14));
        assert_eq!(doc("-2.5"), RsonValue::Float(-2.5));
    }

    #[test]
    fn test_parse_string() {
        assert_eq!(doc(r#""hello""#), text("hello"));
        assert_eq!(doc(r#""hello\nworld""#), text("hello\nworld"));
    }

    #[test]
    fn test_parse_char() {
        assert_eq!(doc("'a'"), RsonValue::Char('a'));
        assert_eq!(doc(r"'\n'"), RsonValue::Char('\n'));
    }

    #[test]
    fn test_parse_array() {
        let one_two_three = RsonValue::Array(vec![int(1), int(2), int(3)]);

        assert_eq!(doc("[1, 2, 3]"), one_two_three);

        // A trailing comma yields the same array.
        assert_eq!(doc("[1, 2, 3,]"), one_two_three);
    }

    #[test]
    fn test_parse_map() {
        let expected = fields(vec![("name", text("Alice")), ("age", int(30))]);

        assert_eq!(
            doc(r#"{ name: "Alice", age: 30 }"#),
            RsonValue::Map(expected)
        );
    }

    #[test]
    fn test_parse_struct() {
        let expected = RsonValue::Struct {
            name: "Point".to_string(),
            fields: fields(vec![("x", int(10)), ("y", int(20))]),
        };

        assert_eq!(doc("Point(x: 10, y: 20)"), expected);
    }

    #[test]
    fn test_parse_tuple() {
        assert_eq!(
            doc("(1, 2, 3)"),
            RsonValue::Tuple(vec![int(1), int(2), int(3)])
        );
    }

    #[test]
    fn test_parse_enum() {
        // Unit variant.
        let red = RsonValue::Enum {
            name: "Color".to_string(),
            variant: "Red".to_string(),
            value: None,
        };
        assert_eq!(doc("Color::Red"), red);

        // Variant carrying a payload.
        let ok = RsonValue::Enum {
            name: "Result".to_string(),
            variant: "Ok".to_string(),
            value: Some(Box::new(text("success"))),
        };
        assert_eq!(doc(r#"Result::Ok("success")"#), ok);
    }

    #[test]
    fn test_parse_option() {
        assert_eq!(doc("None"), RsonValue::Option(None));
        assert_eq!(
            doc("Some(42)"),
            RsonValue::Option(Some(Box::new(int(42))))
        );
    }

    #[test]
    fn test_parse_with_comments() {
        let input = r#"
        // This is a user struct
        User(
            id: 1, // user ID
            name: "Alice", /* user name */
            active: true,
        )
        "#;

        let expected = RsonValue::Struct {
            name: "User".to_string(),
            fields: fields(vec![
                ("id", int(1)),
                ("name", text("Alice")),
                ("active", RsonValue::Bool(true)),
            ]),
        };

        assert_eq!(doc(input), expected);
    }

    #[test]
    fn test_json_compatibility() {
        // A plain JSON document must parse, and must come back as a map.
        let json_input = r#"{
            "name": "Alice",
            "age": 30,
            "active": true,
            "score": 95.5,
            "tags": ["admin", "user"],
            "metadata": null
        }"#;

        let result = parse_rson(json_input);
        assert!(result.is_ok());

        if let RsonValue::Map(_) = result.unwrap() {
            // Expected shape.
        } else {
            panic!("Expected map");
        }
    }
}
565}