1use crate::error::{CrousError, Result};
46use crate::value::Value;
47use base64::Engine;
48
49pub fn parse(input: &str) -> Result<Value> {
69 let mut parser = Parser::new(input);
70 let value = parser.parse_value()?;
71 parser.skip_whitespace_and_comments();
72 Ok(value)
73}
74
/// Cursor over the text being parsed.
///
/// Tracks a byte offset into the input together with the line/column pair
/// that is attached to every parse error.
struct Parser<'a> {
    /// The complete text being parsed.
    input: &'a str,
    /// Byte offset into `input` of the next unread character.
    pos: usize,
    /// Line of the next unread character; starts at 1 and is bumped after each `'\n'`.
    line: usize,
    /// Column of the next unread character; starts at 1, advances by one per
    /// character consumed, and resets to 1 after each `'\n'`.
    col: usize,
}
81
82impl<'a> Parser<'a> {
83 fn new(input: &'a str) -> Self {
84 Self {
85 input,
86 pos: 0,
87 line: 1,
88 col: 1,
89 }
90 }
91
92 fn peek(&self) -> Option<char> {
93 self.input[self.pos..].chars().next()
94 }
95
96 fn advance(&mut self) -> Option<char> {
97 let ch = self.peek()?;
98 self.pos += ch.len_utf8();
99 if ch == '\n' {
100 self.line += 1;
101 self.col = 1;
102 } else {
103 self.col += 1;
104 }
105 Some(ch)
106 }
107
108 fn remaining(&self) -> &'a str {
109 &self.input[self.pos..]
110 }
111
112 fn error(&self, msg: impl Into<String>) -> CrousError {
113 CrousError::ParseError {
114 line: self.line,
115 col: self.col,
116 message: msg.into(),
117 }
118 }
119
120 fn skip_whitespace_and_comments(&mut self) {
121 loop {
122 while let Some(ch) = self.peek() {
124 if ch.is_whitespace() {
125 self.advance();
126 } else {
127 break;
128 }
129 }
130 if self.remaining().starts_with("//") {
132 while let Some(ch) = self.advance() {
133 if ch == '\n' {
134 break;
135 }
136 }
137 continue;
138 }
139 if self.remaining().starts_with("/*") {
141 self.advance(); self.advance(); let mut depth = 1;
144 while depth > 0 {
145 match self.advance() {
146 Some('*') if self.peek() == Some('/') => {
147 self.advance();
148 depth -= 1;
149 }
150 Some('/') if self.peek() == Some('*') => {
151 self.advance();
152 depth += 1;
153 }
154 Some(_) => {}
155 None => break,
156 }
157 }
158 continue;
159 }
160 break;
161 }
162 }
163
164 fn expect_char(&mut self, expected: char) -> Result<()> {
165 self.skip_whitespace_and_comments();
166 match self.advance() {
167 Some(ch) if ch == expected => Ok(()),
168 Some(ch) => Err(self.error(format!("expected '{expected}', got '{ch}'"))),
169 None => Err(self.error(format!("expected '{expected}', got EOF"))),
170 }
171 }
172
173 fn parse_value(&mut self) -> Result<Value> {
174 self.skip_whitespace_and_comments();
175
176 match self.peek() {
177 None => Err(self.error("unexpected end of input")),
178 Some('{') => self.parse_object(),
179 Some('[') => self.parse_array(),
180 Some('"') => self.parse_string_value(),
181 Some('b') if self.remaining().starts_with("b64#") => self.parse_bytes(),
182 Some('t') if self.remaining().starts_with("true") => self.parse_true(),
183 Some('f') if self.remaining().starts_with("false") => self.parse_false(),
184 Some('n') if self.remaining().starts_with("null") => self.parse_null(),
185 Some('i') if self.remaining().starts_with("inf") => self.parse_inf(false),
186 Some('N') if self.remaining().starts_with("NaN") => self.parse_nan(),
187 Some(ch) if ch == '-' || ch == '+' || ch.is_ascii_digit() => {
188 if ch == '-' && self.remaining().starts_with("-inf") {
190 self.parse_inf(true)
191 } else {
192 self.parse_number()
193 }
194 }
195 Some(ch) => Err(self.error(format!("unexpected character: '{ch}'"))),
196 }
197 }
198
199 fn parse_null(&mut self) -> Result<Value> {
200 for _ in 0..4 {
201 self.advance();
202 }
203 self.skip_type_annotation();
204 Ok(Value::Null)
205 }
206
207 fn parse_inf(&mut self, negative: bool) -> Result<Value> {
208 if negative {
209 for _ in 0..4 {
211 self.advance();
212 }
213 self.skip_type_annotation();
214 Ok(Value::Float(f64::NEG_INFINITY))
215 } else {
216 for _ in 0..3 {
218 self.advance();
219 }
220 self.skip_type_annotation();
221 Ok(Value::Float(f64::INFINITY))
222 }
223 }
224
225 fn parse_nan(&mut self) -> Result<Value> {
226 for _ in 0..3 {
227 self.advance();
228 }
229 self.skip_type_annotation();
230 Ok(Value::Float(f64::NAN))
231 }
232
233 fn parse_true(&mut self) -> Result<Value> {
234 for _ in 0..4 {
235 self.advance();
236 }
237 self.skip_type_annotation();
238 Ok(Value::Bool(true))
239 }
240
241 fn parse_false(&mut self) -> Result<Value> {
242 for _ in 0..5 {
243 self.advance();
244 }
245 self.skip_type_annotation();
246 Ok(Value::Bool(false))
247 }
248
249 fn parse_number(&mut self) -> Result<Value> {
250 let start = self.pos;
251 let mut is_negative = false;
252 let mut is_float = false;
253
254 if self.peek() == Some('-') {
255 is_negative = true;
256 self.advance();
257 } else if self.peek() == Some('+') {
258 self.advance();
259 }
260
261 while let Some(ch) = self.peek() {
262 if ch.is_ascii_digit() {
263 self.advance();
264 } else if ch == '.' {
265 is_float = true;
266 self.advance();
267 } else if ch == 'e' || ch == 'E' {
268 is_float = true;
269 self.advance();
270 if self.peek() == Some('+') || self.peek() == Some('-') {
271 self.advance();
272 }
273 } else {
274 break;
275 }
276 }
277
278 let num_str = &self.input[start..self.pos];
279 self.skip_type_annotation();
280
281 if is_float {
282 let f: f64 = num_str
283 .parse()
284 .map_err(|_| self.error(format!("invalid float: {num_str}")))?;
285 Ok(Value::Float(f))
286 } else if is_negative {
287 let i: i64 = num_str
288 .parse()
289 .map_err(|_| self.error(format!("invalid integer: {num_str}")))?;
290 Ok(Value::Int(i))
291 } else {
292 let u: u64 = num_str
293 .parse()
294 .map_err(|_| self.error(format!("invalid integer: {num_str}")))?;
295 Ok(Value::UInt(u))
296 }
297 }
298
299 fn parse_string_value(&mut self) -> Result<Value> {
300 let s = self.parse_quoted_string()?;
301 self.skip_type_annotation();
302 Ok(Value::Str(s))
303 }
304
305 fn parse_quoted_string(&mut self) -> Result<String> {
306 self.expect_char('"')?;
307 let mut s = String::new();
308 loop {
309 match self.advance() {
310 Some('"') => break,
311 Some('\\') => match self.advance() {
312 Some('n') => s.push('\n'),
313 Some('t') => s.push('\t'),
314 Some('r') => s.push('\r'),
315 Some('\\') => s.push('\\'),
316 Some('"') => s.push('"'),
317 Some(ch) => {
318 s.push('\\');
319 s.push(ch);
320 }
321 None => return Err(self.error("unterminated string escape")),
322 },
323 Some(ch) => s.push(ch),
324 None => return Err(self.error("unterminated string")),
325 }
326 }
327 Ok(s)
328 }
329
330 fn parse_bytes(&mut self) -> Result<Value> {
331 for _ in 0..4 {
333 self.advance();
334 }
335 let start = self.pos;
336 while let Some(ch) = self.peek() {
339 if ch == ';' {
340 break;
341 }
342 self.advance();
343 }
344 let b64_str = &self.input[start..self.pos];
345 let bytes = base64::engine::general_purpose::STANDARD
348 .decode(b64_str.trim())
349 .map_err(|e| self.error(format!("invalid base64: {e}")))?;
350 Ok(Value::Bytes(bytes))
351 }
352
353 fn parse_array(&mut self) -> Result<Value> {
354 self.expect_char('[')?;
355 let mut items = Vec::new();
356
357 loop {
358 self.skip_whitespace_and_comments();
359 if self.peek() == Some(']') {
360 self.advance();
361 break;
362 }
363 items.push(self.parse_value()?);
364 self.skip_whitespace_and_comments();
365 if self.peek() == Some(',') {
366 self.advance();
367 }
368 }
369
370 Ok(Value::Array(items))
371 }
372
373 fn parse_object(&mut self) -> Result<Value> {
374 self.expect_char('{')?;
375 let mut entries = Vec::new();
376
377 loop {
378 self.skip_whitespace_and_comments();
379 if self.peek() == Some('}') {
380 self.advance();
381 break;
382 }
383
384 let key = self.parse_key()?;
386 self.expect_char(':')?;
387 let value = self.parse_value()?;
388 self.expect_char(';')?;
389
390 entries.push((key, value));
391 }
392
393 Ok(Value::Object(entries))
394 }
395
396 fn parse_key(&mut self) -> Result<String> {
397 self.skip_whitespace_and_comments();
398 if self.peek() == Some('"') {
399 self.parse_quoted_string()
400 } else {
401 self.parse_identifier()
402 }
403 }
404
405 fn parse_identifier(&mut self) -> Result<String> {
406 let start = self.pos;
407 match self.peek() {
408 Some(ch) if ch.is_alphabetic() || ch == '_' => {
409 self.advance();
410 }
411 _ => return Err(self.error("expected identifier")),
412 }
413 while let Some(ch) = self.peek() {
414 if ch.is_alphanumeric() || ch == '_' {
415 self.advance();
416 } else {
417 break;
418 }
419 }
420 Ok(self.input[start..self.pos].to_string())
421 }
422
423 fn skip_type_annotation(&mut self) {
425 if self.remaining().starts_with("::") {
426 self.advance(); self.advance(); while let Some(ch) = self.peek() {
429 if ch.is_alphanumeric() || ch == '_' {
430 self.advance();
431 } else {
432 break;
433 }
434 }
435 }
436 }
437}
438
439pub fn pretty_print(value: &Value, indent: usize) -> String {
460 let mut out = String::new();
461 write_value(&mut out, value, indent, 0);
462 out
463}
464
465fn write_value(out: &mut String, value: &Value, indent_size: usize, depth: usize) {
466 let indent = " ".repeat(indent_size * depth);
467 let inner_indent = " ".repeat(indent_size * (depth + 1));
468
469 match value {
470 Value::Null => out.push_str("null"),
471 Value::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
472 Value::UInt(n) => out.push_str(&n.to_string()),
473 Value::Int(n) => {
474 if *n == 0 {
477 out.push_str("-0");
478 } else {
479 out.push_str(&n.to_string());
480 }
481 }
482 Value::Float(f) => {
483 if f.is_nan() {
484 out.push_str("NaN");
485 } else if f.is_infinite() {
486 if f.is_sign_negative() {
487 out.push_str("-inf");
488 } else {
489 out.push_str("inf");
490 }
491 } else {
492 let s = format!("{f}");
494 if s.contains('.') || s.contains('e') || s.contains('E') {
495 out.push_str(&s);
496 } else {
497 out.push_str(&format!("{f}.0"));
498 }
499 }
500 }
501 Value::Str(s) => {
502 out.push('"');
503 for ch in s.chars() {
504 match ch {
505 '"' => out.push_str("\\\""),
506 '\\' => out.push_str("\\\\"),
507 '\n' => out.push_str("\\n"),
508 '\r' => out.push_str("\\r"),
509 '\t' => out.push_str("\\t"),
510 c => out.push(c),
511 }
512 }
513 out.push('"');
514 }
515 Value::Bytes(b) => {
516 out.push_str("b64#");
517 out.push_str(&base64::engine::general_purpose::STANDARD.encode(b));
518 }
519 Value::Array(items) => {
520 if items.is_empty() {
521 out.push_str("[]");
522 } else if is_simple_array(items) {
523 out.push('[');
525 for (i, item) in items.iter().enumerate() {
526 if i > 0 {
527 out.push_str(", ");
528 }
529 write_value(out, item, indent_size, depth);
530 }
531 out.push(']');
532 } else {
533 out.push_str("[\n");
534 for (i, item) in items.iter().enumerate() {
535 out.push_str(&inner_indent);
536 write_value(out, item, indent_size, depth + 1);
537 if i < items.len() - 1 {
538 out.push(',');
539 }
540 out.push('\n');
541 }
542 out.push_str(&indent);
543 out.push(']');
544 }
545 }
546 Value::Object(entries) => {
547 if entries.is_empty() {
548 out.push_str("{}");
549 } else {
550 out.push_str("{\n");
551 for (key, val) in entries {
552 out.push_str(&inner_indent);
553 if is_valid_identifier(key) {
554 out.push_str(key);
555 } else {
556 out.push('"');
557 out.push_str(key);
558 out.push('"');
559 }
560 out.push_str(": ");
561 write_value(out, val, indent_size, depth + 1);
562 out.push_str(";\n");
563 }
564 out.push_str(&indent);
565 out.push('}');
566 }
567 }
568 }
569}
570
571fn is_simple_array(items: &[Value]) -> bool {
573 items.len() <= 8
574 && items.iter().all(|v| {
575 matches!(
576 v,
577 Value::Null
578 | Value::Bool(_)
579 | Value::UInt(_)
580 | Value::Int(_)
581 | Value::Float(_)
582 | Value::Str(_)
583 )
584 })
585}
586
/// Reports whether `s` can be written as a bare (unquoted) key: it must
/// start with an alphabetic character or `_` and continue with only
/// alphanumeric characters or `_`. The empty string is not an identifier.
fn is_valid_identifier(s: &str) -> bool {
    let mut rest = s.chars();
    let head_ok = matches!(rest.next(), Some(c) if c.is_alphabetic() || c == '_');
    head_ok && rest.all(|c| c.is_alphanumeric() || c == '_')
}
596
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a string value from a literal.
    fn text(s: &str) -> Value {
        Value::Str(s.into())
    }

    /// Builds an object value from `(key, value)` pairs, preserving order.
    fn object(fields: Vec<(&str, Value)>) -> Value {
        Value::Object(fields.into_iter().map(|(k, v)| (k.into(), v)).collect())
    }

    #[test]
    fn parse_null() {
        let parsed = parse("null").unwrap();
        assert_eq!(parsed, Value::Null);
    }

    #[test]
    fn parse_bool() {
        for (source, expected) in [("true", true), ("false", false)] {
            assert_eq!(parse(source).unwrap(), Value::Bool(expected));
        }
    }

    #[test]
    fn parse_uint() {
        for (source, expected) in [("42", 42u64), ("0", 0u64)] {
            assert_eq!(parse(source).unwrap(), Value::UInt(expected));
        }
    }

    #[test]
    fn parse_int() {
        for (source, expected) in [("-1", -1i64), ("-42", -42i64)] {
            assert_eq!(parse(source).unwrap(), Value::Int(expected));
        }
    }

    #[test]
    fn parse_float() {
        for (source, expected) in [("3.125", 3.125f64), ("-2.5", -2.5f64)] {
            assert_eq!(parse(source).unwrap(), Value::Float(expected));
        }
    }

    #[test]
    fn parse_string() {
        assert_eq!(parse(r#""hello""#).unwrap(), text("hello"));
        // Escaped quotes inside the literal are unescaped by the parser.
        assert_eq!(
            parse(r#""with \"quotes\"""#).unwrap(),
            text("with \"quotes\"")
        );
    }

    #[test]
    fn parse_bytes() {
        let parsed = parse("b64#AQID;").unwrap();
        assert_eq!(parsed, Value::Bytes(vec![1, 2, 3]));
    }

    #[test]
    fn parse_array() {
        let parsed = parse("[1, 2, 3]").unwrap();
        let expected = Value::Array(vec![Value::UInt(1), Value::UInt(2), Value::UInt(3)]);
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_object() {
        let parsed = parse(r#"{ name: "Alice"; age: 30; }"#).unwrap();
        let expected = object(vec![("name", text("Alice")), ("age", Value::UInt(30))]);
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_nested() {
        let source = r#"{
            users: [
                { name: "Bob"; scores: [100, 95, 87]; }
            ];
            count: 1;
        }"#;
        let parsed = parse(source).unwrap();

        let scores = Value::Array(vec![Value::UInt(100), Value::UInt(95), Value::UInt(87)]);
        let bob = object(vec![("name", text("Bob")), ("scores", scores)]);
        let expected = object(vec![
            ("users", Value::Array(vec![bob])),
            ("count", Value::UInt(1)),
        ]);
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_comments() {
        let source = r#"{
            // This is a comment
            name: "Alice"; /* inline comment */
            age: 30;
        }"#;
        let parsed = parse(source).unwrap();
        let expected = object(vec![("name", text("Alice")), ("age", Value::UInt(30))]);
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_type_annotation() {
        // The `::u32` suffix is accepted and discarded.
        let parsed = parse("42::u32").unwrap();
        assert_eq!(parsed, Value::UInt(42));
    }

    #[test]
    fn pretty_print_roundtrip() {
        let original = object(vec![
            ("name", text("Alice")),
            ("age", Value::UInt(30)),
            ("tags", Value::Array(vec![text("admin"), text("user")])),
        ]);
        let rendered = pretty_print(&original, 4);
        let reparsed = parse(&rendered).unwrap();
        assert_eq!(reparsed, original);
    }

    #[test]
    fn pretty_print_bytes() {
        let original = Value::Bytes(vec![0xDE, 0xAD, 0xBE, 0xEF]);
        let rendered = pretty_print(&original, 0);
        assert!(rendered.starts_with("b64#"));
        let reparsed = parse(&rendered).unwrap();
        assert_eq!(reparsed, original);
    }

    #[test]
    fn text_binary_text_roundtrip() {
        let source = r#"{ name: "Alice"; age: 30; active: true; }"#;
        let from_text = parse(source).unwrap();

        // Text -> binary.
        let mut encoder = crate::encoder::Encoder::new();
        encoder.encode_value(&from_text).unwrap();
        let encoded = encoder.finish().unwrap();

        // Binary -> value.
        let mut decoder = crate::decoder::Decoder::new(&encoded);
        let from_binary = decoder.decode_next().unwrap().to_owned_value();
        assert_eq!(from_text, from_binary);

        // Value -> text -> value.
        let rendered = pretty_print(&from_binary, 4);
        let reparsed = parse(&rendered).unwrap();
        assert_eq!(from_text, reparsed);
    }
}