1use crate::error::{Error, Result};
2use crate::lexer::{Lexer, Token};
3use crate::value::Value;
4use indexmap::IndexMap;
5
6pub struct Parser<'a> {
8 tokens: Vec<Token>,
9 pos: usize,
10 #[allow(dead_code)]
11 input: &'a str,
12}
13
14impl<'a> Parser<'a> {
15 pub fn new(input: &'a str) -> Result<Self> {
17 let mut lexer = Lexer::new(input);
18 let tokens = lexer.lex()?;
19 Ok(Parser {
20 tokens,
21 pos: 0,
22 input,
23 })
24 }
25
26 pub fn parse(&mut self) -> Result<Value> {
28 self.skip_empty_lines();
29
30 if self.is_eof() {
31 return Ok(Value::Object(IndexMap::new()));
32 }
33
34 if let Some(Token::Hyphen) = self.peek() {
36 return self.parse_expanded_array();
37 }
38
39 self.parse_object_at_depth(0)
41 }
42
43 fn peek(&self) -> Option<&Token> {
45 self.tokens.get(self.pos)
46 }
47
48 fn next_token(&mut self) -> Option<&Token> {
50 let token = self.tokens.get(self.pos);
51 if token.is_some() {
52 self.pos += 1;
53 }
54 token
55 }
56
57 fn is_eof(&self) -> bool {
59 matches!(self.peek(), Some(Token::Eof) | None)
60 }
61
62 fn expect(&mut self, expected: &Token) -> Result<()> {
64 match self.next_token() {
65 Some(token) if token == expected => Ok(()),
66 Some(token) => Err(Error::Message(format!(
67 "expected {:?}, found {:?}",
68 expected, token
69 ))),
70 None => Err(Error::UnexpectedEof),
71 }
72 }
73
74 fn skip_whitespace(&mut self) {
76 while matches!(self.peek(), Some(Token::Newline) | Some(Token::Indent(_))) {
77 self.pos += 1;
78 }
79 }
80
81 fn skip_empty_lines(&mut self) {
83 while matches!(self.peek(), Some(Token::Newline)) {
84 self.pos += 1;
85 }
86 }
87
88 #[allow(dead_code)]
90 fn get_indent(&self) -> usize {
91 match self.peek() {
92 Some(Token::Indent(n)) => *n,
93 _ => 0,
94 }
95 }
96
97 fn consume_indent(&mut self) -> usize {
99 match self.peek() {
100 Some(Token::Indent(n)) => {
101 let level = *n;
102 self.pos += 1;
103 level
104 }
105 _ => 0,
106 }
107 }
108
109 fn parse_object_at_depth(&mut self, expected_depth: usize) -> Result<Value> {
111 let mut object = IndexMap::new();
112
113 loop {
114 self.skip_empty_lines();
115
116 if self.is_eof() {
117 break;
118 }
119
120 let current_depth = self.consume_indent();
122
123 if self.is_eof() {
125 break;
126 }
127
128 if current_depth < expected_depth {
130 if current_depth > 0 {
132 self.pos -= 1;
133 }
134 break;
135 }
136
137 let key = match self.next_token() {
139 Some(Token::Ident(s)) => s.clone(),
140 Some(Token::String(s)) => s.clone(),
141 Some(token) => {
142 return Err(Error::Message(format!(
143 "expected key, found {:?}",
144 token
145 )))
146 }
147 None => return Err(Error::UnexpectedEof),
148 };
149
150 if let Some(Token::OpenBracket) = self.peek() {
152 let arr = self.parse_array_header_rest()?;
154 object.insert(key, arr);
155 } else {
156 self.expect(&Token::Colon)?;
158 let value = self.parse_value_after_colon(current_depth)?;
159 object.insert(key, value);
160 }
161 }
162
163 Ok(Value::Object(object))
164 }
165
166 fn parse_array_header_rest(&mut self) -> Result<Value> {
168 self.expect(&Token::OpenBracket)?;
170 let length = match self.next_token() {
171 Some(Token::Number(n)) => n.parse::<usize>().map_err(|_| Error::InvalidNumber {
172 text: n.clone(),
173 pos: 0,
174 })?,
175 Some(token) => {
176 return Err(Error::Message(format!(
177 "expected number in array length, found {:?}",
178 token
179 )))
180 }
181 None => return Err(Error::UnexpectedEof),
182 };
183 self.expect(&Token::CloseBracket)?;
184
185 let fields = if let Some(Token::OpenBrace) = self.peek() {
187 self.pos += 1;
188 let mut field_names = Vec::new();
189 loop {
190 match self.next_token() {
191 Some(Token::Ident(s)) => field_names.push(s.clone()),
192 Some(Token::String(s)) => field_names.push(s.clone()),
193 Some(token) => {
194 return Err(Error::Message(format!(
195 "expected field name, found {:?}",
196 token
197 )))
198 }
199 None => return Err(Error::UnexpectedEof),
200 }
201
202 match self.peek() {
203 Some(Token::Comma) => self.pos += 1,
204 Some(Token::CloseBrace) => {
205 self.pos += 1;
206 break;
207 }
208 Some(token) => {
209 return Err(Error::Message(format!(
210 "expected comma or }}, found {:?}",
211 token
212 )))
213 }
214 None => return Err(Error::UnexpectedEof),
215 }
216 }
217 Some(field_names)
218 } else {
219 None
220 };
221
222 self.expect(&Token::Colon)?;
224
225 if let Some(fields) = fields {
227 self.parse_tabular_rows(&fields, length)
229 } else {
230 self.parse_inline_primitives(length)
232 }
233 }
234
235 fn parse_value_after_colon(&mut self, parent_depth: usize) -> Result<Value> {
237 while matches!(self.peek(), Some(Token::Indent(_))) {
239 self.pos += 1;
240 }
241
242 match self.peek() {
243 Some(Token::True) => {
244 self.pos += 1;
245 Ok(Value::Bool(true))
246 }
247 Some(Token::False) => {
248 self.pos += 1;
249 Ok(Value::Bool(false))
250 }
251 Some(Token::Null) => {
252 self.pos += 1;
253 Ok(Value::Null)
254 }
255 Some(Token::Number(n)) => {
256 let val = n.clone();
257 self.pos += 1;
258 Ok(Value::Number(val))
259 }
260 Some(Token::String(s)) => {
261 let val = s.clone();
262 self.pos += 1;
263 Ok(Value::String(val))
264 }
265 Some(Token::Ident(s)) => {
266 let mut val = s.clone();
267 self.pos += 1;
268 while let Some(Token::Ident(ref w)) | Some(Token::Number(ref w)) = self.peek() {
270 if val == "true" || val == "false" || val == "null" {
271 break;
272 }
273 val.push(' ');
274 val.push_str(w);
275 self.pos += 1;
276 }
277 Ok(Value::String(val))
278 }
279 Some(Token::OpenBracket) => self.parse_bracket_array(),
280 Some(Token::Hyphen) => self.parse_expanded_array(),
281 Some(Token::Newline) => {
282 self.pos += 1;
284 self.parse_nested_block(parent_depth + 1)
285 }
286 Some(Token::Eof) => Ok(Value::Null),
287 Some(token) => Err(Error::Message(format!(
288 "unexpected token after colon: {:?}",
289 token
290 ))),
291 None => Err(Error::UnexpectedEof),
292 }
293 }
294
295 fn parse_nested_block(&mut self, expected_depth: usize) -> Result<Value> {
297 self.skip_empty_lines();
298
299 if self.is_eof() {
300 return Ok(Value::Null);
301 }
302
303 match self.peek() {
305 Some(Token::Indent(n)) if *n >= expected_depth => {
306 self.parse_object_at_depth(expected_depth)
307 }
308 Some(Token::Hyphen) => {
309 self.parse_expanded_array()
311 }
312 _ => {
313 self.parse_value()
315 }
316 }
317 }
318
319 fn parse_expanded_array(&mut self) -> Result<Value> {
321 let mut items = Vec::new();
322
323 while let Some(Token::Hyphen) = self.peek() {
324 self.pos += 1; let value = self.parse_value()?;
326 items.push(value);
327 self.skip_whitespace();
328 }
329
330 Ok(Value::Array(items))
331 }
332
333 fn parse_bracket_array(&mut self) -> Result<Value> {
335 self.expect(&Token::OpenBracket)?;
336 let mut values = Vec::new();
337
338 loop {
339 self.skip_whitespace();
340 match self.peek() {
341 Some(Token::CloseBracket) => {
342 self.pos += 1;
343 break;
344 }
345 _ => {
346 let value = self.parse_value()?;
347 values.push(value);
348
349 self.skip_whitespace();
350 match self.peek() {
351 Some(Token::Comma) => {
352 self.pos += 1;
353 }
354 Some(Token::CloseBracket) => {
355 self.pos += 1;
356 break;
357 }
358 _ => break,
359 }
360 }
361 }
362 }
363
364 Ok(Value::Array(values))
365 }
366
367 fn parse_inline_primitives(&mut self, expected_count: usize) -> Result<Value> {
369 let mut values = Vec::new();
370 let mut count = 0;
371
372 while count < expected_count {
373 self.skip_whitespace();
374
375 if self.is_eof() {
376 break;
377 }
378
379 let value = match self.peek() {
380 Some(Token::Number(n)) => {
381 let val = n.clone();
382 self.pos += 1;
383 Value::Number(val)
384 }
385 Some(Token::String(s)) => {
386 let val = s.clone();
387 self.pos += 1;
388 Value::String(val)
389 }
390 Some(Token::Ident(s)) => {
391 let mut val = s.clone();
392 self.pos += 1;
393 loop {
395 if let Some(Token::Ident(ref w)) = self.peek() {
396 if val == "true" || val == "false" || val == "null" {
397 break;
398 }
399 val.push(' ');
400 val.push_str(w);
401 self.pos += 1;
402 } else if let Some(Token::Number(ref w)) = self.peek() {
403 if val == "true" || val == "false" || val == "null" {
404 break;
405 }
406 val.push(' ');
407 val.push_str(w);
408 self.pos += 1;
409 } else {
410 break;
411 }
412 }
413 if val == "true" {
414 Value::Bool(true)
415 } else if val == "false" {
416 Value::Bool(false)
417 } else if val == "null" {
418 Value::Null
419 } else {
420 Value::String(val)
421 }
422 }
423 Some(Token::True) => {
424 self.pos += 1;
425 Value::Bool(true)
426 }
427 Some(Token::False) => {
428 self.pos += 1;
429 Value::Bool(false)
430 }
431 Some(Token::Null) => {
432 self.pos += 1;
433 Value::Null
434 }
435 _ => break,
436 };
437
438 values.push(value);
439 count += 1;
440
441 match self.peek() {
443 Some(Token::Comma) | Some(Token::Pipe) => {
444 self.pos += 1;
445 }
446 _ => break,
447 }
448 }
449
450 if count != expected_count {
451 return Err(Error::ArrayLengthMismatch {
452 declared: expected_count,
453 found: count,
454 pos: 0,
455 });
456 }
457
458 Ok(Value::Array(values))
459 }
460
461 fn parse_tabular_rows(&mut self, fields: &[String], expected_count: usize) -> Result<Value> {
463 let mut rows = Vec::new();
464 let mut count = 0;
465
466 while count < expected_count {
467 self.skip_empty_lines();
468
469 if self.is_eof() {
470 break;
471 }
472
473 match self.peek() {
475 Some(Token::Indent(_)) => {
476 self.consume_indent();
477 }
478 Some(Token::Ident(_)) | Some(Token::String(_)) | Some(Token::Hyphen) => {
479 break;
481 }
482 _ => {
483 break;
484 }
485 }
486
487 let mut row = IndexMap::new();
488 let mut field_idx = 0;
489
490 while field_idx < fields.len() {
491 let value = match self.peek() {
492 Some(Token::Number(n)) => {
493 let val = n.clone();
494 self.pos += 1;
495 Value::Number(val)
496 }
497 Some(Token::String(s)) => {
498 let val = s.clone();
499 self.pos += 1;
500 Value::String(val)
501 }
502 Some(Token::Ident(s)) => {
503 let mut val = s.clone();
504 self.pos += 1;
505 loop {
507 if let Some(Token::Ident(ref w)) = self.peek() {
508 if val == "true" || val == "false" || val == "null" {
509 break;
510 }
511 val.push(' ');
512 val.push_str(w);
513 self.pos += 1;
514 } else if let Some(Token::Number(ref w)) = self.peek() {
515 if val == "true" || val == "false" || val == "null" {
516 break;
517 }
518 val.push(' ');
519 val.push_str(w);
520 self.pos += 1;
521 } else {
522 break;
523 }
524 }
525 if val == "true" {
526 Value::Bool(true)
527 } else if val == "false" {
528 Value::Bool(false)
529 } else if val == "null" {
530 Value::Null
531 } else {
532 Value::String(val)
533 }
534 }
535 Some(Token::True) => {
536 self.pos += 1;
537 Value::Bool(true)
538 }
539 Some(Token::False) => {
540 self.pos += 1;
541 Value::Bool(false)
542 }
543 Some(Token::Null) => {
544 self.pos += 1;
545 Value::Null
546 }
547 _ => break,
548 };
549
550 row.insert(fields[field_idx].clone(), value);
551 field_idx += 1;
552
553 match self.peek() {
555 Some(Token::Comma) | Some(Token::Pipe) => {
556 self.pos += 1;
557 }
558 _ => break,
559 }
560 }
561
562 rows.push(Value::Object(row));
563 count += 1;
564 }
565
566 if count != expected_count {
567 return Err(Error::ArrayLengthMismatch {
568 declared: expected_count,
569 found: count,
570 pos: 0,
571 });
572 }
573
574 Ok(Value::Array(rows))
575 }
576
577 fn parse_value(&mut self) -> Result<Value> {
579 self.skip_whitespace();
580
581 match self.peek() {
582 Some(Token::True) => {
583 self.pos += 1;
584 Ok(Value::Bool(true))
585 }
586 Some(Token::False) => {
587 self.pos += 1;
588 Ok(Value::Bool(false))
589 }
590 Some(Token::Null) => {
591 self.pos += 1;
592 Ok(Value::Null)
593 }
594 Some(Token::Number(n)) => {
595 let val = n.clone();
596 self.pos += 1;
597 Ok(Value::Number(val))
598 }
599 Some(Token::String(s)) => {
600 let val = s.clone();
601 self.pos += 1;
602 Ok(Value::String(val))
603 }
604 Some(Token::Ident(s)) => {
605 let val = s.clone();
606 self.pos += 1;
607 Ok(Value::String(val))
608 }
609 Some(Token::OpenBracket) => self.parse_bracket_array(),
610 Some(Token::Hyphen) => self.parse_expanded_array(),
611 Some(token) => Err(Error::Message(format!(
612 "unexpected token in value: {:?}",
613 token
614 ))),
615 None => Err(Error::UnexpectedEof),
616 }
617 }
618}
619
620pub fn parse(input: &str) -> Result<Value> {
622 let mut parser = Parser::new(input)?;
623 parser.parse()
624}
625
#[cfg(test)]
mod tests {
    use super::*;

    /// Flat `key: value` lines with a number, a bare word and a boolean.
    #[test]
    fn test_parse_simple_object() {
        let doc = parse("id: 123\nname: Alice\nactive: true").unwrap();

        assert!(doc.is_object());
        assert_eq!(doc.get("id").unwrap().as_i64(), Some(123));
        assert_eq!(doc.get("name").unwrap().as_str(), Some("Alice"));
        assert_eq!(doc.get("active").unwrap().as_bool(), Some(true));
    }

    /// A key whose value is an indented block of further keys.
    #[test]
    fn test_parse_nested_object() {
        let doc = parse("user:\n id: 1\n name: Alice").unwrap();
        assert!(doc.is_object());

        let user = doc.get("user").unwrap();
        assert!(user.is_object());
        assert_eq!(user.get("id").unwrap().as_i64(), Some(1));
        assert_eq!(user.get("name").unwrap().as_str(), Some("Alice"));
    }

    /// `key[N]: a,b,c` inline primitive array.
    #[test]
    fn test_parse_array_inline() {
        let doc = parse("tags[3]: foo,bar,baz").unwrap();
        assert!(doc.is_object());

        let tags = doc.get("tags").unwrap();
        assert!(tags.is_array());

        let items = tags.as_array().unwrap();
        assert_eq!(items.len(), 3);
        assert_eq!(items[0].as_str(), Some("foo"));
        assert_eq!(items[1].as_str(), Some("bar"));
        assert_eq!(items[2].as_str(), Some("baz"));
    }

    /// `key[N]{fields}:` header followed by one indented row per element.
    #[test]
    fn test_parse_tabular_array() {
        let doc = parse("users[2]{id,name,role}:\n 1,Alice,admin\n 2,Bob,user").unwrap();
        assert!(doc.is_object());

        let users = doc.get("users").unwrap();
        assert!(users.is_array());

        let rows = users.as_array().unwrap();
        assert_eq!(rows.len(), 2);

        let expected = [(1, "Alice", "admin"), (2, "Bob", "user")];
        for (row, (id, name, role)) in rows.iter().zip(expected.iter()) {
            let row = row.as_object().unwrap();
            assert_eq!(row.get("id").unwrap().as_i64(), Some(*id));
            assert_eq!(row.get("name").unwrap().as_str(), Some(*name));
            assert_eq!(row.get("role").unwrap().as_str(), Some(*role));
        }
    }

    /// Top-level hyphen list.
    #[test]
    fn test_parse_expanded_array() {
        let doc = parse("- first\n- second\n- third").unwrap();
        assert!(doc.is_array());

        let items = doc.as_array().unwrap();
        assert_eq!(items.len(), 3);
        assert_eq!(items[0].as_str(), Some("first"));
        assert_eq!(items[1].as_str(), Some("second"));
        assert_eq!(items[2].as_str(), Some("third"));
    }

    /// Nested object, inline array and tabular array in one document.
    #[test]
    fn test_parse_complex_document() {
        let input = "context:\n task: Our favorite hikes together\n location: Boulder\n season: spring_2025\nfriends[3]: ana,luis,sam\nusers[2]{id,name,role}:\n 1,Alice,admin\n 2,Bob,user";

        let doc = parse(input).unwrap();
        assert!(doc.is_object());

        let context = doc.get("context").unwrap();
        assert!(context.is_object());
        assert_eq!(
            context.get("task").unwrap().as_str(),
            Some("Our favorite hikes together")
        );

        let friends = doc.get("friends").unwrap();
        assert!(friends.is_array());
        assert_eq!(friends.as_array().unwrap().len(), 3);

        let users = doc.get("users").unwrap();
        assert!(users.is_array());
        let rows = users.as_array().unwrap();
        assert_eq!(rows.len(), 2);

        let first = rows[0].as_object().unwrap();
        assert_eq!(first.get("name").unwrap().as_str(), Some("Alice"));
        assert_eq!(first.get("role").unwrap().as_str(), Some("admin"));
    }
}