1use crate::lexer::{Lexer, Token, TokenKind};
2use std::borrow::Cow;
3use std::collections::BTreeMap;
4
/// How trailing newlines of a block scalar are treated once its lines are joined.
#[derive(Debug, Clone, Copy)]
enum ChompMode {
    /// Selected by a `-` indicator: every trailing newline is removed.
    Strip,
    /// Default when no indicator is present: non-empty text ends with exactly one newline.
    Clip,
    /// Selected by a `+` indicator: trailing newlines are retained.
    Keep,
}
11
/// A parsed YAML value.
///
/// Scalars are always kept as text; this type has no numeric, boolean or null variants.
#[derive(Debug, Clone, PartialEq)]
pub enum YamlValue<'a> {
    /// Scalar text, borrowed or owned.
    String(Cow<'a, str>),
    /// Sequence of nodes, in insertion order.
    Array(Vec<YamlNode<'a>>),
    /// Mapping from key text to node; a `BTreeMap` iterates in sorted key order.
    Object(BTreeMap<Cow<'a, str>, YamlNode<'a>>),
}

/// A value together with the comments attached to it.
#[derive(Debug, Clone, PartialEq)]
pub struct YamlNode<'a> {
    /// The value carried by this node.
    pub value: YamlValue<'a>,
    /// Comment attached ahead of the value, if any.
    pub leading_comment: Option<Cow<'a, str>>,
    /// Comment attached after the value, if any.
    pub inline_comment: Option<Cow<'a, str>>,
}

impl<'a> YamlNode<'a> {
    /// Wraps `value` in a node that carries no comments.
    pub(crate) fn new(value: YamlValue<'a>) -> Self {
        Self::with_comments(value, None, None)
    }

    /// Builds a node from `value` with the given leading and inline comments.
    pub(crate) fn with_comments(
        value: YamlValue<'a>,
        leading: Option<Cow<'a, str>>,
        inline: Option<Cow<'a, str>>,
    ) -> Self {
        YamlNode {
            value,
            leading_comment: leading,
            inline_comment: inline,
        }
    }

    /// Public constructor: wraps `value` in a node that carries no comments.
    pub fn from_value(value: YamlValue<'a>) -> Self {
        Self::new(value)
    }

    /// Returns the scalar text, or `None` when the node is not a string.
    pub fn as_str(&self) -> Option<&str> {
        if let YamlValue::String(text) = &self.value {
            Some(text.as_ref())
        } else {
            None
        }
    }

    /// Returns the underlying map, or `None` when the node is not an object.
    pub fn as_object(&self) -> Option<&BTreeMap<Cow<'a, str>, YamlNode<'a>>> {
        if let YamlValue::Object(map) = &self.value {
            Some(map)
        } else {
            None
        }
    }

    /// Returns the items as a slice, or `None` when the node is not an array.
    pub fn as_array(&self) -> Option<&[YamlNode<'a>]> {
        if let YamlValue::Array(items) = &self.value {
            Some(items.as_slice())
        } else {
            None
        }
    }

    /// Looks up `key` in an object node.
    ///
    /// Returns `None` when the node is not an object or the key is absent.
    pub fn get(&self, key: &str) -> Option<&YamlNode<'a>> {
        // `Cow<str>` implements `Borrow<str>`, so the map's ordered lookup can be
        // used directly instead of scanning every entry.
        self.as_object()?.get(key)
    }

    /// Reports whether the node holds a string.
    pub fn is_string(&self) -> bool {
        matches!(self.value, YamlValue::String(_))
    }

    /// Reports whether the node holds an object.
    pub fn is_object(&self) -> bool {
        matches!(self.value, YamlValue::Object(_))
    }

    /// Reports whether the node holds an array.
    pub fn is_array(&self) -> bool {
        matches!(self.value, YamlValue::Array(_))
    }
}
113
114pub(crate) struct Parser<'g> {
115 tokens: Vec<Token<'g>>,
116 current: usize,
117}
118
119impl<'g> Parser<'g> {
120 pub(crate) fn new(source: &'g str) -> Self {
121 let mut lexer = Lexer::new(source);
122 let tokens = lexer.tokenize();
123 Parser { tokens, current: 0 }
124 }
125
126 pub(crate) fn parse(&mut self) -> Result<YamlNode<'g>, String> {
127 self.skip_whitespace_and_newlines();
128 let result = self.parse_value(0)?;
129 Ok(result)
130 }
131
132 fn current_token(&self) -> Option<&Token<'g>> {
133 self.tokens.get(self.current)
134 }
135
136 fn advance(&mut self) -> Option<&Token<'g>> {
137 if self.current < self.tokens.len() {
138 let token = &self.tokens[self.current];
139 self.current += 1;
140 Some(token)
141 } else {
142 None
143 }
144 }
145
146 fn skip_whitespace(&mut self) {
147 while let Some(token) = self.current_token() {
148 if token.kind != TokenKind::Whitespace {
149 break;
150 }
151 self.advance();
152 }
153 }
154
155 fn skip_whitespace_and_newlines(&mut self) {
156 while let Some(token) = self.current_token() {
157 match token.kind {
158 TokenKind::Whitespace
159 | TokenKind::NewLine
160 | TokenKind::Indent
161 | TokenKind::Dedent => {
162 self.advance();
163 }
164 TokenKind::Identifier
165 | TokenKind::Colon
166 | TokenKind::String
167 | TokenKind::Hyphen
168 | TokenKind::Comment
169 | TokenKind::Pipe
170 | TokenKind::GreaterThan => break,
171 }
172 }
173 }
174
175 fn collect_comment(&mut self) -> Option<Cow<'g, str>> {
176 self.skip_whitespace();
177 if let Some(token) = self.current_token()
178 && token.kind == TokenKind::Comment
179 {
180 let comment = token.text.trim_start_matches('#').trim();
181 self.advance();
182 return Some(Cow::Borrowed(comment));
183 }
184 None
185 }
186
187 fn parse_value(&mut self, min_indent: usize) -> Result<YamlNode<'g>, String> {
188 self.skip_whitespace();
189
190 let mut leading_comment: Option<Cow<'g, str>> = None;
192 if let Some(token) = self.current_token()
193 && token.kind == TokenKind::Comment
194 {
195 leading_comment = Some(Cow::Borrowed(token.text.trim_start_matches('#').trim()));
196 self.advance();
197 self.skip_whitespace_and_newlines();
198 }
199
200 let token = self
201 .current_token()
202 .ok_or_else(|| "Unexpected end of input".to_string())?;
203
204 let node = match token.kind {
205 TokenKind::Hyphen => {
206 let value = self.parse_array(min_indent)?;
207 YamlNode::new(value)
208 }
209 TokenKind::Identifier => {
210 let text = token.text;
211 self.advance();
212
213 self.skip_whitespace();
214 if let Some(next) = self.current_token()
215 && next.kind == TokenKind::Colon
216 {
217 self.current -= 1; return self.parse_object(min_indent);
219 }
220
221 YamlNode::new(YamlValue::String(Cow::Borrowed(text)))
223 }
224 TokenKind::String => {
225 let text = token.text;
226 let content = if text.starts_with('"') || text.starts_with('\'') {
227 &text[1..text.len() - 1]
228 } else {
229 text
230 };
231 self.advance();
232 YamlNode::new(YamlValue::String(Cow::Borrowed(content)))
233 }
234 TokenKind::Whitespace
235 | TokenKind::NewLine
236 | TokenKind::Colon
237 | TokenKind::Comment
238 | TokenKind::Indent
239 | TokenKind::Dedent
240 | TokenKind::Pipe
241 | TokenKind::GreaterThan => {
242 return Err(format!("Unexpected token: {:?}", token.kind));
243 }
244 };
245
246 let inline_comment = self.collect_comment();
247
248 Ok(YamlNode::with_comments(
249 node.value,
250 leading_comment,
251 inline_comment,
252 ))
253 }
254
255 fn parse_inline_value(&mut self) -> Result<YamlNode<'g>, String> {
256 let start_token = self
258 .current_token()
259 .ok_or_else(|| "Expected value".to_string())?;
260
261 match start_token.kind {
263 TokenKind::String => {
264 let text = start_token.text;
265 let content = if text.starts_with('"') || text.starts_with('\'') {
266 &text[1..text.len() - 1]
267 } else {
268 text
269 };
270 self.advance();
271 let inline_comment = self.collect_comment();
272 return Ok(YamlNode::with_comments(
273 YamlValue::String(Cow::Borrowed(content)),
274 None,
275 inline_comment,
276 ));
277 }
278 TokenKind::Identifier
279 | TokenKind::Colon
280 | TokenKind::Whitespace
281 | TokenKind::NewLine
282 | TokenKind::Hyphen
283 | TokenKind::Comment
284 | TokenKind::Indent
285 | TokenKind::Dedent
286 | TokenKind::Pipe
287 | TokenKind::GreaterThan => {}
288 }
289
290 let mut value_parts = Vec::with_capacity(4); let mut single_token_text: Option<&'g str> = None;
293
294 while let Some(token) = self.current_token() {
295 match token.kind {
296 TokenKind::NewLine | TokenKind::Comment => break,
297 TokenKind::Whitespace => {
298 value_parts.push(" ");
299 self.advance();
300 }
301 TokenKind::Identifier
302 | TokenKind::Colon
303 | TokenKind::String
304 | TokenKind::Hyphen
305 | TokenKind::Indent
306 | TokenKind::Dedent
307 | TokenKind::Pipe
308 | TokenKind::GreaterThan => {
309 if value_parts.is_empty() && single_token_text.is_none() {
310 single_token_text = Some(token.text);
311 }
312 value_parts.push(token.text);
313 self.advance();
314 }
315 }
316 }
317
318 while value_parts.last() == Some(&" ") {
320 value_parts.pop();
321 }
322
323 let value = if let Some(text) = single_token_text.filter(|_| value_parts.len() == 1) {
325 YamlValue::String(Cow::Borrowed(text))
326 } else {
327 let value_str = value_parts.join("");
329 YamlValue::String(Cow::Owned(value_str))
330 };
331
332 let inline_comment = self.collect_comment();
333
334 Ok(YamlNode::with_comments(value, None, inline_comment))
335 }
336
337 fn parse_array(&mut self, min_indent: usize) -> Result<YamlValue<'g>, String> {
338 let mut items = Vec::new();
339
340 while let Some(token) = self.current_token() {
341 if token.kind == TokenKind::Hyphen {
342 self.advance(); self.skip_whitespace();
344
345 let item = self.parse_value(min_indent)?;
346 items.push(item);
347
348 self.skip_whitespace();
349 if let Some(token) = self.current_token() {
350 if token.kind == TokenKind::NewLine {
351 self.advance();
352 self.skip_whitespace_and_newlines();
353 } else if token.kind != TokenKind::Hyphen {
354 break;
355 }
356 }
357 } else {
358 break;
359 }
360 }
361
362 Ok(YamlValue::Array(items))
363 }
364
365 fn parse_multiline_string(
366 &mut self,
367 base_indent: usize,
368 is_literal: bool,
369 ) -> Result<YamlNode<'g>, String> {
370 self.skip_whitespace();
372
373 let mut chomp_mode = ChompMode::Clip; if let Some(token) = self.current_token() {
376 match token.text {
377 "-" => {
378 chomp_mode = ChompMode::Strip;
379 self.advance();
380 }
381 "+" => {
382 chomp_mode = ChompMode::Keep;
383 self.advance();
384 }
385 _ => {}
386 }
387 }
388
389 while let Some(token) = self.current_token() {
391 if token.kind == TokenKind::NewLine {
392 self.advance();
393 break;
394 }
395 self.advance();
397 }
398
399 let mut lines = Vec::new();
400 let mut content_indent = None;
401
402 while let Some(token) = self.current_token() {
404 if token.kind == TokenKind::Dedent {
406 let mut peek_index = self.current + 1;
408 while peek_index < self.tokens.len() {
409 let peek_token = &self.tokens[peek_index];
410 if peek_token.kind != TokenKind::Whitespace
411 && peek_token.kind != TokenKind::Indent
412 && peek_token.kind != TokenKind::Dedent
413 {
414 if peek_token.column <= base_indent {
415 break;
416 }
417 break;
418 }
419 peek_index += 1;
420 }
421 if peek_index < self.tokens.len() && self.tokens[peek_index].column <= base_indent {
422 break;
423 }
424 }
425
426 if token.kind == TokenKind::Whitespace || token.kind == TokenKind::Indent {
428 self.advance();
429 continue;
430 }
431
432 if token.kind == TokenKind::NewLine {
434 lines.push("");
435 self.advance();
436 continue;
437 }
438
439 if token.column <= base_indent {
441 break;
442 }
443
444 if content_indent.is_none() {
446 content_indent = Some(token.column);
447 }
448
449 let _line_start = self.current;
451 let mut line_text = String::new();
452
453 while let Some(token) = self.current_token() {
454 if token.kind == TokenKind::NewLine {
455 break;
456 }
457
458 line_text.push_str(token.text);
461 self.advance();
462 }
463
464 lines.push(line_text.leak()); if let Some(token) = self.current_token()
467 && token.kind == TokenKind::NewLine
468 {
469 self.advance();
470 }
471 }
472
473 let result = if is_literal {
475 let mut result = lines.join("\n");
477
478 match chomp_mode {
480 ChompMode::Strip => {
481 while result.ends_with('\n') {
483 result.pop();
484 }
485 }
486 ChompMode::Clip => {
487 while result.ends_with("\n\n") {
489 result.pop();
490 }
491 if !result.ends_with('\n') && !result.is_empty() {
492 result.push('\n');
493 }
494 }
495 ChompMode::Keep => {
496 result.push('\n');
498 }
499 }
500
501 result
502 } else {
503 let mut result = String::new();
505 let mut prev_empty = false;
506
507 for (i, line) in lines.iter().enumerate() {
508 if line.is_empty() {
509 if !prev_empty && i > 0 {
510 result.push('\n');
511 }
512 prev_empty = true;
513 } else {
514 if i > 0 && !prev_empty {
515 result.push(' ');
516 }
517 result.push_str(line.trim_start());
518 prev_empty = false;
519 }
520 }
521
522 match chomp_mode {
524 ChompMode::Strip => {
525 while result.ends_with('\n') || result.ends_with(' ') {
526 result.pop();
527 }
528 }
529 ChompMode::Clip => {
530 while result.ends_with('\n') || result.ends_with(' ') {
531 result.pop();
532 }
533 if !result.is_empty() {
535 result.push('\n');
536 }
537 }
538 ChompMode::Keep => {
539 if !result.is_empty() && !result.ends_with('\n') {
541 result.push('\n');
542 }
543 }
544 }
545
546 result
547 };
548
549 Ok(YamlNode::new(YamlValue::String(Cow::Owned(result))))
550 }
551
552 fn parse_object(&mut self, min_indent: usize) -> Result<YamlNode<'g>, String> {
553 let mut map = BTreeMap::new();
554
555 while let Some(token) = self.current_token() {
556 if token.kind != TokenKind::Identifier {
557 break;
558 }
559
560 if min_indent > 0 && token.column <= min_indent {
563 break;
564 }
565
566 let key_column = token.column;
567 let key = Cow::Borrowed(token.text);
568 self.advance();
569
570 self.skip_whitespace();
571
572 let Some(token) = self.current_token() else {
574 return Err("Expected colon after key".to_string());
575 };
576 if token.kind != TokenKind::Colon {
577 return Err(format!("Expected colon after key, got {:?}", token.kind));
578 }
579 self.advance();
580
581 self.skip_whitespace();
582
583 self.skip_whitespace();
585
586 let Some(token) = self.current_token() else {
588 return Err("Expected value after colon".to_string());
589 };
590
591 let value = if token.kind == TokenKind::Pipe || token.kind == TokenKind::GreaterThan {
592 let is_literal = token.kind == TokenKind::Pipe;
594 self.advance(); self.parse_multiline_string(key_column, is_literal)?
596 } else if token.kind == TokenKind::NewLine || token.kind == TokenKind::Indent {
597 self.skip_whitespace_and_newlines();
599 self.parse_value(key_column)?
601 } else {
602 self.parse_inline_value()?
604 };
605
606 map.insert(key, value);
607
608 self.skip_whitespace();
609 if let Some(token) = self.current_token()
610 && token.kind == TokenKind::NewLine
611 {
612 self.advance();
613 self.skip_whitespace_and_newlines();
614 }
615
616 if let Some(token) = self.current_token()
618 && token.kind == TokenKind::Dedent
619 {
620 self.advance();
621 break;
622 }
623 }
624
625 Ok(YamlNode::new(YamlValue::Object(map)))
626 }
627}
628
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `yaml` and returns the root node, panicking on a parse error.
    fn parse_root(yaml: &str) -> YamlNode<'_> {
        Parser::new(yaml).parse().unwrap()
    }

    /// Shorthand for a borrowed string value.
    fn text(value: &str) -> YamlValue<'_> {
        YamlValue::String(Cow::Borrowed(value))
    }

    #[test]
    fn test_parse_simple_object() {
        let root = parse_root("name: John\nage: 30");
        let YamlValue::Object(map) = &root.value else {
            panic!("Expected object");
        };

        assert_eq!(map.len(), 2);
        assert_eq!(map.get("name").unwrap().value, text("John"));
        assert_eq!(map.get("age").unwrap().value, text("30"));
    }

    #[test]
    fn test_parse_array() {
        let root = parse_root("- apple\n- banana\n- cherry");
        let YamlValue::Array(items) = &root.value else {
            panic!("Expected array");
        };

        assert_eq!(items.len(), 3);
        for (item, expected) in items.iter().zip(["apple", "banana", "cherry"]) {
            assert_eq!(item.value, text(expected));
        }
    }

    #[test]
    fn test_parse_with_comments() {
        let root = parse_root("name: John # inline comment\nage: 30");
        let YamlValue::Object(map) = &root.value else {
            panic!("Expected object");
        };

        let name_node = map.get("name").unwrap();
        assert_eq!(
            name_node.inline_comment,
            Some(Cow::Borrowed("inline comment"))
        );
    }

    #[test]
    fn test_parse_mixed_types() {
        // Scalars are never coerced: booleans, numbers and null all stay text.
        let root = parse_root("enabled: true\ncount: 42\nratio: 2.5\nempty: null");
        let YamlValue::Object(map) = &root.value else {
            panic!("Expected object");
        };

        let expectations = [
            ("enabled", "true"),
            ("count", "42"),
            ("ratio", "2.5"),
            ("empty", "null"),
        ];
        for (key, expected) in expectations {
            assert_eq!(map.get(key).unwrap().value, text(expected));
        }
    }
}