1use rowan::GreenNode;
7use styx_parse::{Lexer, Token, TokenKind};
8
9use crate::syntax_kind::{SyntaxKind, SyntaxNode};
10
11#[derive(Debug, Clone)]
13pub struct Parse {
14 green: GreenNode,
15 errors: Vec<ParseError>,
16}
17
18impl Parse {
19 pub fn syntax(&self) -> SyntaxNode {
21 SyntaxNode::new_root(self.green.clone())
22 }
23
24 pub fn errors(&self) -> &[ParseError] {
26 &self.errors
27 }
28
29 pub fn is_ok(&self) -> bool {
31 self.errors.is_empty()
32 }
33
34 pub fn ok(self) -> Result<SyntaxNode, Vec<ParseError>> {
36 if self.errors.is_empty() {
37 Ok(self.syntax())
38 } else {
39 Err(self.errors)
40 }
41 }
42
43 pub fn green(&self) -> &GreenNode {
45 &self.green
46 }
47}
48
/// A single problem found while parsing.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParseError {
    /// Offset into the source at which the problem was reported.
    pub offset: u32,
    /// Human-readable description of the problem.
    pub message: String,
}

impl ParseError {
    /// Builds an error at `offset`, accepting anything convertible into a `String`
    /// as the message.
    fn new(offset: u32, message: impl Into<String>) -> Self {
        let message = message.into();
        Self { offset, message }
    }
}
66
67pub fn parse(source: &str) -> Parse {
69 let parser = CstParser::new(source);
70 parser.parse()
71}
72
73struct CstParser<'src> {
75 #[allow(dead_code)]
76 source: &'src str,
77 lexer: std::iter::Peekable<TokenIter<'src>>,
78 builder: rowan::GreenNodeBuilder<'static>,
79 errors: Vec<ParseError>,
80}
81
82struct TokenIter<'src> {
84 lexer: Lexer<'src>,
85 done: bool,
86}
87
88impl<'src> Iterator for TokenIter<'src> {
89 type Item = Token<'src>;
90
91 fn next(&mut self) -> Option<Self::Item> {
92 if self.done {
93 return None;
94 }
95 let token = self.lexer.next_token();
96 if token.kind == TokenKind::Eof {
97 self.done = true;
98 }
99 Some(token)
100 }
101}
102
103impl<'src> CstParser<'src> {
104 fn new(source: &'src str) -> Self {
105 let lexer = Lexer::new(source);
106 Self {
107 source,
108 lexer: TokenIter { lexer, done: false }.peekable(),
109 builder: rowan::GreenNodeBuilder::new(),
110 errors: Vec::new(),
111 }
112 }
113
114 fn parse(mut self) -> Parse {
115 self.builder.start_node(SyntaxKind::DOCUMENT.into());
116 self.parse_entries(None);
117 self.builder.finish_node();
118
119 Parse {
120 green: self.builder.finish(),
121 errors: self.errors,
122 }
123 }
124
125 fn peek(&mut self) -> TokenKind {
127 self.lexer.peek().map(|t| t.kind).unwrap_or(TokenKind::Eof)
128 }
129
130 fn peek_token(&mut self) -> Option<&Token<'src>> {
132 self.lexer.peek()
133 }
134
135 fn current_pos(&mut self) -> u32 {
137 self.lexer.peek().map(|t| t.span.start).unwrap_or(0)
138 }
139
140 fn bump(&mut self) {
142 if let Some(token) = self.lexer.next() {
143 self.builder
144 .token(SyntaxKind::from(token.kind).into(), token.text);
145 }
146 }
147
148 fn skip_trivia(&mut self) {
150 while matches!(
151 self.peek(),
152 TokenKind::Whitespace | TokenKind::Newline | TokenKind::LineComment
153 ) {
154 self.bump();
155 }
156 }
157
158 fn skip_whitespace(&mut self) {
160 while self.peek() == TokenKind::Whitespace {
161 self.bump();
162 }
163 }
164
165 fn at_eof(&mut self) -> bool {
167 self.peek() == TokenKind::Eof
168 }
169
170 fn at_entry_end(&mut self, closing: Option<TokenKind>) -> bool {
172 let kind = self.peek();
173 kind == TokenKind::Eof
174 || kind == TokenKind::Newline
175 || kind == TokenKind::LineComment
176 || kind == TokenKind::Comma
177 || closing.is_some_and(|c| kind == c)
178 }
179
180 fn at_attribute(&mut self) -> bool {
182 if self.peek() != TokenKind::BareScalar {
183 return false;
184 }
185 let token = match self.peek_token() {
187 Some(t) => t,
188 None => return false,
189 };
190 let after_scalar = token.span.end as usize;
191
192 let rest = &self.source[after_scalar..];
194 for ch in rest.chars() {
195 match ch {
196 ' ' | '\t' => continue,
197 '>' => return true,
198 _ => return false,
199 }
200 }
201 false
202 }
203
204 fn parse_entries(&mut self, closing: Option<TokenKind>) {
206 loop {
207 self.skip_trivia();
208
209 while self.peek() == TokenKind::DocComment {
211 self.bump();
212 while matches!(self.peek(), TokenKind::Whitespace | TokenKind::Newline) {
214 self.bump();
215 }
216 }
217
218 if self.at_eof() {
220 break;
221 }
222 if closing.is_some_and(|close| self.peek() == close) {
223 break;
224 }
225
226 self.parse_entry(closing);
228
229 self.skip_whitespace();
231 if matches!(self.peek(), TokenKind::Comma | TokenKind::Newline) {
232 self.bump();
233 }
234 }
235 }
236
237 fn parse_entry(&mut self, closing: Option<TokenKind>) {
244 self.builder.start_node(SyntaxKind::ENTRY.into());
245
246 if self.at_attribute() {
248 self.parse_attributes(closing);
249 } else {
250 if !self.at_entry_end(closing) {
252 self.builder.start_node(SyntaxKind::KEY.into());
253 self.parse_atom();
254 self.builder.finish_node();
255 }
256
257 self.skip_whitespace();
259
260 let mut value_count = 0;
262 while !self.at_entry_end(closing) {
263 if self.at_attribute() {
265 self.builder.start_node(SyntaxKind::VALUE.into());
266 self.parse_attributes(closing);
267 self.builder.finish_node();
268 value_count += 1;
269 } else {
270 if closing == Some(TokenKind::RBrace)
273 && value_count > 0
274 && self.peek() == TokenKind::BareScalar
275 {
276 let pos = self.current_pos();
277 self.errors.push(ParseError::new(
278 pos,
279 "unexpected atom after value (missing comma between entries?)"
280 .to_string(),
281 ));
282 }
283 self.builder.start_node(SyntaxKind::VALUE.into());
284 self.parse_atom();
285 self.builder.finish_node();
286 value_count += 1;
287 }
288 self.skip_whitespace();
289 }
290 }
291
292 self.builder.finish_node();
293 }
294
295 fn parse_attributes(&mut self, closing: Option<TokenKind>) {
297 self.builder.start_node(SyntaxKind::ATTRIBUTES.into());
298
299 while self.at_attribute() {
300 self.parse_attribute();
301 self.skip_whitespace();
302
303 if self.at_entry_end(closing) {
305 break;
306 }
307 }
308
309 self.builder.finish_node();
310 }
311
312 fn parse_attribute(&mut self) {
316 self.builder.start_node(SyntaxKind::ATTRIBUTE.into());
317
318 self.bump();
320
321 if self.peek() == TokenKind::Gt {
323 self.bump();
324 } else {
325 let pos = self.current_pos();
326 self.errors.push(ParseError::new(
327 pos,
328 "expected `>` immediately after attribute key".to_string(),
329 ));
330 }
331
332 if self.peek() == TokenKind::Whitespace || self.peek() == TokenKind::Newline {
334 let pos = self.current_pos();
335 self.errors.push(ParseError::new(
336 pos,
337 "no whitespace allowed after `>` in attribute".to_string(),
338 ));
339 self.skip_whitespace();
341 }
342
343 self.parse_atom();
344
345 self.builder.finish_node();
346 }
347
348 fn parse_atom(&mut self) {
350 let kind = self.peek();
351 match kind {
352 TokenKind::LBrace => self.parse_object(),
353 TokenKind::LParen => self.parse_sequence(),
354 TokenKind::At => self.parse_tag_or_unit(),
355 TokenKind::BareScalar | TokenKind::QuotedScalar | TokenKind::RawScalar => {
356 self.builder.start_node(SyntaxKind::SCALAR.into());
357 self.bump();
358 self.builder.finish_node();
359 }
360 TokenKind::HeredocStart => self.parse_heredoc(),
361 _ => {
362 let pos = self.current_pos();
364 self.errors.push(ParseError::new(
365 pos,
366 format!("unexpected token: {:?}", kind),
367 ));
368 self.bump();
370 }
371 }
372 }
373
374 fn parse_object(&mut self) {
376 self.builder.start_node(SyntaxKind::OBJECT.into());
377
378 self.bump();
380
381 self.parse_entries(Some(TokenKind::RBrace));
383
384 self.skip_trivia();
386 if self.peek() == TokenKind::RBrace {
387 self.bump();
388 } else {
389 let pos = self.current_pos();
390 self.errors
391 .push(ParseError::new(pos, "unclosed object, expected `}`"));
392 }
393
394 self.builder.finish_node();
395 }
396
397 fn parse_sequence(&mut self) {
399 self.builder.start_node(SyntaxKind::SEQUENCE.into());
400
401 self.bump();
403
404 loop {
406 self.skip_trivia();
407
408 if self.at_eof() {
409 let pos = self.current_pos();
410 self.errors
411 .push(ParseError::new(pos, "unclosed sequence, expected `)`"));
412 break;
413 }
414 if self.peek() == TokenKind::RParen {
415 break;
416 }
417
418 self.builder.start_node(SyntaxKind::ENTRY.into());
420 self.builder.start_node(SyntaxKind::KEY.into());
421 self.parse_atom();
422 self.builder.finish_node();
423 self.builder.finish_node();
424
425 self.skip_whitespace();
427 }
428
429 if self.peek() == TokenKind::RParen {
431 self.bump();
432 }
433
434 self.builder.finish_node();
435 }
436
437 fn parse_tag_or_unit(&mut self) {
439 let at_token = self.lexer.next();
441 let at_end = at_token.as_ref().map(|t| t.span.end).unwrap_or(0);
442
443 let (is_unit, next_start) = match self.lexer.peek() {
445 None => (true, 0),
446 Some(t) => {
447 let is_unit = t.kind != TokenKind::BareScalar || t.span.start != at_end;
450 (is_unit, t.span.start)
451 }
452 };
453 let _ = next_start; if is_unit {
456 self.builder.start_node(SyntaxKind::UNIT.into());
458 if let Some(token) = at_token {
459 self.builder.token(SyntaxKind::AT.into(), token.text);
460 }
461 self.builder.finish_node();
462 } else {
463 self.builder.start_node(SyntaxKind::TAG.into());
465
466 if let Some(token) = at_token {
468 self.builder.token(SyntaxKind::AT.into(), token.text);
469 }
470
471 self.builder.start_node(SyntaxKind::TAG_NAME.into());
473 let name_end = self.lexer.peek().map(|t| t.span.end).unwrap_or(0);
474 self.bump(); self.builder.finish_node();
476
477 let has_immediate_payload = self.lexer.peek().is_some_and(|t| {
481 t.span.start == name_end
482 && matches!(
483 t.kind,
484 TokenKind::LBrace
485 | TokenKind::LParen
486 | TokenKind::QuotedScalar
487 | TokenKind::RawScalar
488 | TokenKind::HeredocStart
489 | TokenKind::At
490 )
491 });
492
493 if has_immediate_payload {
494 self.builder.start_node(SyntaxKind::TAG_PAYLOAD.into());
495 self.parse_atom();
496 self.builder.finish_node();
497 }
498
499 self.builder.finish_node();
500 }
501 }
502
503 fn parse_heredoc(&mut self) {
505 self.builder.start_node(SyntaxKind::HEREDOC.into());
506
507 self.bump();
509
510 if self.peek() == TokenKind::HeredocContent {
512 self.bump();
513 }
514
515 if self.peek() == TokenKind::HeredocEnd {
517 self.bump();
518 } else {
519 let pos = self.current_pos();
520 self.errors
521 .push(ParseError::new(pos, "unterminated heredoc"));
522 }
523
524 self.builder.finish_node();
525 }
526}
527
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source`, fails the test if any error was recorded, and returns the root node.
    fn parse_ok(source: &str) -> SyntaxNode {
        let result = parse(source);
        assert!(result.is_ok(), "parse errors: {:?}", result.errors());
        result.syntax()
    }

    /// Pretty-prints a node for ad-hoc debugging; not called by any test.
    #[allow(dead_code)]
    fn debug_tree(node: &SyntaxNode) -> String {
        format!("{:#?}", node)
    }

    /// First child node of `node`; panics when there is none.
    fn first_child(node: &SyntaxNode) -> SyntaxNode {
        node.children().next().unwrap()
    }

    /// Text reconstructed from the tree parsed out of `source`.
    fn reconstruct(source: &str) -> String {
        parse(source).syntax().to_string()
    }

    #[test]
    fn test_empty_document() {
        assert_eq!(parse_ok("").kind(), SyntaxKind::DOCUMENT);
    }

    #[test]
    fn test_simple_entry() {
        let root = parse_ok("host localhost");
        assert_eq!(root.kind(), SyntaxKind::DOCUMENT);
        assert_eq!(first_child(&root).kind(), SyntaxKind::ENTRY);
    }

    #[test]
    fn test_object() {
        let root = parse_ok("{ host localhost }");

        let entry = first_child(&root);
        assert_eq!(entry.kind(), SyntaxKind::ENTRY);

        let key = first_child(&entry);
        assert_eq!(key.kind(), SyntaxKind::KEY);

        assert_eq!(first_child(&key).kind(), SyntaxKind::OBJECT);
    }

    #[test]
    fn test_sequence() {
        let root = parse_ok("items (a b c)");
        let value = first_child(&root).children().nth(1).unwrap();
        assert_eq!(value.kind(), SyntaxKind::VALUE);
        assert_eq!(first_child(&value).kind(), SyntaxKind::SEQUENCE);
    }

    #[test]
    fn test_roundtrip() {
        let sources = [
            "host localhost",
            "{ a b, c d }",
            "items (1 2 3)",
            "name \"hello world\"",
            "@unit",
            "@tag payload",
            "// comment\nkey value",
        ];

        for source in sources {
            assert_eq!(
                source,
                reconstruct(source),
                "roundtrip failed for: {}",
                source
            );
        }
    }

    #[test]
    fn test_preserves_whitespace() {
        let source = " host localhost ";
        assert_eq!(source, reconstruct(source));
    }

    #[test]
    fn test_preserves_comments() {
        let source = "// header comment\nhost localhost // trailing";
        assert_eq!(source, reconstruct(source));
    }

    #[test]
    fn test_unit() {
        let root = parse_ok("empty @");
        let value = first_child(&root).children().nth(1).unwrap();
        assert_eq!(first_child(&value).kind(), SyntaxKind::UNIT);
    }

    #[test]
    fn test_tag_with_payload() {
        let root = parse_ok("@Some value");
        let key = first_child(&first_child(&root));
        assert_eq!(first_child(&key).kind(), SyntaxKind::TAG);
    }

    #[test]
    fn test_heredoc() {
        let source = "content <<EOF\nhello\nworld\nEOF";
        let result = parse(source);
        assert!(result.is_ok(), "errors: {:?}", result.errors());
        assert_eq!(source, result.syntax().to_string());
    }

    #[test]
    fn test_attributes() {
        let source = "id>main class>\"container\"";
        let result = parse(source);
        assert!(result.is_ok(), "errors: {:?}", result.errors());

        let root = result.syntax();
        assert_eq!(source, root.to_string());

        let attrs = first_child(&first_child(&root));
        assert_eq!(attrs.kind(), SyntaxKind::ATTRIBUTES);
    }

    #[test]
    fn test_multiple_values() {
        let source = "key value1 value2 value3";
        let result = parse(source);
        assert!(result.is_ok(), "errors: {:?}", result.errors());

        // One key followed by three values, in that order.
        let kinds: Vec<_> = first_child(&result.syntax())
            .children()
            .map(|child| child.kind())
            .collect();
        assert_eq!(
            kinds,
            [
                SyntaxKind::KEY,
                SyntaxKind::VALUE,
                SyntaxKind::VALUE,
                SyntaxKind::VALUE,
            ]
        );
    }

    #[test]
    fn test_showcase_file() {
        let source = include_str!("../../../examples/showcase.styx");
        let result = parse(source);

        assert!(result.is_ok(), "parse errors: {:?}", result.errors());
        assert_eq!(source, result.syntax().to_string(), "roundtrip failed");
    }
}