Skip to main content

dmc_parser/
parser.rs

1use crate::ast::*;
2use dmc_diagnostic::Code;
3use dmc_diagnostic::metadata::{Origin, SourceMeta};
4use dmc_lexer::Lexer;
5use dmc_lexer::token::{Token, TokenKind};
6use duck_diagnostic::{Diagnostic, DiagnosticEngine, Span};
7use std::sync::Arc;
8
9/// Token-stream cursor + diagnostic engine. `'tokens` ties borrowed lexemes to
10/// the source; `'eng` ties the engine borrow to the caller.
11pub struct Parser<'eng, 'tokens> {
12  pub tokens: Vec<Token<'tokens>>,
13  pub meta: Arc<SourceMeta>,
14  pub pos: usize,
15  pub diag_engine: &'eng mut DiagnosticEngine<Code>,
16}
17
18impl<'eng, 'tokens> Parser<'eng, 'tokens> {
19  /// Build a parser positioned at the first token.
20  pub fn new(
21    tokens: Vec<Token<'tokens>>,
22    meta: Arc<SourceMeta>,
23    diag_engine: &'eng mut DiagnosticEngine<Code>,
24  ) -> Self {
25    Self { tokens, meta, pos: 0, diag_engine }
26  }
27
28  /// Drive the top-level loop until EOF. Force-advances on no-progress so a
29  /// malformed token cannot wedge the parser.
30  pub fn parse(&mut self) -> Document {
31    let span = self.tokens.first().map(|t| t.span.clone()).unwrap_or_else(default_span);
32    let mut children = Vec::new();
33    while !self.is_eof() {
34      let before = self.pos;
35      if let Some(node) = self.parse_block() {
36        children.push(node);
37      }
38      if self.pos == before {
39        self.advance();
40      }
41    }
42    Document { children, span }
43  }
44
45  /// Forward a fully-built diagnostic to the engine.
46  pub(crate) fn emit_diagnostic(&mut self, diagnostic: Diagnostic<Code>) {
47    self.diag_engine.emit(diagnostic);
48  }
49
50  /// Build a primary-labelled diagnostic at the cursor and emit it.
51  pub(crate) fn diag(&mut self, code: Code, message: impl Into<String>) {
52    let (line, column) = self.tokens.get(self.pos).map(|t| (t.span.line, t.span.column)).unwrap_or((0, 0));
53    let span = Span::from_zero_based(self.meta.path.clone(), line, column, 1);
54    self.emit_diagnostic(duck_diagnostic::diag!(code, span, message.into()));
55  }
56
57  /// Sugar for emitting a warning-severity diagnostic.
58  pub(crate) fn warn(&mut self, code: Code, message: impl Into<String>) {
59    self.diag(code, message);
60  }
61
62  /// Span of the token at the cursor, or a default span at EOF.
63  pub(crate) fn current_span(&self) -> Span {
64    self.tokens.get(self.pos).map(|t| t.span.clone()).unwrap_or_else(default_span)
65  }
66
67  /// Token under the cursor (no consume).
68  pub(crate) fn peek(&'_ self) -> Option<&'_ Token<'_>> {
69    self.tokens.get(self.pos)
70  }
71
72  /// Kind of the token under the cursor (no consume).
73  pub(crate) fn peek_kind(&self) -> Option<&TokenKind> {
74    self.tokens.get(self.pos).map(|t| &t.kind)
75  }
76
77  /// Raw lexeme of the upcoming token with its source-tied `'tokens` lifetime,
78  /// decoupled from the `&self` borrow so callers can hold it across mutations.
79  pub(crate) fn peek_raw(&self) -> Option<&'tokens str> {
80    self.tokens.get(self.pos).map(|t| t.raw)
81  }
82
83  /// Consume one token and return it. No-op at EOF.
84  pub(crate) fn advance(&'_ mut self) -> Option<&'_ Token<'_>> {
85    let t = self.tokens.get(self.pos);
86    if t.is_some() {
87      self.pos += 1;
88    }
89    t
90  }
91
92  /// True at the `Eof` token or past the end of the stream.
93  pub(crate) fn is_eof(&self) -> bool {
94    matches!(self.peek_kind(), Some(TokenKind::Eof) | None)
95  }
96}
97
98/// Lex + parse `source` in one shot, dropping all diagnostics. Convenience for
99/// tests + the `parse` bin; production callers should construct their own
100/// `DiagnosticEngine`.
101pub fn parse(source: &str) -> Document {
102  let meta = Arc::from(SourceMeta { path: Arc::from("<inline>"), origin: Origin::Inline("<inline>") });
103  let mut lex_engine = DiagnosticEngine::new();
104  let mut lexer = Lexer::new(source, meta.clone(), &mut lex_engine);
105  let _ = lexer.scan_tokens();
106  let tokens = std::mem::take(&mut lexer.tokens);
107  drop(lexer);
108
109  let mut parse_engine = DiagnosticEngine::new();
110  let mut p = Parser::new(tokens, meta, &mut parse_engine);
111  p.parse()
112}
113
114/// Lex `s` and run the inline parser on it. Returns the inline `Node`
115/// list (Text, InlineCode, Bold, Italic, Strikethrough, Link, ...).
116/// Used by table cells, which receive raw cell strings rather than
117/// pre-tokenised inline content.
118pub fn parse_inline_str(s: &str) -> Vec<crate::ast::Node> {
119  let meta = Arc::from(SourceMeta { path: Arc::from("<inline>"), origin: Origin::Inline("<inline>") });
120  let mut lex_engine = DiagnosticEngine::new();
121  let mut lexer = Lexer::new(s, meta.clone(), &mut lex_engine);
122  let _ = lexer.scan_tokens();
123  let tokens = std::mem::take(&mut lexer.tokens);
124  drop(lexer);
125  let mut parse_engine = DiagnosticEngine::new();
126  let mut p = Parser::new(tokens, meta, &mut parse_engine);
127  p.collect_inline_until_break()
128}