1use crate::ast::*;
2use crate::refs::{RefMap, parse_link_ref_def};
3use dmc_diagnostic::Code;
4use dmc_diagnostic::metadata::{Origin, SourceMeta};
5use dmc_lexer::Lexer;
6use dmc_lexer::token::{Token, TokenKind};
7use duck_diagnostic::{Diagnostic, DiagnosticEngine, Span};
8use std::sync::Arc;
9
/// Feature switches that adjust how the parser treats its input.
///
/// Every flag defaults to `false` (via `Default`), and the struct is `Copy`,
/// so it can be passed around by value.
#[derive(Clone, Copy, Debug, Default)]
pub struct ParseOptions {
    /// NOTE(review): presumably enables strict CommonMark handling of HTML
    /// blocks; the flag is read outside this section, so confirm there.
    pub cm_strict_html_blocks: bool,
    /// NOTE(review): presumably enables GFM-style autolinks; confirm against
    /// the inline parser where the flag is consulted.
    pub gfm_autolinks: bool,
    /// NOTE(review): presumably selects an older GFM emphasis behaviour;
    /// confirm against the inline parser where the flag is consulted.
    pub legacy_gfm_emphasis: bool,
}
26
/// Token-stream parser that produces a `Document`.
///
/// `'eng` is the lifetime of the borrowed diagnostic engine; `'tokens` is the
/// lifetime of the text that the tokens (and the optional `source`) borrow.
pub struct Parser<'eng, 'tokens> {
    /// Tokens being parsed; `pos` indexes into this vector.
    pub tokens: Vec<Token<'tokens>>,
    /// Metadata of the input; `meta.path` labels the spans of emitted diagnostics.
    pub meta: Arc<SourceMeta>,
    /// Index of the current token in `tokens`.
    pub pos: usize,
    /// Link reference definitions gathered by the pre-pass in `parse`.
    pub refs: RefMap,
    /// Sink that receives every diagnostic emitted by the parser.
    pub diag_engine: &'eng mut DiagnosticEngine<Code>,
    /// Options this parser was constructed with.
    pub options: ParseOptions,
    /// Original source text, when attached through `with_source`; lets
    /// `raw_source_for_token_range` slice the input verbatim.
    pub source: Option<&'tokens str>,
    /// Starts at 0. NOTE(review): presumably the current link-label nesting
    /// depth, bounded by `MAX_LINK_LABEL_DEPTH` — confirm in the inline parser.
    pub link_label_depth: u16,
    /// Starts empty. NOTE(review): presumably the names of currently open JSX
    /// elements — confirm where it is pushed and popped.
    pub jsx_open_stack: Vec<String>,
}
51
/// Upper bound of 12 on link-label nesting.
///
/// NOTE(review): presumably compared against `Parser::link_label_depth` to cap
/// recursion; the comparison is outside this section — confirm at the use site.
pub(crate) const MAX_LINK_LABEL_DEPTH: u16 = 12;
56
57impl<'eng, 'tokens> Parser<'eng, 'tokens> {
58 pub fn new(
59 tokens: Vec<Token<'tokens>>,
60 meta: Arc<SourceMeta>,
61 diag_engine: &'eng mut DiagnosticEngine<Code>,
62 ) -> Self {
63 Self {
64 tokens,
65 meta,
66 pos: 0,
67 refs: RefMap::new(),
68 diag_engine,
69 options: ParseOptions::default(),
70 source: None,
71 link_label_depth: 0,
72 jsx_open_stack: Vec::new(),
73 }
74 }
75
76 pub fn new_with_options(
77 tokens: Vec<Token<'tokens>>,
78 meta: Arc<SourceMeta>,
79 diag_engine: &'eng mut DiagnosticEngine<Code>,
80 options: ParseOptions,
81 ) -> Self {
82 Self {
83 tokens,
84 meta,
85 pos: 0,
86 refs: RefMap::new(),
87 diag_engine,
88 options,
89 source: None,
90 link_label_depth: 0,
91 jsx_open_stack: Vec::new(),
92 }
93 }
94
95 pub fn with_source(mut self, source: &'tokens str) -> Self {
98 self.source = Some(source);
99 self
100 }
101
102 pub fn parse(&mut self) -> Document {
105 self.collect_refs();
106 let span = self.tokens.first().map(|t| t.span.clone()).unwrap_or_else(default_span);
107 let mut children = Vec::new();
108 while !self.is_eof() {
109 let before = self.pos;
110 if let Some(node) = self.parse_block() {
111 children.push(node);
112 }
113 if self.pos == before {
114 self.advance();
115 }
116 }
117 Document { children, span }
118 }
119
120 fn collect_refs(&mut self) {
124 let mut in_paragraph = false;
125 let mut on_heading_line = false;
126 for tok in &self.tokens {
127 match &tok.kind {
128 TokenKind::LinkRefDef => {
129 if !in_paragraph && let Some((label, url, title)) = parse_link_ref_def(tok.raw) {
130 let url = crate::inline::decode_entities_in(&unescape_link_part(&url));
131 let title = title.map(|t| crate::inline::decode_entities_in(&unescape_link_part(&t)));
132 self.refs.insert(&label, url, title);
133 }
134 },
135 TokenKind::BlankLine
136 | TokenKind::CodeFenceOpen(_, _)
137 | TokenKind::CodeFenceClose(_, _)
138 | TokenKind::ThematicBreak
139 | TokenKind::FrontmatterEnd(_) => {
140 in_paragraph = false;
141 on_heading_line = false;
142 },
143 TokenKind::Heading(_) => {
144 in_paragraph = false;
145 on_heading_line = true;
146 },
147 TokenKind::BlockQuoteMarker => {
148 in_paragraph = false;
149 on_heading_line = false;
150 },
151 TokenKind::SoftBreak | TokenKind::HardBreak => {
152 if on_heading_line {
153 in_paragraph = false;
154 }
155 on_heading_line = false;
156 },
157 TokenKind::Whitespace(_) | TokenKind::Eof => {},
158 _ => {
159 if !on_heading_line {
160 in_paragraph = true;
161 }
162 },
163 }
164 }
165 }
166
167 pub(crate) fn emit_diagnostic(&mut self, diagnostic: Diagnostic<Code>) {
168 self.diag_engine.emit(diagnostic);
169 }
170
171 pub(crate) fn diag(&mut self, code: Code, message: impl Into<String>) {
172 let (line, column) = self.tokens.get(self.pos).map(|t| (t.span.line, t.span.column)).unwrap_or((0, 0));
173 let span = Span::from_zero_based(self.meta.path.clone(), line, column, 1);
174 self.emit_diagnostic(duck_diagnostic::diag!(code, span, message.into()));
175 }
176
177 pub(crate) fn warn(&mut self, code: Code, message: impl Into<String>) {
178 self.diag(code, message);
179 }
180
181 pub(crate) fn span_at(&self, pos: usize) -> Span {
182 self.tokens.get(pos).map(|t| t.span.clone()).unwrap_or_else(default_span)
183 }
184
185 pub(crate) fn raw_source_for_token_range(&self, start: usize, end: usize) -> String {
191 if start >= end {
192 return String::new();
193 }
194 let Some(start_tok) = self.tokens.get(start) else {
195 return String::new();
196 };
197 let Some(end_tok) = self.tokens.get(end - 1) else {
198 return String::new();
199 };
200
201 if let Some(source) = self.source {
202 let base = source.as_ptr() as usize;
203 let src_lo = base;
204 let src_hi = base + source.len();
205 let lo = start_tok.raw.as_ptr() as usize;
206 let hi = end_tok.raw.as_ptr() as usize + end_tok.raw.len();
207 debug_assert!(lo <= hi, "token slice start pointer exceeded end pointer");
208 debug_assert!(lo >= src_lo, "token slice start pointer fell before the source buffer");
209 debug_assert!(hi <= src_hi, "token slice end pointer exceeded the source buffer");
210 if lo < src_lo || hi > src_hi || lo > hi {
211 return String::new();
212 }
213 let off_lo = lo - base;
214 let off_hi = hi - base;
215 return source.get(off_lo..off_hi).map(|s| s.to_string()).unwrap_or_default();
216 }
217
218 let mut out = String::new();
219 for tok in &self.tokens[start..end] {
220 out.push_str(tok.raw);
221 }
222 out
223 }
224
225 pub(crate) fn current_span(&self) -> Span {
226 self.tokens.get(self.pos).map(|t| t.span.clone()).unwrap_or_else(default_span)
227 }
228
229 pub(crate) fn peek(&'_ self) -> Option<&'_ Token<'_>> {
230 self.tokens.get(self.pos)
231 }
232
233 pub(crate) fn peek_kind(&self) -> Option<&TokenKind> {
234 self.tokens.get(self.pos).map(|t| &t.kind)
235 }
236
237 pub(crate) fn peek_raw(&self) -> Option<&'tokens str> {
240 self.tokens.get(self.pos).map(|t| t.raw)
241 }
242
243 pub(crate) fn advance(&'_ mut self) -> Option<&'_ Token<'_>> {
244 let t = self.tokens.get(self.pos);
245 if t.is_some() {
246 self.pos += 1;
247 }
248 t
249 }
250
251 pub(crate) fn is_eof(&self) -> bool {
252 matches!(self.peek_kind(), Some(TokenKind::Eof) | None)
253 }
254}
255
/// Resolves backslash escapes in a link destination or title.
///
/// A backslash followed by an ASCII punctuation character is replaced by that
/// character. Any other backslash — including one at the very end of the
/// input or one followed by a non-punctuation character — is kept literally.
///
/// The input is walked by `char`, not by byte, so non-ASCII text is copied
/// through unchanged (pushing individual UTF-8 bytes as `char`s would turn
/// `é` into `Ã©`).
fn unescape_link_part(s: &str) -> String {
    // Fast path: nothing to unescape.
    if !s.contains('\\') {
        return s.to_string();
    }
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        // After a backslash, consume the next char only if it is escapable;
        // `next_if` leaves it in place otherwise so it is emitted normally.
        let escaped = if c == '\\' {
            chars.next_if(char::is_ascii_punctuation)
        } else {
            None
        };
        out.push(escaped.unwrap_or(c));
    }
    out
}
313
314pub fn parse(source: &str) -> Document {
317 parse_with(source, ParseOptions::default())
318}
319
320pub fn parse_with(source: &str, options: ParseOptions) -> Document {
322 let meta = Arc::from(SourceMeta { path: Arc::from("<inline>"), origin: Origin::Inline("<inline>") });
323 let mut lex_engine = DiagnosticEngine::new();
324 let mut lexer = Lexer::new(source, meta.clone(), &mut lex_engine);
325 let _ = lexer.scan_tokens();
326 let tokens = std::mem::take(&mut lexer.tokens);
327 drop(lexer);
328
329 let mut parse_engine = DiagnosticEngine::new();
330 let mut p = Parser::new_with_options(tokens, meta, &mut parse_engine, options).with_source(source);
331 p.parse()
332}
333
334pub fn parse_inline_str(s: &str) -> Vec<crate::ast::Node> {
337 let meta = Arc::from(SourceMeta { path: Arc::from("<inline>"), origin: Origin::Inline("<inline>") });
338 let mut lex_engine = DiagnosticEngine::new();
339 let mut lexer = Lexer::new(s, meta.clone(), &mut lex_engine);
340 let _ = lexer.scan_tokens();
341 let tokens = std::mem::take(&mut lexer.tokens);
342 drop(lexer);
343 let mut parse_engine = DiagnosticEngine::new();
344 let mut p = Parser::new(tokens, meta, &mut parse_engine).with_source(s);
345 p.collect_inline_until_break()
346}