1use crate::ast::*;
2use crate::refs::{RefMap, parse_link_ref_def};
3use dmc_diagnostic::Code;
4use dmc_diagnostic::metadata::{Origin, SourceMeta};
5use dmc_lexer::Lexer;
6use dmc_lexer::token::{Token, TokenKind};
7use duck_diagnostic::{Diagnostic, DiagnosticEngine, Span};
8use std::sync::Arc;
9
/// Feature switches handed to the parser.
///
/// The type derives `Default`, so every switch starts out as `false`.
#[derive(Clone, Copy, Debug, Default)]
pub struct ParseOptions {
    /// NOTE(review): presumably selects CommonMark-strict HTML block handling; the
    /// code that reads this flag is outside this section — confirm there.
    pub cm_strict_html_blocks: bool,
    /// NOTE(review): presumably enables GFM-style autolinks; the code that reads
    /// this flag is outside this section — confirm there.
    pub gfm_autolinks: bool,
    /// NOTE(review): presumably selects an older GFM emphasis behaviour; the code
    /// that reads this flag is outside this section — confirm there.
    pub legacy_gfm_emphasis: bool,
}
29
30pub struct Parser<'eng, 'tokens> {
33 pub tokens: Vec<Token<'tokens>>,
34 pub meta: Arc<SourceMeta>,
35 pub pos: usize,
36 pub refs: RefMap,
37 pub diag_engine: &'eng mut DiagnosticEngine<Code>,
38 pub options: ParseOptions,
39 pub source: Option<&'tokens str>,
44 pub link_label_depth: u16,
52 pub jsx_open_stack: Vec<String>,
61}
62
/// NOTE(review): presumably the ceiling compared against `Parser::link_label_depth`;
/// the comparison itself lives outside this section — confirm at the use site.
pub(crate) const MAX_LINK_LABEL_DEPTH: u16 = 12;
70
71impl<'eng, 'tokens> Parser<'eng, 'tokens> {
72 pub fn new(
74 tokens: Vec<Token<'tokens>>,
75 meta: Arc<SourceMeta>,
76 diag_engine: &'eng mut DiagnosticEngine<Code>,
77 ) -> Self {
78 Self {
79 tokens,
80 meta,
81 pos: 0,
82 refs: RefMap::new(),
83 diag_engine,
84 options: ParseOptions::default(),
85 source: None,
86 link_label_depth: 0,
87 jsx_open_stack: Vec::new(),
88 }
89 }
90
91 pub fn new_with_options(
93 tokens: Vec<Token<'tokens>>,
94 meta: Arc<SourceMeta>,
95 diag_engine: &'eng mut DiagnosticEngine<Code>,
96 options: ParseOptions,
97 ) -> Self {
98 Self {
99 tokens,
100 meta,
101 pos: 0,
102 refs: RefMap::new(),
103 diag_engine,
104 options,
105 source: None,
106 link_label_depth: 0,
107 jsx_open_stack: Vec::new(),
108 }
109 }
110
111 pub fn with_source(mut self, source: &'tokens str) -> Self {
115 self.source = Some(source);
116 self
117 }
118
119 pub fn parse(&mut self) -> Document {
122 self.collect_refs();
123 let span = self.tokens.first().map(|t| t.span.clone()).unwrap_or_else(default_span);
124 let mut children = Vec::new();
125 while !self.is_eof() {
126 let before = self.pos;
127 if let Some(node) = self.parse_block() {
128 children.push(node);
129 }
130 if self.pos == before {
131 self.advance();
132 }
133 }
134 Document { children, span }
135 }
136
137 fn collect_refs(&mut self) {
141 let mut in_paragraph = false;
147 let mut on_heading_line = false;
148 for tok in &self.tokens {
149 match &tok.kind {
150 TokenKind::LinkRefDef => {
151 if !in_paragraph && let Some((label, url, title)) = parse_link_ref_def(tok.raw) {
152 let url = crate::inline::decode_entities_in(&unescape_link_part(&url));
153 let title = title.map(|t| crate::inline::decode_entities_in(&unescape_link_part(&t)));
154 self.refs.insert(&label, url, title);
155 }
156 },
157 TokenKind::BlankLine
158 | TokenKind::CodeFenceOpen(_, _)
159 | TokenKind::CodeFenceClose(_, _)
160 | TokenKind::ThematicBreak
161 | TokenKind::FrontmatterEnd(_) => {
162 in_paragraph = false;
163 on_heading_line = false;
164 },
165 TokenKind::Heading(_) => {
166 in_paragraph = false;
169 on_heading_line = true;
170 },
171 TokenKind::BlockQuoteMarker => {
172 in_paragraph = false;
173 on_heading_line = false;
174 },
175 TokenKind::SoftBreak | TokenKind::HardBreak => {
176 if on_heading_line {
177 in_paragraph = false;
178 }
179 on_heading_line = false;
180 },
181 TokenKind::Whitespace(_) | TokenKind::Eof => {},
182 _ => {
183 if !on_heading_line {
184 in_paragraph = true;
185 }
186 },
187 }
188 }
189 }
190
191 pub(crate) fn emit_diagnostic(&mut self, diagnostic: Diagnostic<Code>) {
193 self.diag_engine.emit(diagnostic);
194 }
195
196 pub(crate) fn diag(&mut self, code: Code, message: impl Into<String>) {
198 let (line, column) = self.tokens.get(self.pos).map(|t| (t.span.line, t.span.column)).unwrap_or((0, 0));
199 let span = Span::from_zero_based(self.meta.path.clone(), line, column, 1);
200 self.emit_diagnostic(duck_diagnostic::diag!(code, span, message.into()));
201 }
202
203 pub(crate) fn warn(&mut self, code: Code, message: impl Into<String>) {
205 self.diag(code, message);
206 }
207
208 pub(crate) fn span_at(&self, pos: usize) -> Span {
210 self.tokens.get(pos).map(|t| t.span.clone()).unwrap_or_else(default_span)
211 }
212
213 pub(crate) fn raw_source_for_token_range(&self, start: usize, end: usize) -> String {
224 if start >= end {
225 return String::new();
226 }
227 let Some(start_tok) = self.tokens.get(start) else {
228 return String::new();
229 };
230 let Some(end_tok) = self.tokens.get(end - 1) else {
231 return String::new();
232 };
233
234 if let Some(source) = self.source {
235 let base = source.as_ptr() as usize;
236 let src_lo = base;
237 let src_hi = base + source.len();
238 let lo = start_tok.raw.as_ptr() as usize;
239 let hi = end_tok.raw.as_ptr() as usize + end_tok.raw.len();
240 debug_assert!(lo <= hi, "token slice start pointer exceeded end pointer");
241 debug_assert!(lo >= src_lo, "token slice start pointer fell before the source buffer");
242 debug_assert!(hi <= src_hi, "token slice end pointer exceeded the source buffer");
243 if lo < src_lo || hi > src_hi || lo > hi {
244 return String::new();
245 }
246 let off_lo = lo - base;
247 let off_hi = hi - base;
248 return source.get(off_lo..off_hi).map(|s| s.to_string()).unwrap_or_default();
251 }
252
253 let mut out = String::new();
255 for tok in &self.tokens[start..end] {
256 out.push_str(tok.raw);
257 }
258 out
259 }
260
261 pub(crate) fn current_span(&self) -> Span {
263 self.tokens.get(self.pos).map(|t| t.span.clone()).unwrap_or_else(default_span)
264 }
265
266 pub(crate) fn peek(&'_ self) -> Option<&'_ Token<'_>> {
268 self.tokens.get(self.pos)
269 }
270
271 pub(crate) fn peek_kind(&self) -> Option<&TokenKind> {
273 self.tokens.get(self.pos).map(|t| &t.kind)
274 }
275
276 pub(crate) fn peek_raw(&self) -> Option<&'tokens str> {
279 self.tokens.get(self.pos).map(|t| t.raw)
280 }
281
282 pub(crate) fn advance(&'_ mut self) -> Option<&'_ Token<'_>> {
284 let t = self.tokens.get(self.pos);
285 if t.is_some() {
286 self.pos += 1;
287 }
288 t
289 }
290
291 pub(crate) fn is_eof(&self) -> bool {
293 matches!(self.peek_kind(), Some(TokenKind::Eof) | None)
294 }
295}
296
/// Removes backslash escapes from a link destination or title.
///
/// A backslash followed by an ASCII punctuation character is replaced by that
/// character alone. Every other backslash (before a non-punctuation character,
/// or at the very end of the input) is kept literally.
///
/// The input is walked by `char`, not by byte, so multi-byte UTF-8 text next to
/// an escape is copied through unchanged.
fn unescape_link_part(s: &str) -> String {
    // Fast path: nothing to unescape.
    if !s.contains('\\') {
        return s.to_string();
    }

    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(ch) = chars.next() {
        // Only consume the following character when it really is escapable;
        // otherwise it is handled by the next loop iteration.
        let escaped = if ch == '\\' {
            chars.next_if(|next| next.is_ascii_punctuation())
        } else {
            None
        };
        out.push(escaped.unwrap_or(ch));
    }
    out
}
354
355pub fn parse(source: &str) -> Document {
359 parse_with(source, ParseOptions::default())
360}
361
362pub fn parse_with(source: &str, options: ParseOptions) -> Document {
365 let meta = Arc::from(SourceMeta { path: Arc::from("<inline>"), origin: Origin::Inline("<inline>") });
366 let mut lex_engine = DiagnosticEngine::new();
367 let mut lexer = Lexer::new(source, meta.clone(), &mut lex_engine);
368 let _ = lexer.scan_tokens();
369 let tokens = std::mem::take(&mut lexer.tokens);
370 drop(lexer);
371
372 let mut parse_engine = DiagnosticEngine::new();
373 let mut p = Parser::new_with_options(tokens, meta, &mut parse_engine, options).with_source(source);
374 p.parse()
375}
376
377pub fn parse_inline_str(s: &str) -> Vec<crate::ast::Node> {
382 let meta = Arc::from(SourceMeta { path: Arc::from("<inline>"), origin: Origin::Inline("<inline>") });
383 let mut lex_engine = DiagnosticEngine::new();
384 let mut lexer = Lexer::new(s, meta.clone(), &mut lex_engine);
385 let _ = lexer.scan_tokens();
386 let tokens = std::mem::take(&mut lexer.tokens);
387 drop(lexer);
388 let mut parse_engine = DiagnosticEngine::new();
389 let mut p = Parser::new(tokens, meta, &mut parse_engine).with_source(s);
390 p.collect_inline_until_break()
391}