1use crate::ast::*;
2use crate::refs::{RefMap, parse_link_ref_def};
3use dmc_diagnostic::Code;
4use dmc_diagnostic::metadata::{Origin, SourceMeta};
5use dmc_lexer::Lexer;
6use dmc_lexer::token::{Token, TokenKind};
7use duck_diagnostic::{Diagnostic, DiagnosticEngine, Span};
8use std::sync::Arc;
9
/// Optional switches that adjust parser behaviour.
///
/// Every flag is `false` under `Default`, which is what [`Parser::new`] and
/// [`parse`] use.
///
/// NOTE(review): none of the flags is read in this part of the file, so the
/// per-field notes below are inferred from the field names only — confirm
/// against the code that consumes them.
#[derive(Debug, Clone, Copy, Default)]
pub struct ParseOptions {
    /// Presumably switches HTML-block handling to strict CommonMark rules — TODO confirm.
    pub cm_strict_html_blocks: bool,
    /// Presumably enables GFM-style autolink recognition — TODO confirm.
    pub gfm_autolinks: bool,
    /// Presumably selects an older GFM-compatible emphasis algorithm — TODO confirm.
    pub legacy_gfm_emphasis: bool,
}
26
/// Token-stream parser that produces a [`Document`].
///
/// `'eng` is the lifetime of the mutable borrow of the diagnostic engine;
/// `'tokens` is the lifetime of the text that the tokens (and the optional
/// `source`) borrow from.
pub struct Parser<'eng, 'tokens> {
    /// The token stream being parsed.
    pub tokens: Vec<Token<'tokens>>,
    /// Metadata of the input; `diag` uses `meta.path` when building diagnostic spans.
    pub meta: Arc<SourceMeta>,
    /// Index into `tokens` of the current (next unconsumed) token.
    pub pos: usize,
    /// Link reference definitions; filled by `collect_refs` at the start of `parse`.
    pub refs: RefMap,
    /// Engine that `emit_diagnostic` forwards every diagnostic to.
    pub diag_engine: &'eng mut DiagnosticEngine<Code>,
    /// Options this parser was constructed with.
    pub options: ParseOptions,
    /// Original source text, if attached via `with_source`. When present,
    /// `raw_source_for_token_range` slices it instead of concatenating token text.
    pub source: Option<&'tokens str>,
    /// Starts at 0. Presumably the current link-label nesting depth, bounded by
    /// `MAX_LINK_LABEL_DEPTH` — TODO confirm (not used in this part of the file).
    pub link_label_depth: u16,
    /// Starts empty. Presumably the names of currently open JSX elements —
    /// TODO confirm (not used in this part of the file).
    pub jsx_open_stack: Vec<String>,
    /// Starts at 0. Presumably the current block nesting depth, bounded by
    /// `MAX_BLOCK_NESTING_DEPTH` — TODO confirm (not used in this part of the file).
    pub block_depth: usize,
}
56
/// Limit of 128 on block nesting.
///
/// NOTE(review): presumably the upper bound for `Parser::block_depth`; the code
/// that enforces it is not in this part of the file — confirm.
pub(crate) const MAX_BLOCK_NESTING_DEPTH: usize = 128;

/// Limit of 12 on link-label nesting.
///
/// NOTE(review): presumably the upper bound for `Parser::link_label_depth`; the
/// code that enforces it is not in this part of the file — confirm.
pub(crate) const MAX_LINK_LABEL_DEPTH: u16 = 12;
68
69impl<'eng, 'tokens> Parser<'eng, 'tokens> {
70 pub fn new(
71 tokens: Vec<Token<'tokens>>,
72 meta: Arc<SourceMeta>,
73 diag_engine: &'eng mut DiagnosticEngine<Code>,
74 ) -> Self {
75 Self {
76 tokens,
77 meta,
78 pos: 0,
79 refs: RefMap::new(),
80 diag_engine,
81 options: ParseOptions::default(),
82 source: None,
83 link_label_depth: 0,
84 jsx_open_stack: Vec::new(),
85 block_depth: 0,
86 }
87 }
88
89 pub fn new_with_options(
90 tokens: Vec<Token<'tokens>>,
91 meta: Arc<SourceMeta>,
92 diag_engine: &'eng mut DiagnosticEngine<Code>,
93 options: ParseOptions,
94 ) -> Self {
95 Self {
96 tokens,
97 meta,
98 pos: 0,
99 refs: RefMap::new(),
100 diag_engine,
101 options,
102 source: None,
103 link_label_depth: 0,
104 jsx_open_stack: Vec::new(),
105 block_depth: 0,
106 }
107 }
108
109 pub fn with_source(mut self, source: &'tokens str) -> Self {
112 self.source = Some(source);
113 self
114 }
115
116 pub fn parse(&mut self) -> Document {
119 self.collect_refs();
120 let span = self.tokens.first().map(|t| t.span.clone()).unwrap_or_else(default_span);
121 let mut children = Vec::new();
122 while !self.is_eof() {
123 let before = self.pos;
124 if let Some(node) = self.parse_block() {
125 children.push(node);
126 }
127 if self.pos == before {
128 self.advance();
129 }
130 }
131 Document { children, span }
132 }
133
134 fn collect_refs(&mut self) {
138 let mut in_paragraph = false;
139 let mut on_heading_line = false;
140 for tok in &self.tokens {
141 match &tok.kind {
142 TokenKind::LinkRefDef => {
143 if !in_paragraph && let Some((label, url, title)) = parse_link_ref_def(tok.raw) {
144 let url = crate::inline::decode_entities_in(&unescape_link_part(&url));
145 let title = title.map(|t| crate::inline::decode_entities_in(&unescape_link_part(&t)));
146 self.refs.insert(&label, url, title);
147 }
148 },
149 TokenKind::BlankLine
150 | TokenKind::CodeFenceOpen(_, _)
151 | TokenKind::CodeFenceClose(_, _)
152 | TokenKind::ThematicBreak
153 | TokenKind::FrontmatterEnd(_) => {
154 in_paragraph = false;
155 on_heading_line = false;
156 },
157 TokenKind::Heading(_) => {
158 in_paragraph = false;
159 on_heading_line = true;
160 },
161 TokenKind::BlockQuoteMarker => {
162 in_paragraph = false;
163 on_heading_line = false;
164 },
165 TokenKind::SoftBreak | TokenKind::HardBreak => {
166 if on_heading_line {
167 in_paragraph = false;
168 }
169 on_heading_line = false;
170 },
171 TokenKind::Whitespace(_) | TokenKind::Eof => {},
172 _ => {
173 if !on_heading_line {
174 in_paragraph = true;
175 }
176 },
177 }
178 }
179 }
180
181 pub(crate) fn emit_diagnostic(&mut self, diagnostic: Diagnostic<Code>) {
182 self.diag_engine.emit(diagnostic);
183 }
184
185 pub(crate) fn diag(&mut self, code: Code, message: impl Into<String>) {
186 let (line, column) = self.tokens.get(self.pos).map(|t| (t.span.line, t.span.column)).unwrap_or((0, 0));
187 let span = Span::from_zero_based(self.meta.path.clone(), line, column, 1);
188 self.emit_diagnostic(duck_diagnostic::diag!(code, span, message.into()));
189 }
190
191 pub(crate) fn warn(&mut self, code: Code, message: impl Into<String>) {
192 self.diag(code, message);
193 }
194
195 pub(crate) fn span_at(&self, pos: usize) -> Span {
196 self.tokens.get(pos).map(|t| t.span.clone()).unwrap_or_else(default_span)
197 }
198
199 pub(crate) fn raw_source_for_token_range(&self, start: usize, end: usize) -> String {
205 if start >= end {
206 return String::new();
207 }
208 let Some(start_tok) = self.tokens.get(start) else {
209 return String::new();
210 };
211 let Some(end_tok) = self.tokens.get(end - 1) else {
212 return String::new();
213 };
214
215 if let Some(source) = self.source {
216 let base = source.as_ptr() as usize;
217 let src_lo = base;
218 let src_hi = base + source.len();
219 let lo = start_tok.raw.as_ptr() as usize;
220 let hi = end_tok.raw.as_ptr() as usize + end_tok.raw.len();
221 debug_assert!(lo <= hi, "token slice start pointer exceeded end pointer");
222 debug_assert!(lo >= src_lo, "token slice start pointer fell before the source buffer");
223 debug_assert!(hi <= src_hi, "token slice end pointer exceeded the source buffer");
224 if lo < src_lo || hi > src_hi || lo > hi {
225 return String::new();
226 }
227 let off_lo = lo - base;
228 let off_hi = hi - base;
229 return source.get(off_lo..off_hi).map(|s| s.to_string()).unwrap_or_default();
230 }
231
232 let mut out = String::new();
233 for tok in &self.tokens[start..end] {
234 out.push_str(tok.raw);
235 }
236 out
237 }
238
239 pub(crate) fn current_span(&self) -> Span {
240 self.tokens.get(self.pos).map(|t| t.span.clone()).unwrap_or_else(default_span)
241 }
242
243 pub(crate) fn peek(&'_ self) -> Option<&'_ Token<'_>> {
244 self.tokens.get(self.pos)
245 }
246
247 pub(crate) fn peek_kind(&self) -> Option<&TokenKind> {
248 self.tokens.get(self.pos).map(|t| &t.kind)
249 }
250
251 pub(crate) fn peek_raw(&self) -> Option<&'tokens str> {
254 self.tokens.get(self.pos).map(|t| t.raw)
255 }
256
257 pub(crate) fn advance(&'_ mut self) -> Option<&'_ Token<'_>> {
258 let t = self.tokens.get(self.pos);
259 if t.is_some() {
260 self.pos += 1;
261 }
262 t
263 }
264
265 pub(crate) fn is_eof(&self) -> bool {
266 matches!(self.peek_kind(), Some(TokenKind::Eof) | None)
267 }
268}
269
/// Removes backslash escapes from a link destination or title.
///
/// A backslash followed by an ASCII punctuation character is replaced by that
/// character alone. Any other backslash (before a non-punctuation character,
/// or at the very end of the string) is kept literally.
///
/// The input is processed per `char`, so non-ASCII text passes through
/// unchanged. Pushing individual UTF-8 bytes as `char`s would turn every
/// multi-byte character into several Latin-1 characters.
fn unescape_link_part(s: &str) -> String {
    // Fast path: nothing to unescape.
    if !s.contains('\\') {
        return s.to_string();
    }

    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        // `next_if` consumes the following character only when it is escapable,
        // so an unescapable follower is handled by the next loop iteration.
        let escaped = if c == '\\' { chars.next_if(char::is_ascii_punctuation) } else { None };
        out.push(escaped.unwrap_or(c));
    }
    out
}
327
328pub fn parse(source: &str) -> Document {
331 parse_with(source, ParseOptions::default())
332}
333
334pub fn parse_with(source: &str, options: ParseOptions) -> Document {
336 let meta = Arc::from(SourceMeta { path: Arc::from("<inline>"), origin: Origin::Inline("<inline>") });
337 let mut lex_engine = DiagnosticEngine::new();
338 let mut lexer = Lexer::new(source, meta.clone(), &mut lex_engine);
339 let _ = lexer.scan_tokens();
340 let tokens = std::mem::take(&mut lexer.tokens);
341 drop(lexer);
342
343 let mut parse_engine = DiagnosticEngine::new();
344 let mut p = Parser::new_with_options(tokens, meta, &mut parse_engine, options).with_source(source);
345 p.parse()
346}
347
348pub fn parse_inline_str(s: &str) -> Vec<crate::ast::Node> {
351 let meta = Arc::from(SourceMeta { path: Arc::from("<inline>"), origin: Origin::Inline("<inline>") });
352 let mut lex_engine = DiagnosticEngine::new();
353 let mut lexer = Lexer::new(s, meta.clone(), &mut lex_engine);
354 let _ = lexer.scan_tokens();
355 let tokens = std::mem::take(&mut lexer.tokens);
356 drop(lexer);
357 let mut parse_engine = DiagnosticEngine::new();
358 let mut p = Parser::new(tokens, meta, &mut parse_engine).with_source(s);
359 p.collect_inline_until_break()
360}