use crate::ast::*;
use crate::refs::{RefMap, parse_link_ref_def};
use dmc_diagnostic::Code;
use dmc_diagnostic::metadata::{Origin, SourceMeta};
use dmc_lexer::Lexer;
use dmc_lexer::token::{Token, TokenKind};
use duck_diagnostic::{Diagnostic, DiagnosticEngine, Span};
use std::sync::Arc;
/// Feature switches for [`Parser`].
///
/// Every flag is a plain `bool`, so the derived `Default` leaves all of them off.
/// None of the flags is read in this part of the file; the per-field notes below
/// are inferred from the names and should be confirmed against their use sites.
#[derive(Debug, Clone, Copy, Default)]
pub struct ParseOptions {
/// NOTE(review): presumably applies the stricter CommonMark rules to HTML blocks — confirm.
pub cm_strict_html_blocks: bool,
/// NOTE(review): presumably enables GFM-style autolink detection — confirm.
pub gfm_autolinks: bool,
/// NOTE(review): presumably selects an older GFM emphasis behaviour — confirm.
pub legacy_gfm_emphasis: bool,
}
/// Token-stream parser that produces a [`Document`].
///
/// `'eng` is the lifetime of the borrowed diagnostic engine; `'tokens` is the
/// lifetime of the text that the tokens (and the optional `source`) borrow from.
pub struct Parser<'eng, 'tokens> {
/// Tokens to parse, in order.
pub tokens: Vec<Token<'tokens>>,
/// Metadata of the input; its `path` is attached to the spans of emitted diagnostics.
pub meta: Arc<SourceMeta>,
/// Index into `tokens` of the current (next unconsumed) token.
pub pos: usize,
/// Link reference definitions, filled in at the start of `parse`.
pub refs: RefMap,
/// Engine that receives every diagnostic the parser emits.
pub diag_engine: &'eng mut DiagnosticEngine<Code>,
/// Feature switches for this parse.
pub options: ParseOptions,
/// Original source text, if supplied via `with_source`. When present, raw text
/// for a token range is sliced directly out of it instead of being rebuilt
/// from the individual tokens.
pub source: Option<&'tokens str>,
/// Starts at 0. NOTE(review): presumably the current link-label nesting depth,
/// bounded by `MAX_LINK_LABEL_DEPTH` — confirm at the use sites.
pub link_label_depth: u16,
/// Starts empty. NOTE(review): presumably the names of the currently open JSX
/// elements — confirm at the use sites.
pub jsx_open_stack: Vec<String>,
}
/// NOTE(review): presumably the upper bound for `Parser::link_label_depth`
/// (nesting of link labels). It is not referenced in this part of the file —
/// confirm against the code that reads it.
pub(crate) const MAX_LINK_LABEL_DEPTH: u16 = 12;
impl<'eng, 'tokens> Parser<'eng, 'tokens> {
    /// Creates a parser over `tokens` with [`ParseOptions::default`].
    ///
    /// Shorthand for [`Parser::new_with_options`]: the cursor starts at the first
    /// token, the reference map is empty and no source text is attached.
    pub fn new(
        tokens: Vec<Token<'tokens>>,
        meta: Arc<SourceMeta>,
        diag_engine: &'eng mut DiagnosticEngine<Code>,
    ) -> Self {
        Self::new_with_options(tokens, meta, diag_engine, ParseOptions::default())
    }

    /// Creates a parser over `tokens` with explicit `options`.
    ///
    /// The cursor starts at the first token, the reference map is empty and no
    /// source text is attached (see [`Parser::with_source`]).
    pub fn new_with_options(
        tokens: Vec<Token<'tokens>>,
        meta: Arc<SourceMeta>,
        diag_engine: &'eng mut DiagnosticEngine<Code>,
        options: ParseOptions,
    ) -> Self {
        Self {
            tokens,
            meta,
            pos: 0,
            refs: RefMap::new(),
            diag_engine,
            options,
            source: None,
            link_label_depth: 0,
            jsx_open_stack: Vec::new(),
        }
    }

    /// Attaches the original source text and returns the parser (builder style).
    ///
    /// With a source attached, [`Parser::raw_source_for_token_range`] slices the
    /// source directly instead of concatenating token text.
    pub fn with_source(mut self, source: &'tokens str) -> Self {
        self.source = Some(source);
        self
    }

    /// Parses the whole token stream into a [`Document`].
    ///
    /// Link reference definitions are collected in a first pass, then blocks are
    /// parsed until end of input. The document span is the span of the first
    /// token, or `default_span()` when there are no tokens.
    pub fn parse(&mut self) -> Document {
        self.collect_refs();
        let span = self.tokens.first().map(|t| t.span.clone()).unwrap_or_else(default_span);
        let mut children = Vec::new();
        while !self.is_eof() {
            let before = self.pos;
            if let Some(node) = self.parse_block() {
                children.push(node);
            }
            // Guarantee forward progress: if the block parser consumed nothing,
            // skip one token so the loop cannot spin forever.
            if self.pos == before {
                self.advance();
            }
        }
        Document { children, span }
    }

    /// Pre-pass that records every link reference definition into `self.refs`.
    ///
    /// A `LinkRefDef` token is ignored while the scan is inside paragraph content
    /// (this matches the CommonMark rule that a definition cannot interrupt a
    /// paragraph). URL and title are backslash-unescaped and entity-decoded
    /// before being stored.
    fn collect_refs(&mut self) {
        // True while the tokens seen since the last block boundary are paragraph content.
        let mut in_paragraph = false;
        // True from a `Heading` token until the line break that ends that line.
        let mut on_heading_line = false;
        for tok in &self.tokens {
            match &tok.kind {
                TokenKind::LinkRefDef => {
                    if !in_paragraph && let Some((label, url, title)) = parse_link_ref_def(tok.raw) {
                        let url = crate::inline::decode_entities_in(&unescape_link_part(&url));
                        let title = title.map(|t| crate::inline::decode_entities_in(&unescape_link_part(&t)));
                        self.refs.insert(&label, url, title);
                    }
                },
                // These tokens end any open paragraph (and any heading line).
                TokenKind::BlankLine
                | TokenKind::CodeFenceOpen(_, _)
                | TokenKind::CodeFenceClose(_, _)
                | TokenKind::ThematicBreak
                | TokenKind::FrontmatterEnd(_)
                | TokenKind::BlockQuoteMarker => {
                    in_paragraph = false;
                    on_heading_line = false;
                },
                TokenKind::Heading(_) => {
                    in_paragraph = false;
                    on_heading_line = true;
                },
                TokenKind::SoftBreak | TokenKind::HardBreak => {
                    // A break that ends a heading line leaves us outside a paragraph;
                    // a break inside a paragraph keeps the paragraph open.
                    if on_heading_line {
                        in_paragraph = false;
                    }
                    on_heading_line = false;
                },
                // Whitespace and EOF never change the paragraph state.
                TokenKind::Whitespace(_) | TokenKind::Eof => {},
                _ => {
                    // Any other token is content; it opens a paragraph unless it
                    // belongs to the heading line.
                    if !on_heading_line {
                        in_paragraph = true;
                    }
                },
            }
        }
    }

    /// Forwards `diagnostic` to the diagnostic engine.
    pub(crate) fn emit_diagnostic(&mut self, diagnostic: Diagnostic<Code>) {
        self.diag_engine.emit(diagnostic);
    }

    /// Emits a diagnostic with `code` and `message` located at the current token.
    ///
    /// The span is built from the current token's line and column, falling back
    /// to `(0, 0)` when the cursor is past the last token.
    pub(crate) fn diag(&mut self, code: Code, message: impl Into<String>) {
        let (line, column) = self.tokens.get(self.pos).map(|t| (t.span.line, t.span.column)).unwrap_or((0, 0));
        // NOTE(review): the trailing `1` is presumably the span length — confirm
        // against `Span::from_zero_based`.
        let span = Span::from_zero_based(self.meta.path.clone(), line, column, 1);
        self.emit_diagnostic(duck_diagnostic::diag!(code, span, message.into()));
    }

    /// Emits a diagnostic at the current token.
    ///
    /// Currently identical to [`Parser::diag`]; no separate handling is applied here.
    pub(crate) fn warn(&mut self, code: Code, message: impl Into<String>) {
        self.diag(code, message);
    }

    /// Returns the span of the token at index `pos`, or `default_span()` when
    /// `pos` is out of range.
    pub(crate) fn span_at(&self, pos: usize) -> Span {
        self.tokens.get(pos).map(|t| t.span.clone()).unwrap_or_else(default_span)
    }

    /// Returns the raw text covered by tokens `start..end` (`end` exclusive).
    ///
    /// An empty string is returned when the range is empty or out of bounds.
    ///
    /// When a source is attached, the result is the slice of the source running
    /// from the start of the first token to the end of the last one, so any text
    /// lying between tokens is included. This relies on each token's `raw` being
    /// a sub-slice of that source; debug builds assert it, release builds return
    /// an empty string if it does not hold.
    ///
    /// Without a source, the `raw` text of the tokens is concatenated.
    pub(crate) fn raw_source_for_token_range(&self, start: usize, end: usize) -> String {
        if start >= end {
            return String::new();
        }
        let Some(start_tok) = self.tokens.get(start) else {
            return String::new();
        };
        let Some(end_tok) = self.tokens.get(end - 1) else {
            return String::new();
        };
        if let Some(source) = self.source {
            // Addresses are only compared, never dereferenced.
            let base = source.as_ptr() as usize;
            let src_lo = base;
            let src_hi = base + source.len();
            let lo = start_tok.raw.as_ptr() as usize;
            let hi = end_tok.raw.as_ptr() as usize + end_tok.raw.len();
            debug_assert!(lo <= hi, "token slice start pointer exceeded end pointer");
            debug_assert!(lo >= src_lo, "token slice start pointer fell before the source buffer");
            debug_assert!(hi <= src_hi, "token slice end pointer exceeded the source buffer");
            if lo < src_lo || hi > src_hi || lo > hi {
                return String::new();
            }
            let off_lo = lo - base;
            let off_hi = hi - base;
            // `get` also rejects offsets that are not on a char boundary.
            return source.get(off_lo..off_hi).map(|s| s.to_string()).unwrap_or_default();
        }
        // `end - 1` was a valid index above, so `start..end` is in bounds.
        let mut out = String::new();
        for tok in &self.tokens[start..end] {
            out.push_str(tok.raw);
        }
        out
    }

    /// Returns the span of the current token, or `default_span()` at end of input.
    pub(crate) fn current_span(&self) -> Span {
        self.span_at(self.pos)
    }

    /// Returns the current token without consuming it.
    pub(crate) fn peek(&'_ self) -> Option<&'_ Token<'_>> {
        self.tokens.get(self.pos)
    }

    /// Returns the kind of the current token without consuming it.
    pub(crate) fn peek_kind(&self) -> Option<&TokenKind> {
        self.tokens.get(self.pos).map(|t| &t.kind)
    }

    /// Returns the raw text of the current token without consuming it.
    pub(crate) fn peek_raw(&self) -> Option<&'tokens str> {
        self.tokens.get(self.pos).map(|t| t.raw)
    }

    /// Consumes and returns the current token; returns `None` (and leaves the
    /// cursor untouched) when the cursor is already past the last token.
    pub(crate) fn advance(&'_ mut self) -> Option<&'_ Token<'_>> {
        let t = self.tokens.get(self.pos);
        if t.is_some() {
            self.pos += 1;
        }
        t
    }

    /// Returns `true` when the current token is `Eof` or the cursor is past the
    /// last token.
    pub(crate) fn is_eof(&self) -> bool {
        matches!(self.peek_kind(), Some(TokenKind::Eof) | None)
    }
}
/// Removes backslash escapes from a link destination or title.
///
/// A backslash followed by an ASCII punctuation character is replaced by that
/// character alone. Any other backslash (before a non-punctuation character or
/// at the very end of the input) is kept literally.
///
/// The input is processed per `char`, not per byte, so non-ASCII text is copied
/// through unchanged.
fn unescape_link_part(s: &str) -> String {
    // Fast path: nothing to unescape.
    if !s.contains('\\') {
        return s.to_string();
    }
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        if c == '\\' {
            // Consume the next char only when it is escapable; otherwise the
            // backslash itself is emitted below and the next char is handled
            // by the following iteration.
            if let Some(escaped) = chars.next_if(|next| next.is_ascii_punctuation()) {
                out.push(escaped);
                continue;
            }
        }
        out.push(c);
    }
    out
}
/// Parses `source` into a [`Document`] using the default [`ParseOptions`].
///
/// Thin convenience wrapper around [`parse_with`].
pub fn parse(source: &str) -> Document {
    let options = ParseOptions::default();
    parse_with(source, options)
}
pub fn parse_with(source: &str, options: ParseOptions) -> Document {
let meta = Arc::from(SourceMeta { path: Arc::from("<inline>"), origin: Origin::Inline("<inline>") });
let mut lex_engine = DiagnosticEngine::new();
let mut lexer = Lexer::new(source, meta.clone(), &mut lex_engine);
let _ = lexer.scan_tokens();
let tokens = std::mem::take(&mut lexer.tokens);
drop(lexer);
let mut parse_engine = DiagnosticEngine::new();
let mut p = Parser::new_with_options(tokens, meta, &mut parse_engine, options).with_source(source);
p.parse()
}
pub fn parse_inline_str(s: &str) -> Vec<crate::ast::Node> {
let meta = Arc::from(SourceMeta { path: Arc::from("<inline>"), origin: Origin::Inline("<inline>") });
let mut lex_engine = DiagnosticEngine::new();
let mut lexer = Lexer::new(s, meta.clone(), &mut lex_engine);
let _ = lexer.scan_tokens();
let tokens = std::mem::take(&mut lexer.tokens);
drop(lexer);
let mut parse_engine = DiagnosticEngine::new();
let mut p = Parser::new(tokens, meta, &mut parse_engine).with_source(s);
p.collect_inline_until_break()
}