use crate::ast::*;
use crate::parser::Parser;
use dmc_diagnostic::Code;
use dmc_lexer::token::TokenKind;
impl<'eng, 'tokens> Parser<'eng, 'tokens> {
/// Parses an indented code block that sits inside a blockquote.
///
/// Each accepted line must report a leading indent of at least 4 columns; the
/// first 4 columns are dropped and any remainder is re-emitted as spaces. The
/// block continues onto the next line only when that line is reached through a
/// `SoftBreak`, carries at least one blockquote marker, and is again indented
/// by 4 or more columns. Otherwise the position is rewound to just after the
/// last accepted line.
///
/// Returns a `CodeBlock` without `lang` or `meta`.
pub(super) fn parse_indented_code_in_bq(&mut self) -> Node {
    let span = self.current_span();
    let mut code = String::new();
    while let Some(indent) = self.peek_leading_indent().filter(|&cols| cols >= 4) {
        // Skip the indentation token; keep whatever exceeds the 4-column prefix.
        self.advance();
        code.push_str(&" ".repeat(indent - 4));

        // Copy the rest of the line verbatim, up to the first line terminator.
        loop {
            let Some(tok) = self.peek() else {
                break;
            };
            if matches!(
                tok.kind,
                TokenKind::SoftBreak | TokenKind::HardBreak | TokenKind::BlankLine | TokenKind::Eof
            ) {
                break;
            }
            code.push_str(tok.raw);
            self.advance();
        }
        code.push('\n');

        // Everything consumed past this point is speculative look-ahead.
        let checkpoint = self.pos;
        if matches!(self.peek_kind(), Some(TokenKind::SoftBreak)) {
            self.advance();
        } else {
            break;
        }

        let markers = self.count_line_blockquote_markers();
        let continues = markers > 0 && {
            self.consume_blockquote_markers(markers);
            self.peek_leading_indent().is_some_and(|cols| cols >= 4)
        };
        if !continues {
            self.pos = checkpoint;
            break;
        }
    }
    Node::CodeBlock(CodeBlock { lang: None, meta: None, value: code, span })
}
/// Tries to parse a fenced code block whose fence was lexed as an
/// `IndentedCodeLine` at exactly `content_indent` columns.
///
/// Returns `None`, without consuming any token, unless the current leading
/// indent equals `content_indent` and the following token is an
/// `IndentedCodeLine` whose text starts with three or more backticks or tildes.
///
/// Body lines are taken while the leading indent stays at or above
/// `content_indent` and the line is an `IndentedCodeLine`. A line made of at
/// most three spaces, a run of the fence character at least as long as the
/// opener, and optional trailing spaces/tabs closes the block.
///
/// NOTE(review): anything after the opening fence run (an info string) is
/// ignored and `lang`/`meta` are always `None`; also, the whitespace token of
/// each body line is skipped without copying any of it into the value, so if a
/// body line can be indented deeper than `content_indent`, confirm that the
/// extra indentation is not meant to be preserved.
pub(super) fn parse_same_indent_fenced_code_in_list(&mut self, content_indent: usize) -> Option<Node> {
    if self.peek_leading_indent() != Some(content_indent) {
        return None;
    }
    let opener = self
        .tokens
        .get(self.pos + 1)
        .filter(|t| matches!(t.kind, TokenKind::IndentedCodeLine))?
        .raw;
    let opener_bytes = opener.as_bytes();
    let fence_byte = match opener_bytes.first() {
        Some(&b @ (b'`' | b'~')) => b,
        _ => return None,
    };
    let fence_n = opener_bytes.iter().take_while(|&&b| b == fence_byte).count();
    if fence_n < 3 {
        return None;
    }

    let span = self.current_span();
    // Consume the indentation token and the opening fence line.
    self.advance();
    self.advance();
    if matches!(self.peek_kind(), Some(TokenKind::SoftBreak) | Some(TokenKind::HardBreak)) {
        self.advance();
    }

    let mut value = String::new();
    loop {
        match self.peek_leading_indent() {
            Some(indent) if indent >= content_indent => {},
            _ => break,
        }
        self.advance();
        let line = match self.peek() {
            Some(t) if matches!(t.kind, TokenKind::IndentedCodeLine) => t.raw.to_string(),
            _ => break,
        };

        // Closing fence: <=3 spaces, >= fence_n fence characters, then only spaces/tabs.
        let closes = {
            let bytes = line.as_bytes();
            let lead = bytes.iter().take(3).take_while(|&&b| b == b' ').count();
            let rest = &bytes[lead..];
            let run = rest.iter().take_while(|&&b| b == fence_byte).count();
            run >= fence_n && rest[run..].iter().all(|&b| matches!(b, b' ' | b'\t'))
        };
        self.advance();

        if closes {
            if matches!(self.peek_kind(), Some(TokenKind::SoftBreak) | Some(TokenKind::HardBreak)) {
                self.advance();
            }
            break;
        }

        value.push_str(&line);
        value.push('\n');
        if !matches!(self.peek_kind(), Some(TokenKind::SoftBreak) | Some(TokenKind::HardBreak)) {
            break;
        }
        self.advance();
    }
    Some(Node::CodeBlock(CodeBlock { lang: None, meta: None, value, span }))
}
/// Parses an indented code block from plain `Whitespace`-prefixed lines.
///
/// A line is accepted when it starts with a `Whitespace` token located in
/// column 1 whose text is at least 4 characters long. The first 4 characters
/// are dropped, any remainder is re-emitted as spaces, and the rest of the
/// line is copied verbatim. Blank lines between two accepted lines are kept;
/// a trailing blank run that is not followed by another accepted line is left
/// unconsumed.
///
/// Returns a `CodeBlock` without `lang` or `meta`.
pub(super) fn parse_indented_code_fallback(&mut self) -> Node {
    let span = self.current_span();
    let mut code = String::new();
    loop {
        let width = match self.peek() {
            Some(t) if matches!(t.kind, TokenKind::Whitespace(_)) && t.span.column == 1 => {
                t.raw.chars().count()
            },
            _ => break,
        };
        if width < 4 {
            break;
        }
        self.advance();
        code.push_str(&" ".repeat(width - 4));

        // Copy the remainder of the line up to its terminator.
        loop {
            let Some(tok) = self.peek() else {
                break;
            };
            if matches!(
                tok.kind,
                TokenKind::SoftBreak | TokenKind::HardBreak | TokenKind::BlankLine | TokenKind::Eof
            ) {
                break;
            }
            code.push_str(tok.raw);
            self.advance();
        }
        code.push('\n');

        // Look ahead over the line gap: any number of `BlankLine` tokens (2 units
        // each), optionally closed by a single `SoftBreak` (1 unit).
        let checkpoint = self.pos;
        let mut gap = 0usize;
        while matches!(self.peek_kind(), Some(TokenKind::BlankLine)) {
            self.advance();
            gap += 2;
        }
        if matches!(self.peek_kind(), Some(TokenKind::SoftBreak)) {
            self.advance();
            gap += 1;
        }
        if gap == 0 {
            break;
        }

        let resumes = self.peek().is_some_and(|t| {
            matches!(t.kind, TokenKind::Whitespace(_)) && t.raw.chars().count() >= 4 && t.span.column == 1
        });
        if !resumes {
            self.pos = checkpoint;
            break;
        }
        // One unit of the gap is the newline already pushed for the previous line.
        code.push_str(&"\n".repeat(gap - 1));
    }
    Node::CodeBlock(CodeBlock { lang: None, meta: None, value: code, span })
}
/// Parses consecutive indented code lines into one `CodeBlock` (no `lang`, no `meta`).
///
/// A code line is recognised as a `Whitespace` token directly followed by an
/// `IndentedCodeLine` token. Blank lines between two code lines are kept in the
/// block; if a blank run is not followed by another code line, the parser
/// position is rewound to just after the last code line and the block ends.
pub(super) fn parse_indented_code(&mut self) -> Node {
let span = self.current_span();
let mut buf = String::new();
loop {
// Stop unless the next two tokens are `Whitespace` + `IndentedCodeLine`.
let starts_indent = matches!(self.peek_kind(), Some(TokenKind::Whitespace(_)))
&& matches!(self.tokens.get(self.pos + 1).map(|t| &t.kind), Some(TokenKind::IndentedCodeLine));
if !starts_indent {
break;
}
// Number of bytes of the whitespace token to re-emit as spaces: when the token
// starts with a tab, everything after that tab; otherwise everything after the
// first four bytes.
let extra = self
.peek()
.map(|t| {
if t.raw.starts_with('\t') {
t.raw.len() - 1
} else {
t.raw.len().saturating_sub(4)
}
})
.unwrap_or(0);
// Consume the whitespace token.
self.advance();
// Take the line text: normally the single `IndentedCodeLine` token; otherwise
// concatenate raw tokens up to the next line terminator.
let mut body = match self.peek() {
Some(t) if matches!(t.kind, TokenKind::IndentedCodeLine) => {
let raw = t.raw.to_string();
self.advance();
raw
},
_ => {
let mut s = String::new();
while let Some(t) = self.peek() {
match &t.kind {
TokenKind::SoftBreak | TokenKind::HardBreak | TokenKind::BlankLine | TokenKind::Eof => break,
_ => {
s.push_str(t.raw);
self.advance();
},
}
}
s
},
};
if extra > 0 {
body = " ".repeat(extra) + &body;
}
buf.push_str(&body);
buf.push('\n');
// Everything consumed below is look-ahead; `saved` is the rewind point.
let saved = self.pos;
// `blanks` counts line-gap units: 1 per `SoftBreak`, 2 per `BlankLine`.
let mut blanks: usize = 0;
// Trailing-space counts (characters beyond 4) recorded for the blank lines seen.
let mut blank_ws_visible: Vec<usize> = Vec::new();
// At most one bare `SoftBreak` (not preceded by whitespace) is consumed per gap.
let mut consumed_softbreak = false;
loop {
// A whitespace-only line: `Whitespace` in column 1 followed by a line terminator.
let ws_then_break = matches!(self.peek_kind(), Some(TokenKind::Whitespace(_)))
&& self.peek().is_some_and(|t| t.span.column == 1)
&& matches!(
self.tokens.get(self.pos + 1).map(|t| &t.kind),
Some(TokenKind::SoftBreak) | Some(TokenKind::BlankLine)
);
if ws_then_break {
let ws_n = self.peek().map(|t| t.raw.chars().count()).unwrap_or(0);
self.advance();
if matches!(self.peek_kind(), Some(TokenKind::BlankLine)) {
// Whitespace + `BlankLine`: two units, the second one with no visible whitespace.
self.advance();
blanks += 2;
blank_ws_visible.push(ws_n.saturating_sub(4));
blank_ws_visible.push(0);
} else {
// Whitespace + `SoftBreak`: one unit.
self.advance();
blanks += 1;
blank_ws_visible.push(ws_n.saturating_sub(4));
}
continue;
}
match self.peek_kind() {
// The bare `SoftBreak` adds a unit but records no whitespace entry.
Some(TokenKind::SoftBreak) if !consumed_softbreak => {
self.advance();
blanks += 1;
consumed_softbreak = true;
},
Some(TokenKind::BlankLine) => {
self.advance();
blanks += 2;
blank_ws_visible.push(0);
},
_ => break,
}
}
// No line terminator at all: the block ends here.
if blanks == 0 {
break;
}
// The gap only belongs to the block when another code line follows it.
let next_is_indent = matches!(self.peek_kind(), Some(TokenKind::Whitespace(_)))
&& matches!(self.tokens.get(self.pos + 1).map(|t| &t.kind), Some(TokenKind::IndentedCodeLine));
if !next_is_indent {
self.pos = saved;
break;
}
// One unit is not emitted — presumably it stands for the newline already pushed
// after the code line above (TODO confirm against the lexer's token layout).
let blanks_to_emit = blanks.saturating_sub(1);
for i in 0..blanks_to_emit {
// Missing entries fall back to 0, i.e. an empty line.
let visible = blank_ws_visible.get(i).copied().unwrap_or(0);
if visible > 0 {
buf.push_str(&" ".repeat(visible));
}
buf.push('\n');
}
}
Node::CodeBlock(CodeBlock { lang: None, meta: None, value: buf, span })
}
/// Parses a fenced code block at the current position by delegating to
/// `parse_code_block_with_blockquote_prefix` with a blockquote depth of 0.
pub(super) fn parse_code_block(&mut self) -> Node {
self.parse_code_block_with_blockquote_prefix(0)
}
/// Parses a fenced code block at the current position by delegating to
/// `parse_code_block_with_blockquote_prefix`, forwarding `bq_depth` as the
/// blockquote depth.
pub(super) fn parse_code_block_in_blockquote(&mut self, bq_depth: usize) -> Node {
self.parse_code_block_with_blockquote_prefix(bq_depth)
}
fn parse_code_block_with_blockquote_prefix(&mut self, bq_depth: usize) -> Node {
let span = self.current_span();
let open_line = self.peek().map(|t| t.span.line).unwrap_or(0);
let (fence_char, fence_n, fence_indent) = match self.peek() {
Some(t) => match t.kind {
TokenKind::CodeFenceOpen(c, n) => (c, n, if bq_depth == 0 { t.span.column.saturating_sub(1) } else { 0 }),
_ => (dmc_lexer::token::FenceChar::Backtick, 3, 0),
},
None => (dmc_lexer::token::FenceChar::Backtick, 3, 0),
};
self.advance();
let info = match self.peek() {
Some(t) if t.span.line == open_line && matches!(t.kind, TokenKind::CodeFenceInfo | TokenKind::Text) => {
let raw = t.raw.to_string();
self.advance();
raw
},
_ => String::new(),
};
let info_trimmed = info.trim();
let (lang, meta) = if info_trimmed.is_empty() {
(None, None)
} else {
let decode = |s: &str| crate::inline::decode_entities_in(&Self::unescape_markdown(s));
match info_trimmed.split_once(char::is_whitespace) {
Some((l, rest)) => {
let rest = rest.trim();
(Some(decode(l)), if rest.is_empty() { None } else { Some(decode(rest)) })
},
None => (Some(decode(info_trimmed)), None),
}
};
let mut value = String::new();
let mut closed = false;
if bq_depth > 0 {
if let Some(t) = self.peek()
&& matches!(t.kind, TokenKind::CodeFenceContent)
{
value.push_str(&Self::strip_blockquote_prefix_from_fence_content(t.raw, bq_depth));
self.advance();
}
if matches!(self.peek_kind(), Some(TokenKind::CodeFenceClose(c, m)) if *c == fence_char && *m >= fence_n) {
self.advance();
closed = true;
}
} else {
while let Some(t) = self.peek() {
match &t.kind {
TokenKind::CodeFenceClose(c, m) if *c == fence_char && *m >= fence_n => {
self.advance();
closed = true;
break;
},
TokenKind::Eof => break,
TokenKind::Text => {
value.push_str(t.raw);
self.advance();
},
_ => {
value.push_str(t.raw);
self.advance();
},
}
}
}
if !closed {
let fence = match fence_char {
dmc_lexer::token::FenceChar::Backtick => "`",
dmc_lexer::token::FenceChar::Tilde => "~",
};
let diagnostic = duck_diagnostic::diag!(
Code::UnterminatedCodeBlockBlock,
span.clone(),
"fenced code block never found a matching closing fence; treating the rest of the file as code"
)
.with_help(format!("add a closing fence with at least {fence_n} `{fence}` characters"));
self.emit_diagnostic(diagnostic);
}
if !value.is_empty() && !value.ends_with('\n') {
value.push('\n');
}
if fence_indent > 0 {
let stripped = value
.split_inclusive('\n')
.map(|line| {
let mut consumed = 0usize;
let bytes = line.as_bytes();
while consumed < fence_indent && consumed < bytes.len() && bytes[consumed] == b' ' {
consumed += 1;
}
&line[consumed..]
})
.collect::<String>();
value = stripped;
}
Node::CodeBlock(CodeBlock { lang, meta, value, span })
}
/// Strips `depth` blockquote markers from the start of every line of `raw`.
///
/// Per line: up to three leading spaces are skipped, then `depth` `>` markers
/// are expected. Between markers any run of spaces/tabs is skipped; after the
/// last marker at most one space or tab is removed. A line that does not carry
/// all `depth` markers is copied through unchanged. Line terminators are kept.
fn strip_blockquote_prefix_from_fence_content(raw: &str, depth: usize) -> String {
    let mut out = String::with_capacity(raw.len());
    for line in raw.split_inclusive('\n') {
        let bytes = line.as_bytes();
        let mut cursor = bytes.iter().take(3).take_while(|&&b| b == b' ').count();
        let mut complete = true;
        for level in 1..=depth {
            if bytes.get(cursor) != Some(&b'>') {
                complete = false;
                break;
            }
            cursor += 1;
            if level < depth {
                // Between nested markers, any amount of horizontal whitespace is allowed.
                cursor += bytes[cursor..]
                    .iter()
                    .take_while(|&&b| matches!(b, b' ' | b'\t'))
                    .count();
            } else if bytes.get(cursor).is_some_and(|&b| matches!(b, b' ' | b'\t')) {
                // After the innermost marker only one separator belongs to the prefix.
                cursor += 1;
            }
        }
        // Only ASCII bytes were skipped, so `cursor` is always a char boundary.
        out.push_str(if complete { &line[cursor..] } else { line });
    }
    out
}
}