use std::collections::HashMap;
/// Horizontal alignment of a table column.
///
/// Stored once per column in the `aligns` field of `Token::Table` and also
/// carried by the standalone `Token::TableAlignment` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TableAlignment {
/// Align cell content to the left edge.
Left,
/// Centre cell content.
Center,
/// Align cell content to the right edge.
Right,
}
// Textually includes `entities_table.rs` from the build output directory
// (`OUT_DIR`).
// NOTE(review): presumably written by a build script and the place where
// `NAMED_ENTITIES` (used by `try_decode_entity`) is defined — confirm.
include!(concat!(env!("OUT_DIR"), "/entities_table.rs"));
// Lower-case tag names of HTML elements treated as "raw" blocks.
// NOTE(review): the consumer is not visible in this part of the file;
// presumably these start an HTML block whose body is passed through
// verbatim until the matching end tag — confirm against the HTML block parser.
const RAW_HTML_BLOCK_TAG_NAMES: &[&str] = &["script", "pre", "style", "textarea"];
// Lower-case tag names of block-level HTML elements, kept in alphabetical
// order.
// NOTE(review): the consumer is not visible in this part of the file;
// presumably a line opening or closing one of these tags starts an HTML
// block — confirm against the HTML block parser.
const BLOCK_ELEMENT_TAG_NAMES: &[&str] = &[
"address", "article", "aside", "base", "basefont", "blockquote",
"body", "caption", "center", "col", "colgroup", "dd", "details",
"dialog", "dir", "div", "dl", "dt", "fieldset", "figcaption",
"figure", "footer", "form", "frame", "frameset", "h1", "h2", "h3",
"h4", "h5", "h6", "head", "header", "hr", "html", "iframe",
"legend", "li", "link", "main", "menu", "menuitem", "nav",
"noframes", "ol", "optgroup", "option", "p", "param", "search",
"section", "summary", "table", "tbody", "td", "tfoot", "th",
"thead", "title", "tr", "track", "ul",
];
/// The kind of container the lexer is currently producing tokens for.
///
/// The context gates which constructs are recognised:
/// * `Root` and `BlockQuote` additionally allow indented code blocks, setext
///   headings and definition lists (see `next_token`).
/// * `Root`, `ListItem` and `BlockQuote` allow block-level tokens such as ATX
///   headings, thematic breaks, list items and block quotes.
/// * `Inline` and `TableCell` never trigger nested-list detection in
///   `parse_nested_content_inner`; `Inline` content also skips the emphasis
///   resolution pass there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ParseContext {
Root, ListItem, TableCell, BlockQuote, Inline, }
/// A node produced by the lexer: either a leaf (text, code, breaks, raw HTML)
/// or a container that owns nested tokens.
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
/// Heading with its inline content and a numeric value.
/// NOTE(review): the `usize` is presumably the heading level — confirm.
Heading(Vec<Token>, usize),
/// Emphasised content. `wrap_emphasis_pair` builds this with `level` set to
/// the number of delimiter characters consumed on each side (1 or 2).
Emphasis { level: usize, content: Vec<Token> },
/// Strong-emphasis wrapper around nested tokens.
/// NOTE(review): `wrap_emphasis_pair` emits `Emphasis { level: 2, .. }`
/// rather than this variant; where this one is produced is not visible here.
StrongEmphasis(Vec<Token>),
/// Code span or code block; `block` is `true` for block-level code.
Code {
language: String,
content: String,
block: bool,
},
/// Block quote holding its nested tokens.
BlockQuote(Vec<Token>),
/// A single list item.
///
/// `loose` is OR-ed with the result computed by `propagate_loose_tight`.
/// NOTE(review): `number` is presumably the ordinal of an ordered item,
/// `marker` the bullet/delimiter character and `checked` a task-list
/// checkbox state — confirm against `parse_list_item`.
ListItem {
content: Vec<Token>,
ordered: bool,
number: Option<usize>, marker: char,
checked: Option<bool>,
loose: bool,
},
/// Hyperlink with its inline content, destination and optional title.
Link {
content: Vec<Token>,
url: String,
title: Option<String>,
},
/// Image with its alt-text tokens, source and optional title.
Image {
alt: Vec<Token>,
url: String,
title: Option<String>,
},
/// Reference to a footnote, carrying the footnote label.
FootnoteReference(String),
/// Footnote definition: label plus body tokens.
FootnoteDefinition {
label: String,
content: Vec<Token>,
},
/// Footnote written inline, with a label and body tokens.
InlineFootnote {
label: String,
content: Vec<Token>,
},
/// Definition list made of term/definitions entries.
DefinitionList {
entries: Vec<DefinitionListEntry>,
},
/// Literal text.
Text(String),
/// Internal placeholder for a run of `count` emphasis delimiter characters
/// `ch`. Runs are paired up by `resolve_emphasis_chunk`, which rewrites any
/// leftover run into `Text`.
#[doc(hidden)]
DelimRun {
ch: char,
count: usize,
},
/// Table: header cells, per-column alignment, then rows of cells. Every
/// cell is a list of tokens.
Table {
headers: Vec<Vec<Token>>,
aligns: Vec<TableAlignment>,
rows: Vec<Vec<Vec<Token>>>,
},
/// A standalone column alignment marker.
/// NOTE(review): producer is not visible here; contributes no text in
/// `collect_all_text`.
TableAlignment(TableAlignment),
/// HTML comment text.
HtmlComment(String),
/// Raw inline HTML.
HtmlInline(String),
/// Raw block-level HTML.
HtmlBlock(String),
/// Line ending. Two adjacent `Newline` tokens are treated as a blank-line
/// boundary by `resolve_emphasis` and `item_has_internal_blank`.
Newline,
/// Hard line break.
HardBreak,
/// Thematic break.
HorizontalRule,
/// Struck-through content.
Strikethrough(Vec<Token>),
/// Highlighted content.
Highlight(Vec<Token>),
/// Math content; `inline` distinguishes inline from display math.
Math { inline: bool, content: String },
/// Input that could not be classified, kept as raw text.
Unknown(String),
}
/// One entry of a `Token::DefinitionList`: a term and the definitions
/// attached to it.
#[derive(Debug, Clone, PartialEq)]
pub struct DefinitionListEntry {
/// Tokens making up the term being defined.
pub term: Vec<Token>,
/// Zero or more definitions, each a separate token list.
pub definitions: Vec<Vec<Token>>,
}
impl Token {
    /// Returns the concatenated textual content of `tokens`, descending into
    /// every nested token list.
    ///
    /// Line breaks, thematic breaks and table alignment markers contribute
    /// nothing; raw HTML, comments, code, math and footnote labels are copied
    /// verbatim.
    pub fn collect_all_text(tokens: &[Token]) -> String {
        let mut buffer = String::new();
        tokens
            .iter()
            .for_each(|tok| tok.collect_text_recursive(&mut buffer));
        buffer
    }

    /// Appends this token's textual content to `result`.
    fn collect_text_recursive(&self, result: &mut String) {
        // Shared walker for any list of child tokens.
        fn walk(children: &[Token], sink: &mut String) {
            for child in children {
                child.collect_text_recursive(sink);
            }
        }

        match self {
            // Leaves that carry a plain string.
            Token::Text(text)
            | Token::HtmlComment(text)
            | Token::HtmlInline(text)
            | Token::HtmlBlock(text)
            | Token::Unknown(text)
            | Token::FootnoteReference(text) => result.push_str(text),
            Token::Code { content, .. } | Token::Math { content, .. } => {
                result.push_str(content)
            }
            // An unresolved delimiter run reads as its literal characters.
            Token::DelimRun { ch, count } => {
                result.extend(std::iter::repeat(*ch).take(*count))
            }
            // Containers with a single child list.
            Token::Heading(children, _)
            | Token::StrongEmphasis(children)
            | Token::BlockQuote(children)
            | Token::Strikethrough(children)
            | Token::Highlight(children)
            | Token::Emphasis { content: children, .. }
            | Token::ListItem { content: children, .. }
            | Token::Link { content: children, .. }
            | Token::Image { alt: children, .. }
            | Token::FootnoteDefinition { content: children, .. }
            | Token::InlineFootnote { content: children, .. } => walk(children, result),
            // Term first, then each of its definitions, entry by entry.
            Token::DefinitionList { entries } => {
                for entry in entries {
                    walk(&entry.term, result);
                    for definition in &entry.definitions {
                        walk(definition, result);
                    }
                }
            }
            // Header cells first, then body cells in row-major order.
            Token::Table { headers, rows, .. } => {
                for header_cell in headers {
                    walk(header_cell, result);
                }
                for cell in rows.iter().flatten() {
                    walk(cell, result);
                }
            }
            // Structural tokens without text.
            Token::Newline
            | Token::HardBreak
            | Token::HorizontalRule
            | Token::TableAlignment(_) => {}
        }
    }
}
/// Tries to decode an HTML entity or numeric character reference that starts
/// at `chars[start]`.
///
/// Returns the decoded text together with the number of characters consumed
/// (from the `&` through the terminating `;`), or `None` when the input at
/// `start` is not a recognised reference.
///
/// * The terminating `;` must appear within 64 characters of the `&`.
/// * Numeric references (`&#123;`, `&#x7B;`, `&#X7B;`) accept 1–7 decimal or
///   1–6 hexadecimal digits and nothing else. Code point 0, surrogates and
///   values above U+10FFFF decode to U+FFFD.
/// * Anything else is looked up by name in `NAMED_ENTITIES`.
fn try_decode_entity(chars: &[char], start: usize) -> Option<(String, usize)> {
    if chars.get(start) != Some(&'&') {
        return None;
    }
    // Find the terminating ';', giving up after a bounded look-ahead.
    let mut end = start + 1;
    while end < chars.len() && end - start < 64 {
        if chars[end] == ';' {
            break;
        }
        end += 1;
    }
    if end >= chars.len() || chars[end] != ';' {
        return None;
    }
    let body: String = chars[start + 1..end].iter().collect();
    let consumed = end - start + 1;

    if let Some(rest) = body.strip_prefix('#') {
        let hex_digits = rest.strip_prefix('x').or_else(|| rest.strip_prefix('X'));
        let (radix, digits) = match hex_digits {
            Some(hex) => (16, hex),
            None => (10, rest),
        };
        let max_digits = if radix == 16 { 6 } else { 7 };
        if digits.is_empty() || digits.len() > max_digits {
            return None;
        }
        // `from_str_radix` tolerates a leading '+', which is not part of a
        // valid character reference, so validate the digits ourselves.
        if !digits.chars().all(|d| d.is_digit(radix)) {
            return None;
        }
        let code = u32::from_str_radix(digits, radix).ok()?;
        // `char::from_u32` rejects surrogates and out-of-range values; NUL is
        // replaced explicitly.
        let ch = match code {
            0 => '\u{FFFD}',
            _ => char::from_u32(code).unwrap_or('\u{FFFD}'),
        };
        return Some((ch.to_string(), consumed));
    }

    NAMED_ENTITIES
        .get(body.as_str())
        .map(|s| ((*s).to_string(), consumed))
}
/// Returns `chars[from..to]` with up to `strip_cols` columns of leading
/// indentation removed.
///
/// Leading spaces and tabs are first expanded to columns (a tab advances to
/// the next multiple of four, counted from `from`), then the first
/// `strip_cols` columns are dropped and whatever indentation remains is
/// emitted as spaces, followed by the rest of the range unchanged.
fn strip_leading_cols(chars: &[char], from: usize, to: usize, strip_cols: usize) -> String {
    // Measure the indentation width in columns.
    let mut width = 0usize;
    let mut cursor = from;
    while cursor < to {
        let step = match chars[cursor] {
            ' ' => 1,
            '\t' => 4 - (width % 4),
            _ => break,
        };
        width += step;
        cursor += 1;
    }

    let remaining_indent = width.saturating_sub(strip_cols);
    let mut out = String::new();
    out.extend(std::iter::repeat(' ').take(remaining_indent));
    // An inverted range (`from > to`) yields no payload at all.
    if cursor < to {
        out.extend(chars[cursor..to].iter());
    }
    out
}
/// Removes an optional closing run of `#` characters from ATX heading text.
///
/// The closing run is removed (together with the whitespace before it) only
/// when it is preceded by a space or tab and its first `#` is not
/// backslash-escaped. A line consisting solely of the run yields an empty
/// string. In every other case the line is returned unchanged, including any
/// trailing whitespace.
fn strip_atx_trailing_hashes(line: &str) -> String {
    let is_blank = |c: char| c == ' ' || c == '\t';
    let chars: Vec<char> = line.chars().collect();

    // Ignore trailing whitespace, then measure the `#` run that precedes it.
    let trailing_ws = chars.iter().rev().take_while(|c| is_blank(**c)).count();
    let content_end = chars.len() - trailing_ws;
    let hashes = chars[..content_end]
        .iter()
        .rev()
        .take_while(|c| **c == '#')
        .count();
    if hashes == 0 {
        return line.to_string();
    }
    let run_start = content_end - hashes;

    // An odd number of backslashes escapes the first `#` of the run.
    let slashes = chars[..run_start]
        .iter()
        .rev()
        .take_while(|c| **c == '\\')
        .count();
    if slashes % 2 == 1 {
        return line.to_string();
    }
    if run_start == 0 {
        return String::new();
    }
    if !is_blank(chars[run_start - 1]) {
        return line.to_string();
    }

    let gap = chars[..run_start]
        .iter()
        .rev()
        .take_while(|c| is_blank(**c))
        .count();
    chars[..run_start - gap].iter().collect()
}
/// Resolves backslash escapes and character references in `s`.
///
/// A backslash followed by ASCII punctuation yields just the punctuation
/// character; a reference accepted by `try_decode_entity` is replaced by its
/// decoded text. Everything else is copied through unchanged.
fn decode_escapes_and_entities(s: &str) -> String {
    let chars: Vec<char> = s.chars().collect();
    let mut decoded = String::with_capacity(s.len());
    let mut pos = 0;
    while let Some(&current) = chars.get(pos) {
        if current == '\\' {
            if let Some(&escaped) = chars.get(pos + 1) {
                if is_ascii_punctuation(escaped) {
                    decoded.push(escaped);
                    pos += 2;
                    continue;
                }
            }
        } else if current == '&' {
            if let Some((text, width)) = try_decode_entity(&chars, pos) {
                decoded.push_str(&text);
                pos += width;
                continue;
            }
        }
        // Not an escape or reference: keep the character literally.
        decoded.push(current);
        pos += 1;
    }
    decoded
}
/// Tries to parse a link reference definition (`[label]: url "title"`)
/// beginning at `chars[start]`.
///
/// On success returns `(label, url, title, end)`:
/// * `label` is the raw text between the brackets (not normalised),
/// * `url` and `title` have backslash escapes and entities decoded,
/// * `end` is the index just past the definition, including the newline that
///   terminates its last line when there is one.
///
/// Returns `None` when the text at `start` is not a valid definition.
fn try_parse_definition(
chars: &[char],
start: usize,
) -> Option<(String, String, Option<String>, usize)> {
let mut i = start;
// Up to three leading spaces are allowed before the opening bracket.
let mut leading = 0usize;
while i < chars.len() && chars[i] == ' ' && leading < 3 {
i += 1;
leading += 1;
}
if chars.get(i) != Some(&'[') {
return None;
}
i += 1;
let label_start = i;
// Scan the label up to the first unescaped ']'.
loop {
if i >= chars.len() {
return None;
}
let c = chars[i];
if c == ']' {
break;
}
// An unescaped '[' inside the label is not allowed.
if c == '[' {
return None;
}
// Skip a backslash-escaped punctuation character as a unit.
if c == '\\' && i + 1 < chars.len() && is_ascii_punctuation(chars[i + 1]) {
i += 2;
continue;
}
// A label may span lines but must not contain a blank line or run into
// the end of input right after a newline.
if c == '\n' {
let mut j = i + 1;
while j < chars.len() && (chars[j] == ' ' || chars[j] == '\t') {
j += 1;
}
if j >= chars.len() || chars[j] == '\n' {
return None;
}
i += 1;
continue;
}
i += 1;
}
let label: String = chars[label_start..i].iter().collect();
// The label needs at least one non-whitespace character.
if label.trim().is_empty() {
return None;
}
i += 1;
// The closing bracket must be followed immediately by ':'.
if chars.get(i) != Some(&':') {
return None;
}
i += 1;
// Skip whitespace before the destination; at most one line ending.
let mut newlines = 0usize;
while i < chars.len() {
match chars[i] {
' ' | '\t' => i += 1,
'\n' => {
newlines += 1;
if newlines > 1 {
return None;
}
i += 1;
}
_ => break,
}
}
// A destination is mandatory.
if i >= chars.len() {
return None;
}
let url = if chars[i] == '<' {
// Angle-bracketed destination: runs to the first unescaped '>' and may
// not contain '<' or a line ending. It may be empty.
i += 1;
let s = i;
loop {
if i >= chars.len() {
return None;
}
let c = chars[i];
if c == '>' {
break;
}
if c == '<' || c == '\n' {
return None;
}
if c == '\\' && i + 1 < chars.len() && is_ascii_punctuation(chars[i + 1]) {
i += 2;
continue;
}
i += 1;
}
let raw: String = chars[s..i].iter().collect();
// Step past the closing '>' before decoding.
i += 1; decode_escapes_and_entities(&raw)
} else {
// Bare destination: runs to the next whitespace character and must be
// non-empty.
let s = i;
while i < chars.len() && !chars[i].is_whitespace() {
if chars[i] == '\\' && i + 1 < chars.len() && is_ascii_punctuation(chars[i + 1]) {
i += 2;
continue;
}
i += 1;
}
if i == s {
return None;
}
let raw: String = chars[s..i].iter().collect();
decode_escapes_and_entities(&raw)
};
let after_url = i;
// Look ahead for a title: skip blanks and stop scanning once a second line
// ending is seen (the counter then reads 2).
let mut newlines_after_url = 0usize;
let mut q = after_url;
while q < chars.len() {
match chars[q] {
' ' | '\t' => q += 1,
'\n' => {
newlines_after_url += 1;
if newlines_after_url > 1 {
break;
}
q += 1;
}
_ => break,
}
}
// Fallback result without a title. It is only valid when nothing but blanks
// follows the destination on its own line; `end` then points past that
// line's newline (or at end of input).
let no_title_def = || -> Option<(String, String, Option<String>, usize)> {
let mut k = after_url;
while k < chars.len() && (chars[k] == ' ' || chars[k] == '\t') {
k += 1;
}
if k < chars.len() && chars[k] != '\n' {
return None;
}
let end = if k < chars.len() { k + 1 } else { k };
Some((label.clone(), url.clone(), None, end))
};
// End of input, or a blank line after the destination: no title.
if q >= chars.len() || (newlines_after_url > 1) {
return no_title_def();
}
let title_open = chars[q];
if !matches!(title_open, '"' | '\'' | '(') {
return no_title_def();
}
// A title must be separated from the destination by whitespace.
if q == after_url {
return None;
}
let close = match title_open {
'"' => '"',
'\'' => '\'',
'(' => ')',
_ => unreachable!(),
};
let mut t = q + 1;
let title_start = t;
// Scan the title up to the first unescaped closing delimiter. Whenever the
// title turns out to be malformed, fall back to the title-less definition.
loop {
if t >= chars.len() {
return no_title_def();
}
let c = chars[t];
if c == close {
break;
}
if c == '\\' && t + 1 < chars.len() && is_ascii_punctuation(chars[t + 1]) {
t += 2;
continue;
}
// A title may span lines but must not contain a blank line.
if c == '\n' {
let mut j = t + 1;
while j < chars.len() && (chars[j] == ' ' || chars[j] == '\t') {
j += 1;
}
if j >= chars.len() || chars[j] == '\n' {
return no_title_def();
}
}
t += 1;
}
let title_raw: String = chars[title_start..t].iter().collect();
let title = decode_escapes_and_entities(&title_raw);
// Step past the closing delimiter; only blanks may follow on that line.
t += 1; let mut k = t;
while k < chars.len() && (chars[k] == ' ' || chars[k] == '\t') {
k += 1;
}
if k < chars.len() && chars[k] != '\n' {
return no_title_def();
}
let end = if k < chars.len() { k + 1 } else { k };
Some((label, url, Some(title), end))
}
/// Marks list items as loose, one run of sibling items at a time, then
/// recurses into list items and block quotes.
///
/// A run is a sequence of `Token::ListItem`s separated only by
/// `Token::Newline` tokens. The run is loose when at least one `Newline`
/// separates two of its items or when any item contains two consecutive
/// `Newline` tokens (see `item_has_internal_blank`). The computed flag is
/// OR-ed into each item's existing `loose` value, so an item already marked
/// loose stays loose.
fn propagate_loose_tight(tokens: &mut [Token]) {
let mut i = 0;
while i < tokens.len() {
// Skip ahead to the first item of the next run.
if !matches!(tokens[i], Token::ListItem { .. }) {
i += 1;
continue;
}
let run_start = i;
let mut has_blank_between = false;
let mut last_item_end = i;
if item_has_internal_blank(&tokens[i]) {
has_blank_between = true;
}
// Extend the run while the next non-newline token is another list item.
loop {
i += 1;
let mut newlines = 0;
while i < tokens.len() && matches!(tokens[i], Token::Newline) {
newlines += 1;
i += 1;
}
if i >= tokens.len() || !matches!(tokens[i], Token::ListItem { .. }) {
break;
}
if newlines >= 1 {
has_blank_between = true;
}
if item_has_internal_blank(&tokens[i]) {
has_blank_between = true;
}
last_item_end = i;
}
let loose = has_blank_between;
// The slice also covers the separating newlines; only items are updated.
for tok in &mut tokens[run_start..=last_item_end] {
if let Token::ListItem { loose: l, .. } = tok {
*l = *l || loose;
}
}
// Resume right after the last item, re-examining any trailing newlines.
i = last_item_end + 1;
}
// Nested lists live inside list items and block quotes.
for tok in tokens.iter_mut() {
match tok {
Token::ListItem { content, .. } => propagate_loose_tight(content),
Token::BlockQuote(body) => propagate_loose_tight(body),
_ => {}
}
}
}
/// Reports whether a list item's direct content contains two consecutive
/// `Token::Newline` tokens. Any token other than a list item yields `false`.
fn item_has_internal_blank(tok: &Token) -> bool {
    match tok {
        Token::ListItem { content, .. } => content
            .windows(2)
            .any(|pair| matches!(pair[0], Token::Newline) && matches!(pair[1], Token::Newline)),
        _ => false,
    }
}
/// Reports whether the line `chars[start..end]` ends a paragraph, i.e. a new
/// link reference definition may start on the following line.
///
/// After skipping up to three leading spaces, the line qualifies when it is
/// * blank,
/// * an ATX heading opener: one to six `#` followed by a space, tab, newline
///   or the end of `chars`, or
/// * a thematic break: at least three of the same `-`, `*` or `_`, with only
///   spaces and tabs in between.
fn is_paragraph_breaking_line_chars(chars: &[char], start: usize, end: usize) -> bool {
    let indent = (start..end)
        .take(3)
        .take_while(|&k| chars[k] == ' ')
        .count();
    let first = start + indent;
    if first >= end {
        // Nothing but (at most three) spaces.
        return true;
    }

    match chars[first] {
        '#' => {
            let hashes = (first..end).take_while(|&k| chars[k] == '#').count();
            // The character after the run is looked up in the whole buffer,
            // so a line ending at `end` sees its own newline here.
            (1..=6).contains(&hashes)
                && matches!(
                    chars.get(first + hashes),
                    None | Some(' ') | Some('\t') | Some('\n')
                )
        }
        marker @ ('-' | '*' | '_') => {
            let rest = &chars[first..end];
            let only_marker_and_blanks = rest
                .iter()
                .all(|&c| c == marker || c == ' ' || c == '\t');
            only_marker_and_blanks && rest.iter().filter(|&&c| c == marker).count() >= 3
        }
        _ => false,
    }
}
/// Measures the indentation at `chars[pos]` for a footnote continuation line.
///
/// Spaces count as one column and tabs as four. Returns the number of
/// indentation *characters* when they add up to at least four columns and are
/// followed by something other than a newline or the end of input; otherwise
/// returns `None`.
fn footnote_continuation_indent(chars: &[char], pos: usize) -> Option<usize> {
    let tail = chars.get(pos..).unwrap_or(&[]);
    let lead = tail
        .iter()
        .take_while(|&&c| c == ' ' || c == '\t')
        .count();
    let columns: usize = tail[..lead]
        .iter()
        .map(|&c| if c == '\t' { 4 } else { 1 })
        .sum();
    if columns < 4 {
        return None;
    }
    match tail.get(lead) {
        Some(&next) if next != '\n' => Some(lead),
        _ => None,
    }
}
/// Reports whether the text at `chars[pos]` is a definition-list marker:
/// up to three spaces, a `:`, and then a space, tab or newline.
fn is_definition_marker_line(chars: &[char], pos: usize) -> bool {
    let rest = chars.get(pos..).unwrap_or(&[]);
    let lead = rest.iter().take(3).take_while(|&&c| c == ' ').count();
    match (rest.get(lead), rest.get(lead + 1)) {
        (Some(':'), Some(&after)) => after == ' ' || after == '\t' || after == '\n',
        _ => false,
    }
}
/// Reports whether `line` could open a block-level construct.
///
/// After skipping up to three leading spaces this is `true` for a blank
/// line, for a line starting with one of `# > | ` ~ < * - + _ [ :`, and for a
/// run of ASCII digits immediately followed by `.` or `)`.
fn line_starts_block_construct(line: &[char]) -> bool {
    let lead = line.iter().take(3).take_while(|&&c| c == ' ').count();
    let Some(&first) = line.get(lead) else {
        // Empty or whitespace-only line.
        return true;
    };
    match first {
        '#' | '>' | '|' | '`' | '~' | '<' | '*' | '-' | '+' | '_' | '[' | ':' => true,
        digit if digit.is_ascii_digit() => {
            // Ordered-list marker: digits followed by '.' or ')'.
            let terminator = line[lead..].iter().find(|c| !c.is_ascii_digit());
            matches!(terminator, Some('.') | Some(')'))
        }
        _ => false,
    }
}
/// Normalises a link label for case-insensitive lookup.
///
/// Leading and trailing whitespace is dropped, every internal run of
/// whitespace becomes a single space, and each remaining character is passed
/// through `case_fold_char`.
fn normalize_label(s: &str) -> String {
    let mut normalized = String::new();
    for (index, word) in s.split_whitespace().enumerate() {
        if index > 0 {
            normalized.push(' ');
        }
        for c in word.chars() {
            case_fold_char(c, &mut normalized);
        }
    }
    normalized
}
/// Appends the case-folded form of `c` to `out`.
///
/// Most characters fold to their lowercase mapping. The explicit arms cover
/// characters whose case fold differs from, or is clearer spelled out than,
/// `char::to_lowercase`:
///
/// * `ẞ` / `ß` fold to `ss`,
/// * `İ` (U+0130) folds to `i` followed by U+0307,
/// * `ŉ` (U+0149) folds to U+02BC followed by `n`,
/// * `ſ` (U+017F) folds to `s`,
/// * final sigma `ς` (U+03C2) folds to `σ` (U+03C3), so that labels that
///   differ only in sigma form (e.g. `ΟΔΟΣ` vs `οδος`) compare equal.
fn case_fold_char(c: char, out: &mut String) {
    match c {
        'ẞ' | 'ß' => out.push_str("ss"),
        '\u{0130}' => out.push_str("i\u{0307}"),
        '\u{0149}' => out.push_str("\u{02BC}n"),
        '\u{017F}' => out.push('s'),
        // `to_lowercase` leaves final sigma untouched while capital sigma
        // lowercases to the non-final form; fold both to the same character.
        '\u{03C2}' => out.push('\u{03C3}'),
        _ => out.extend(c.to_lowercase()),
    }
}
/// Removes one leading and one trailing space from code-span content.
///
/// The strip only happens when the content both starts and ends with a space
/// and is not made up entirely of spaces; otherwise `s` is returned as is.
fn strip_code_span_outer_space(s: String) -> String {
    let trimmed = match s.strip_prefix(' ').and_then(|rest| rest.strip_suffix(' ')) {
        // The two removed characters were spaces, so checking the core is
        // enough to rule out an all-space string.
        Some(core) if core.chars().any(|c| c != ' ') => Some(core.to_owned()),
        _ => None,
    };
    trimmed.unwrap_or(s)
}
/// Reports whether `c` counts as punctuation for emphasis flanking rules.
///
/// Every ASCII punctuation character qualifies; all other ASCII characters
/// do not. Beyond ASCII, membership is decided by the code-point ranges
/// below, minus three explicit exclusions.
fn is_md_punctuation(c: char) -> bool {
    // Inclusive code-point ranges treated as punctuation/symbols.
    const NON_ASCII_RANGES: &[(char, char)] = &[
        ('\u{00A1}', '\u{00BF}'), // Latin-1 punctuation and symbols
        ('\u{00D7}', '\u{00D7}'), // multiplication sign
        ('\u{00F7}', '\u{00F7}'), // division sign
        ('\u{2000}', '\u{20CF}'), // General Punctuation .. Currency Symbols
        ('\u{2100}', '\u{243F}'), // Letterlike Symbols .. Control Pictures
        ('\u{2500}', '\u{2BFF}'), // Box Drawing .. Misc. Symbols and Arrows
        ('\u{3000}', '\u{303F}'), // CJK Symbols and Punctuation
        ('\u{FE30}', '\u{FE6F}'), // CJK Compatibility Forms, Small Form Variants
        ('\u{FF00}', '\u{FFEF}'), // Halfwidth and Fullwidth Forms
    ];
    // No-break space and the line/paragraph separators never count.
    const EXCLUDED: [char; 3] = ['\u{00A0}', '\u{2028}', '\u{2029}'];

    if is_ascii_punctuation(c) {
        return true;
    }
    if c.is_ascii() {
        return false;
    }
    let in_table = NON_ASCII_RANGES
        .iter()
        .any(|&(low, high)| (low..=high).contains(&c));
    in_table && !EXCLUDED.contains(&c)
}
/// Bookkeeping record for one emphasis delimiter run while
/// `resolve_emphasis_chunk` pairs openers with closers.
#[derive(Debug, Clone)]
struct EmDelim {
/// Index of the corresponding `Token::DelimRun` in the token list; kept up
/// to date as matched pairs are spliced out.
pos: usize,
/// The delimiter character of the run.
ch: char,
/// Number of delimiter characters still unmatched in the run.
count: usize,
/// Whether the run may open emphasis (see `compute_em_flanking`).
can_open: bool,
/// Whether the run may close emphasis (see `compute_em_flanking`).
can_close: bool,
}
/// Resolves emphasis delimiter runs in `tokens`, one paragraph-like chunk at
/// a time.
///
/// The token list is split wherever two or more `Token::Newline` tokens
/// appear in a row. Each chunk between such blank-line boundaries is handed
/// to `resolve_emphasis_chunk` separately, so emphasis never pairs across a
/// boundary; the newline runs themselves are kept unchanged.
///
/// Tokens are moved, not cloned, while being regrouped.
fn resolve_emphasis(tokens: &mut Vec<Token>) {
    let original = std::mem::take(tokens);
    let mut result: Vec<Token> = Vec::with_capacity(original.len());
    let mut chunk: Vec<Token> = Vec::new();

    let mut stream = original.into_iter().peekable();
    while let Some(token) = stream.next() {
        let starts_blank_run = matches!(token, Token::Newline)
            && matches!(stream.peek(), Some(Token::Newline));
        if !starts_blank_run {
            chunk.push(token);
            continue;
        }
        // Blank-line boundary: finish the chunk collected so far ...
        if !chunk.is_empty() {
            resolve_emphasis_chunk(&mut chunk);
            result.append(&mut chunk);
        }
        // ... and copy the whole run of newlines through untouched.
        result.push(token);
        while let Some(newline) = stream.next_if(|t| matches!(t, Token::Newline)) {
            result.push(newline);
        }
    }
    if !chunk.is_empty() {
        resolve_emphasis_chunk(&mut chunk);
        result.append(&mut chunk);
    }
    *tokens = result;
}
/// Pairs emphasis delimiter runs within one chunk of tokens and replaces each
/// matched pair (and everything between it) with a `Token::Emphasis`.
///
/// Closers are visited left to right; for each one the nearest preceding
/// compatible opener is searched. Runs that remain unmatched at the end are
/// rewritten as literal `Token::Text`.
fn resolve_emphasis_chunk(tokens: &mut Vec<Token>) {
let mut delims = find_em_delims(tokens);
if delims.is_empty() {
return;
}
// `active[k]` is cleared once delimiter `k` is used up or ruled out.
let mut active: Vec<bool> = vec![true; delims.len()];
// Lowest delimiter index still worth searching for an opener, keyed by
// [delimiter char][closer count % 3][closer can also open].
let mut openers_bottom = [[[0usize; 2]; 3]; 2];
// Maps the delimiter character to its `openers_bottom` slot: '*' -> 0,
// anything else -> 1.
fn ch_idx(c: char) -> usize {
if c == '*' {
0
} else {
1
}
}
let mut ci = 0;
while ci < delims.len() {
// Only active runs that may close emphasis are considered as closers.
if !active[ci] || !delims[ci].can_close {
ci += 1;
continue;
}
let c_ch = delims[ci].ch;
let c_can_open = delims[ci].can_open;
let c_count = delims[ci].count;
let bottom = openers_bottom[ch_idx(c_ch)][c_count % 3]
[if c_can_open { 1 } else { 0 }];
// Walk backwards from the closer down to `bottom` (inclusive) looking for
// an active opener of the same character.
let mut oi_found: Option<usize> = None;
let mut oi = ci;
while oi > bottom {
oi -= 1;
if !active[oi] {
continue;
}
let o = &delims[oi];
if !o.can_open || o.ch != c_ch {
continue;
}
// "Multiple of three" rule: when either run can both open and close, the
// pair is rejected if the counts sum to a multiple of three unless both
// counts are themselves multiples of three.
// NOTE(review): this uses the current (possibly already reduced) counts —
// confirm that is intended rather than the original run lengths.
if (o.can_open && o.can_close) || c_can_open {
let sum = o.count + c_count;
if sum % 3 == 0
&& !(o.count % 3 == 0 && c_count % 3 == 0)
{
continue;
}
}
oi_found = Some(oi);
break;
}
if let Some(oi) = oi_found {
let opener = delims[oi].clone();
let closer = delims[ci].clone();
// Consume two characters per side when both runs have at least two,
// otherwise one.
let n = if opener.count >= 2 && closer.count >= 2 { 2 } else { 1 };
// Delimiters between the pair end up inside the new emphasis token.
for k in (oi + 1)..ci {
active[k] = false;
}
wrap_emphasis_pair(
tokens,
opener.pos,
closer.pos,
opener.count,
closer.count,
n,
);
// The span opener..=closer was replaced by: optional leftover opener run,
// the emphasis token, optional leftover closer run.
let old_len = closer.pos - opener.pos + 1;
let new_len = 1
+ (if opener.count > n { 1 } else { 0 })
+ (if closer.count > n { 1 } else { 0 });
let shift: i64 = new_len as i64 - old_len as i64;
// Re-base the token positions of every delimiter after the closer.
for d in delims.iter_mut().skip(ci + 1) {
d.pos = ((d.pos as i64) + shift) as usize;
}
let new_opener_pos = opener.pos;
let new_closer_pos = opener.pos
+ (if opener.count > n { 1 } else { 0 })
+ 1;
delims[oi].count -= n;
delims[oi].pos = new_opener_pos;
if delims[oi].count == 0 {
active[oi] = false;
}
delims[ci].count -= n;
delims[ci].pos = new_closer_pos;
// A closer with characters left is examined again on the next iteration.
if delims[ci].count == 0 {
active[ci] = false;
ci += 1;
}
} else {
// No opener: remember how far down the search went for this kind of
// closer, and retire the run unless it can still act as an opener.
openers_bottom[ch_idx(c_ch)][c_count % 3]
[if c_can_open { 1 } else { 0 }] = ci;
if !c_can_open {
active[ci] = false;
}
ci += 1;
}
}
// Whatever is still a delimiter run becomes literal text.
for t in tokens.iter_mut() {
if let Token::DelimRun { ch, count } = t {
*t = Token::Text(ch.to_string().repeat(*count));
}
}
}
/// Collects one `EmDelim` per `Token::DelimRun` in `tokens`, in order,
/// recording its index, character, length and open/close capabilities.
fn find_em_delims(tokens: &[Token]) -> Vec<EmDelim> {
    tokens
        .iter()
        .enumerate()
        .filter_map(|(pos, tok)| match tok {
            Token::DelimRun { ch, count } => {
                let (can_open, can_close) = compute_em_flanking(tokens, pos, *ch);
                Some(EmDelim {
                    pos,
                    ch: *ch,
                    count: *count,
                    can_open,
                    can_close,
                })
            }
            _ => None,
        })
        .collect()
}
fn compute_em_flanking(tokens: &[Token], idx: usize, ch: char) -> (bool, bool) {
let before = char_before_token(tokens, idx);
let after = char_after_token(tokens, idx);
let lf = em_is_left_flanking(before, after);
let rf = em_is_right_flanking(before, after);
if ch == '*' {
(lf, rf)
} else {
let can_open =
lf && (!rf || matches!(before, Some(c) if is_md_punctuation(c)));
let can_close =
rf && (!lf || matches!(after, Some(c) if is_md_punctuation(c)));
(can_open, can_close)
}
}
/// Reports whether a delimiter run with the given neighbours is
/// left-flanking: it must be followed by a non-whitespace character, and if
/// that character is punctuation the run must be preceded by whitespace,
/// punctuation or nothing at all.
fn em_is_left_flanking(before: Option<char>, after: Option<char>) -> bool {
    match after {
        None => false,
        Some(next) if next.is_whitespace() => false,
        Some(next) if !is_md_punctuation(next) => true,
        Some(_) => before.map_or(true, |prev| prev.is_whitespace() || is_md_punctuation(prev)),
    }
}
/// Reports whether a delimiter run with the given neighbours is
/// right-flanking: it must be preceded by a non-whitespace character, and if
/// that character is punctuation the run must be followed by whitespace,
/// punctuation or nothing at all.
fn em_is_right_flanking(before: Option<char>, after: Option<char>) -> bool {
    match before {
        None => false,
        Some(prev) if prev.is_whitespace() => false,
        Some(prev) if !is_md_punctuation(prev) => true,
        Some(_) => after.map_or(true, |next| next.is_whitespace() || is_md_punctuation(next)),
    }
}
fn char_before_token(tokens: &[Token], idx: usize) -> Option<char> {
for i in (0..idx).rev() {
if let Some(c) = last_meaningful_char(&tokens[i]) {
return Some(c);
}
}
None
}
fn char_after_token(tokens: &[Token], idx: usize) -> Option<char> {
for i in (idx + 1)..tokens.len() {
if let Some(c) = first_meaningful_char(&tokens[i]) {
return Some(c);
}
}
None
}
/// Returns the last character `tok` contributes for flanking purposes, or
/// `None` when the token is transparent.
///
/// Empty code and math tokens stand in as their delimiter character, line
/// breaks count as a space, and containers defer to their last meaningful
/// child.
fn last_meaningful_char(tok: &Token) -> Option<char> {
    match tok {
        Token::Text(s) | Token::HtmlInline(s) | Token::HtmlComment(s) | Token::Unknown(s) => {
            s.chars().last()
        }
        Token::DelimRun { ch, .. } => Some(*ch),
        Token::Code { content, .. } => content.chars().last().or(Some('`')),
        Token::Math { content, .. } => content.chars().last().or(Some('$')),
        Token::Emphasis { content: children, .. }
        | Token::Link { content: children, .. }
        | Token::Image { alt: children, .. }
        | Token::StrongEmphasis(children)
        | Token::Strikethrough(children)
        | Token::Highlight(children)
        | Token::Heading(children, _) => last_meaningful_in_slice(children),
        Token::Newline | Token::HardBreak => Some(' '),
        _ => None,
    }
}
/// Returns the first character `tok` contributes for flanking purposes, or
/// `None` when the token is transparent.
///
/// Empty code and math tokens stand in as their delimiter character, line
/// breaks count as a space, and containers defer to their first meaningful
/// child.
fn first_meaningful_char(tok: &Token) -> Option<char> {
    match tok {
        Token::Text(s) | Token::HtmlInline(s) | Token::HtmlComment(s) | Token::Unknown(s) => {
            s.chars().next()
        }
        Token::DelimRun { ch, .. } => Some(*ch),
        Token::Code { content, .. } => content.chars().next().or(Some('`')),
        Token::Math { content, .. } => content.chars().next().or(Some('$')),
        Token::Emphasis { content: children, .. }
        | Token::Link { content: children, .. }
        | Token::Image { alt: children, .. }
        | Token::StrongEmphasis(children)
        | Token::Strikethrough(children)
        | Token::Highlight(children)
        | Token::Heading(children, _) => first_meaningful_in_slice(children),
        Token::Newline | Token::HardBreak => Some(' '),
        _ => None,
    }
}
fn last_meaningful_in_slice(slice: &[Token]) -> Option<char> {
for t in slice.iter().rev() {
if let Some(c) = last_meaningful_char(t) {
return Some(c);
}
}
None
}
fn first_meaningful_in_slice(slice: &[Token]) -> Option<char> {
for t in slice {
if let Some(c) = first_meaningful_char(t) {
return Some(c);
}
}
None
}
/// Replaces the tokens from `opener_pos` through `closer_pos` with a single
/// `Token::Emphasis` of level `n`.
///
/// The tokens strictly between the two delimiter runs become the emphasis
/// content, after their own emphasis has been resolved. If a run has more
/// than `n` characters, a shortened `DelimRun` with the remainder is kept on
/// the outside (before the emphasis for the opener, after it for the
/// closer). Does nothing when either position is not a `DelimRun`.
fn wrap_emphasis_pair(
    tokens: &mut Vec<Token>,
    opener_pos: usize,
    closer_pos: usize,
    opener_count: usize,
    closer_count: usize,
    n: usize,
) {
    let Token::DelimRun { ch: open_ch, .. } = &tokens[opener_pos] else {
        return;
    };
    let open_ch = *open_ch;
    let Token::DelimRun { ch: close_ch, .. } = &tokens[closer_pos] else {
        return;
    };
    let close_ch = *close_ch;

    let open_left = opener_count - n;
    let close_left = closer_count - n;

    // Resolve nested emphasis on a copy of the enclosed tokens.
    let mut enclosed: Vec<Token> = tokens[opener_pos + 1..closer_pos].to_vec();
    resolve_emphasis_chunk(&mut enclosed);

    let leftover_open = (open_left > 0).then(|| Token::DelimRun {
        ch: open_ch,
        count: open_left,
    });
    let leftover_close = (close_left > 0).then(|| Token::DelimRun {
        ch: close_ch,
        count: close_left,
    });
    let emphasis = Token::Emphasis {
        level: n,
        content: enclosed,
    };

    let replacement: Vec<Token> = leftover_open
        .into_iter()
        .chain(std::iter::once(emphasis))
        .chain(leftover_close)
        .collect();
    tokens.splice(opener_pos..closer_pos + 1, replacement);
}
/// Reports whether `c` is an ASCII punctuation character, i.e. one of
/// ``! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~``.
///
/// This is exactly the set covered by the standard library's
/// `char::is_ascii_punctuation`, so the check delegates to it.
fn is_ascii_punctuation(c: char) -> bool {
    c.is_ascii_punctuation()
}
/// Reports whether the character at `chars[idx]` is escaped, i.e. directly
/// preceded by an odd number of backslashes.
fn is_backslash_escaped(chars: &[char], idx: usize) -> bool {
    let preceding_slashes = chars[..idx]
        .iter()
        .rev()
        .take_while(|&&c| c == '\\')
        .count();
    preceding_slashes % 2 == 1
}
/// Builds a URL-friendly slug from `text`.
///
/// ASCII letters and digits are kept (lower-cased); each run of whitespace,
/// `-` or `_` becomes a single `-`; every other character is dropped. The
/// result never starts or ends with `-`.
pub(crate) fn slugify(text: &str) -> String {
    let mut slug = String::with_capacity(text.len());
    for ch in text.chars() {
        if ch.is_ascii_alphanumeric() {
            slug.push(ch.to_ascii_lowercase());
            continue;
        }
        let is_separator = ch.is_whitespace() || ch == '-' || ch == '_';
        // Emit a dash only after real content and never twice in a row.
        if is_separator && !slug.is_empty() && !slug.ends_with('-') {
            slug.push('-');
        }
    }
    while slug.ends_with('-') {
        slug.pop();
    }
    slug
}
/// Errors reported by the lexer, each tagged with the line and column at
/// which it was detected.
#[derive(Debug)]
pub enum LexerError {
/// The input ended while more content was expected.
UnexpectedEndOfInput { line: usize, column: usize },
/// A problem described by `message`; also used to report that the maximum
/// nesting depth was exceeded.
UnknownToken {
message: String,
line: usize,
column: usize,
},
}
impl LexerError {
    /// Returns the `(line, column)` pair attached to this error.
    pub fn position(&self) -> (usize, usize) {
        // Both variants carry the same location fields.
        let (LexerError::UnexpectedEndOfInput { line, column }
        | LexerError::UnknownToken { line, column, .. }) = self;
        (*line, *column)
    }
}
/// Human-readable rendering of a lexer error, always including its line and
/// column.
impl std::fmt::Display for LexerError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match *self {
            LexerError::UnexpectedEndOfInput { line, column } => {
                write!(
                    f,
                    "unexpected end of input at line {}, column {}",
                    line, column
                )
            }
            LexerError::UnknownToken {
                ref message,
                line,
                column,
            } => {
                write!(f, "{} (line {}, column {})", message, line, column)
            }
        }
    }
}
// Marker impl so `LexerError` can be used wherever a standard error type is
// expected; the default trait methods are sufficient.
impl std::error::Error for LexerError {}
// Upper bound on `Lexer::depth`. Parsing fails with a nesting error once the
// depth exceeds this value, which bounds recursion on deeply nested input.
const MAX_PARSE_DEPTH: usize = 32;
/// Converts Markdown source text into a list of `Token`s.
pub struct Lexer {
// Source characters after normalisation in `new`; `extract_definitions`
// later replaces them with the text left once definitions are removed.
input: Vec<char>,
// Index of the next character of `input` to read.
position: usize,
// When set, the next `next_token` call emits `Token::HardBreak` and clears it.
pending_hard_break: bool,
// NOTE(review): presumably set while a heading's content is being lexed —
// its use is not visible in this part of the file.
in_heading: bool,
// Whether the most recently emitted top-level token was a list item
// (maintained by `parse_with_context`).
last_emitted_list_item: bool,
// Whether the most recently emitted token counts as paragraph text
// (maintained by `parse_with_context`).
last_emitted_was_paragraph_text: bool,
// Link reference definitions keyed by normalised label; the value is
// (url, optional title). Filled by `extract_definitions`.
definitions: HashMap<String, (String, Option<String>)>,
// NOTE(review): presumably disables setext-heading detection — its use is
// not visible in this part of the file.
suppress_setext: bool,
// Tokens queued for emission before any further input is read.
pending: std::collections::VecDeque<Token>,
// Current nesting depth, checked against `MAX_PARSE_DEPTH`.
depth: usize,
// Counter shared with every lexer created through `sub_lexer`.
// NOTE(review): presumably used to number inline footnotes — confirm.
inline_footnote_seq: std::rc::Rc<std::cell::Cell<usize>>,
}
impl Lexer {
/// Creates a lexer over `input`.
///
/// The text is normalised first: a leading byte-order mark (U+FEFF) is
/// dropped, `\r\n` and lone `\r` line endings become `\n`, and NUL
/// characters are replaced with U+FFFD.
pub fn new(input: String) -> Self {
    let text = input.strip_prefix('\u{FEFF}').unwrap_or(&input);

    let mut chars: Vec<char> = Vec::with_capacity(text.len());
    let mut source = text.chars().peekable();
    while let Some(c) = source.next() {
        match c {
            '\r' => {
                // Collapse "\r\n" into one line ending; a lone '\r' is one too.
                if source.peek() == Some(&'\n') {
                    source.next();
                }
                chars.push('\n');
            }
            '\u{0}' => chars.push('\u{FFFD}'),
            other => chars.push(other),
        }
    }

    Lexer {
        input: chars,
        position: 0,
        pending_hard_break: false,
        in_heading: false,
        last_emitted_list_item: false,
        last_emitted_was_paragraph_text: false,
        definitions: HashMap::new(),
        suppress_setext: false,
        pending: std::collections::VecDeque::new(),
        depth: 0,
        inline_footnote_seq: std::rc::Rc::new(std::cell::Cell::new(0)),
    }
}
/// Creates a fresh lexer for nested content.
///
/// The child starts one nesting level deeper than `self` and shares this
/// lexer's inline-footnote counter; all other state is reset as in `new`.
fn sub_lexer(&self, input: String) -> Lexer {
    let mut child = Lexer::new(input);
    child.inline_footnote_seq = std::rc::Rc::clone(&self.inline_footnote_seq);
    child.depth = self.depth.saturating_add(1);
    child
}
/// Builds the error reported when nesting exceeds `MAX_PARSE_DEPTH`,
/// located at the current position (clamped to the end of the input).
fn nesting_error(&self) -> LexerError {
    let clamped = std::cmp::min(self.position, self.input.len());
    let (line, column) = self.pos_to_line_col(clamped);
    let message = format!("maximum nesting depth ({}) exceeded", MAX_PARSE_DEPTH);
    LexerError::UnknownToken {
        message,
        line,
        column,
    }
}
/// Lexes the whole input and returns the top-level tokens.
///
/// Link reference definitions are extracted first, then the remaining text
/// is parsed in the root context, and finally list items are marked loose
/// or tight.
///
/// # Errors
///
/// Returns any `LexerError` raised while parsing.
pub fn parse(&mut self) -> Result<Vec<Token>, LexerError> {
    self.extract_definitions();
    self.parse_with_context(ParseContext::Root)
        .map(|mut tokens| {
            propagate_loose_tight(&mut tokens);
            tokens
        })
}
/// Pre-pass that pulls link reference definitions out of the input.
///
/// Every definition found is recorded in `self.definitions` under its
/// normalised label (the first definition of a label wins) and removed from
/// the text. Afterwards `self.input` holds the remaining characters and
/// `self.position` is reset to zero.
///
/// A definition may start only at the beginning of the input, directly after
/// another definition, or after a paragraph-breaking line (see
/// `is_paragraph_breaking_line_chars`). Lines opening with `[^` (after up to
/// three spaces) are left alone. Single-line definitions behind block-quote
/// markers are recognised as well; their `>` markers are kept.
fn extract_definitions(&mut self) {
    // `self.input` is overwritten below, so take the buffer instead of
    // cloning it.
    let chars = std::mem::take(&mut self.input);
    let mut definitions = HashMap::new();
    let mut kept: Vec<char> = Vec::with_capacity(chars.len());
    let mut i = 0usize;
    let mut may_start_def = true;
    let mut line_start = 0usize;
    while i < chars.len() {
        let at_line_start = i == 0 || chars[i - 1] == '\n';
        if at_line_start {
            line_start = i;
            // `[^` opens a footnote, never a link definition.
            let footnote_skip = {
                let mut p = i;
                let mut lead = 0usize;
                while p < chars.len() && chars[p] == ' ' && lead < 3 {
                    p += 1;
                    lead += 1;
                }
                p + 1 < chars.len() && chars[p] == '[' && chars[p + 1] == '^'
            };
            if footnote_skip {
                kept.push(chars[i]);
                i += 1;
                may_start_def = false;
                continue;
            }
            if may_start_def {
                // Plain definition at the start of the line.
                if let Some((label, url, title, end)) = try_parse_definition(&chars, i) {
                    definitions
                        .entry(normalize_label(&label))
                        .or_insert((url, title));
                    i = end;
                    may_start_def = true;
                    continue;
                }
                // Otherwise peel off block-quote markers and retry behind them.
                let mut peel = i;
                let mut leading = 0usize;
                while peel < chars.len() && chars[peel] == ' ' && leading < 3 {
                    peel += 1;
                    leading += 1;
                }
                let prefix_start = peel;
                let mut any_marker = false;
                while peel < chars.len() && chars[peel] == '>' {
                    any_marker = true;
                    peel += 1;
                    if peel < chars.len() && (chars[peel] == ' ' || chars[peel] == '\t') {
                        peel += 1;
                    }
                }
                if any_marker {
                    let line_end = (peel..chars.len())
                        .find(|&j| chars[j] == '\n')
                        .unwrap_or(chars.len());
                    if let Some((label, url, title, def_end)) =
                        try_parse_definition(&chars, peel)
                    {
                        // Only definitions that fit on this one line are
                        // taken; multi-line ones would swallow the markers
                        // of the following lines.
                        let single_line = def_end <= line_end
                            || (def_end == line_end + 1 && line_end < chars.len());
                        if single_line {
                            definitions
                                .entry(normalize_label(&label))
                                .or_insert((url, title));
                            // Keep the indentation and the bare `>` markers
                            // so the quote structure survives.
                            kept.extend_from_slice(&chars[i..prefix_start]);
                            kept.extend(
                                chars[prefix_start..peel]
                                    .iter()
                                    .copied()
                                    .filter(|&c| c == '>'),
                            );
                            i = if line_end < chars.len() {
                                kept.push('\n');
                                line_end + 1
                            } else {
                                line_end
                            };
                            may_start_def = true;
                            continue;
                        }
                    }
                }
            }
        }
        // At each line ending decide whether the next line may start a
        // definition.
        if chars[i] == '\n' {
            may_start_def = is_paragraph_breaking_line_chars(&chars, line_start, i);
        }
        kept.push(chars[i]);
        i += 1;
    }
    self.input = kept;
    self.position = 0;
    self.definitions = definitions;
}
/// Lexes the rest of the input (and any queued tokens) in context `ctx`,
/// then resolves emphasis over the collected tokens.
///
/// While collecting, the `last_emitted_*` flags are kept in sync with the
/// kind of token most recently produced.
///
/// # Errors
///
/// Fails with a nesting error when the lexer is already deeper than
/// `MAX_PARSE_DEPTH`, and propagates any error from `next_token`.
pub(crate) fn parse_with_context(
    &mut self,
    ctx: ParseContext,
) -> Result<Vec<Token>, LexerError> {
    if self.depth > MAX_PARSE_DEPTH {
        return Err(self.nesting_error());
    }
    let mut tokens: Vec<Token> = Vec::new();
    while self.position < self.input.len() || !self.pending.is_empty() {
        let Some(token) = self.next_token(ctx)? else {
            continue;
        };
        match &token {
            Token::ListItem { .. } => {
                self.last_emitted_list_item = true;
                self.last_emitted_was_paragraph_text = false;
            }
            Token::Newline => {
                // A second newline in a row ends the paragraph; a single one
                // leaves both flags as they were.
                if matches!(tokens.last(), Some(Token::Newline)) {
                    self.last_emitted_was_paragraph_text = false;
                }
            }
            Token::Heading(_, _)
            | Token::HorizontalRule
            | Token::BlockQuote(_)
            | Token::Table { .. }
            | Token::HtmlComment(_)
            | Token::Code { block: true, .. } => {
                self.last_emitted_list_item = false;
                self.last_emitted_was_paragraph_text = false;
            }
            _ => {
                self.last_emitted_list_item = false;
                self.last_emitted_was_paragraph_text = true;
            }
        }
        tokens.push(token);
    }
    resolve_emphasis(&mut tokens);
    Ok(tokens)
}
/// Depth-guarded wrapper around `parse_nested_content_inner`.
///
/// The nesting depth is raised for the duration of the call and restored
/// afterwards, whether or not parsing succeeds.
///
/// # Errors
///
/// Fails with a nesting error when the raised depth exceeds
/// `MAX_PARSE_DEPTH`, and propagates any error from the inner parser.
fn parse_nested_content<F>(
    &mut self,
    is_delimiter: F,
    ctx: ParseContext,
) -> Result<Vec<Token>, LexerError>
where
    F: Fn(char) -> bool,
{
    self.depth = self.depth.saturating_add(1);
    let parsed = if self.depth > MAX_PARSE_DEPTH {
        None
    } else {
        Some(self.parse_nested_content_inner(is_delimiter, ctx))
    };
    self.depth -= 1;
    match parsed {
        Some(outcome) => outcome,
        None => Err(self.nesting_error()),
    }
}
/// Collects tokens for nested content until a stop condition is reached.
///
/// Parsing stops, without consuming the stopping character, at the end of
/// input, at a blank line (two consecutive newlines) or at a character for
/// which `is_delimiter` returns `true`. Unless `ctx` is `Inline`, emphasis
/// is resolved over the collected tokens before they are returned.
fn parse_nested_content_inner<F>(
&mut self,
is_delimiter: F,
ctx: ParseContext,
) -> Result<Vec<Token>, LexerError>
where
F: Fn(char) -> bool,
{
let mut content = Vec::new();
// Indentation at entry; deeper-indented lines may start nested list items.
let initial_indent = self.get_current_indent();
loop {
// Queued tokens always go out first.
while let Some(tok) = self.pending.pop_front() {
content.push(tok);
}
if self.position >= self.input.len() {
break;
}
let ch = self.current_char();
// A blank line ends the nested content.
if ch == '\n' && self.input.get(self.position + 1) == Some(&'\n') {
break;
}
if is_delimiter(ch) {
break;
}
if self.is_at_line_start() {
let current_indent = self.get_current_indent();
// Only block-capable contexts look for nested list items.
if current_indent > initial_indent
&& !matches!(ctx, ParseContext::Inline | ParseContext::TableCell)
{
// Step over the indentation. NOTE(review): the position stays advanced
// even when no list marker follows — confirm this is intended.
self.position += current_indent;
match self.current_char() {
'-' | '+' => {
// NOTE(review): when `check_horizontal_rule` returns true no token is
// pushed here; presumably it leaves the rule for `next_token` — confirm.
if !self.check_horizontal_rule()? {
content.push(self.parse_list_item(false, ctx)?);
continue;
}
}
'*' => {
if self.is_list_marker('*') {
content.push(self.parse_list_item(false, ctx)?);
continue;
}
}
'0'..='9' => {
if self.check_ordered_list_marker().is_some() {
content.push(self.parse_list_item(true, ctx)?);
continue;
}
}
_ => {}
}
}
}
if let Some(token) = self.next_token(ctx)? {
content.push(token);
}
}
if !matches!(ctx, ParseContext::Inline) {
resolve_emphasis(&mut content);
}
Ok(content)
}
/// Produces the next token for parse context `ctx`.
///
/// Returns `Ok(None)` when the cursor is at (or, after whitespace skipping,
/// reaches) the end of input. Checks are made in this order:
///
/// 1. a token already queued in `self.pending`;
/// 2. a deferred hard break (`self.pending_hard_break`);
/// 3. an indented code block (only in `Root` / `BlockQuote`, at a line start
///    with an indent of at least 4);
/// 4. a setext heading, then a definition list (only in `Root` /
///    `BlockQuote`, at a block-marker start);
/// 5. dispatch on the current character.
///
/// # Errors
/// Propagates any `LexerError` raised by the sub-parsers it calls.
fn next_token(&mut self, ctx: ParseContext) -> Result<Option<Token>, LexerError> {
// Queued tokens take priority over reading new input.
if let Some(tok) = self.pending.pop_front() {
return Ok(Some(tok));
}
if self.pending_hard_break {
self.pending_hard_break = false;
return Ok(Some(Token::HardBreak));
}
// Indented code is only recognised in Root and BlockQuote contexts.
if matches!(ctx, ParseContext::Root | ParseContext::BlockQuote)
&& self.is_at_line_start()
&& self.get_current_indent() >= 4
&& self.can_start_indented_code()
{
return Ok(Some(self.parse_indented_code_block()));
}
if !self.is_after_special_token() {
self.skip_whitespace();
}
if self.position >= self.input.len() {
return Ok(None);
}
// These three values are captured once, before any sub-parser moves the
// cursor, and drive the guards in the dispatch below.
let current_char = self.current_char();
let is_line_start = self.is_at_line_start();
let is_block_start = self.is_block_marker_start();
// Contexts in which block-level constructs (headings, rules, lists,
// fences, block quotes, HTML blocks) may be opened.
let allow_block_tokens = |context: ParseContext| -> bool {
matches!(
context,
ParseContext::Root | ParseContext::ListItem | ParseContext::BlockQuote
)
};
// Setext heading probe; skipped when the current line is itself a
// thematic break starting with `*`, `_` or `-`.
if is_block_start
&& matches!(ctx, ParseContext::Root | ParseContext::BlockQuote)
&& !(matches!(current_char, '*' | '_' | '-')
&& self.is_thematic_break_line())
{
if let Some(level) = self.peek_setext_level() {
return Ok(Some(self.consume_setext_heading(level)?));
}
}
if is_block_start && matches!(ctx, ParseContext::Root | ParseContext::BlockQuote) {
if let Some(tok) = self.try_parse_definition_list()? {
return Ok(Some(tok));
}
}
// Character dispatch. Arm order matters: guarded block-level arms come
// before the unguarded inline fallback for the same character.
let token = match current_char {
'#' if is_block_start && allow_block_tokens(ctx) && self.is_atx_heading_start() => {
self.parse_heading()?
}
'*' if is_block_start && allow_block_tokens(ctx) && self.is_thematic_break_line() => {
self.consume_current_line();
Token::HorizontalRule
}
'_' if is_block_start && allow_block_tokens(ctx) && self.is_thematic_break_line() => {
self.consume_current_line();
Token::HorizontalRule
}
'*' if is_block_start && allow_block_tokens(ctx) && self.is_list_marker('*') => {
self.parse_list_item(false, ctx)?
}
// Any other `*` becomes a delimiter run (see `parse_emphasis`).
'*' => self.parse_emphasis()?,
'_' if !self.is_intra_word_underscore_run(self.position) => {
self.parse_emphasis()?
}
'_' => self.parse_text(ctx)?,
'`' => self.parse_code()?,
// Three or more tildes at a block start open a fence; two or more
// elsewhere open strikethrough; a lone tilde is text.
'~' if is_block_start
&& allow_block_tokens(ctx)
&& self.count_consecutive('~') >= 3 =>
{
self.parse_tilde_fence()?
}
'~' if self.count_consecutive('~') >= 2 => self.parse_strikethrough()?,
'~' => self.parse_text(ctx)?,
'=' if self.count_consecutive('=') >= 2 => self.parse_highlight()?,
'=' => self.parse_text(ctx)?,
'>' if is_block_start && allow_block_tokens(ctx) => self.parse_blockquote()?,
'-' | '+' if is_block_start && allow_block_tokens(ctx) => {
if self.is_thematic_break_line() {
self.consume_current_line();
Token::HorizontalRule
} else if self.check_horizontal_rule()? {
Token::HorizontalRule
} else if self.is_list_marker(current_char) {
self.parse_list_item(false, ctx)?
} else {
self.parse_text(ctx)?
}
}
'0'..='9' if is_block_start && allow_block_tokens(ctx) => {
if let Some(n) = self.check_ordered_list_marker() {
// An ordered marker opens a list item only when it is numbered 1,
// directly follows another list item, or follows a blank line /
// the beginning of the input; otherwise the digits are text.
let can_open = n == 1
|| self.last_emitted_list_item
|| self.previous_line_is_blank_or_bof();
if can_open {
self.parse_list_item(true, ctx)?
} else {
self.parse_text(ctx)?
}
} else {
self.parse_text(ctx)?
}
}
'[' => self.parse_bracket_dispatch(is_block_start)?,
'!' => {
// `![` starts an image; a lone `!` is text.
if self.position + 1 < self.input.len() && self.input[self.position + 1] == '[' {
self.parse_image()?
} else {
self.parse_text(ctx)?
}
}
// `<` at a block start: HTML block first, then the same inline
// candidates as the generic `<` arm further down.
'<' if is_block_start && allow_block_tokens(ctx) => {
if let Some(block) = self.try_parse_html_block() {
block
} else if self.is_html_comment_start() {
self.parse_html_comment()?
} else if let Some(autolink) = self.try_parse_autolink() {
autolink
} else if let Some(len) = self.try_match_html_tag_len() {
let html: String = self.input[self.position..self.position + len]
.iter()
.collect();
self.position += len;
Token::HtmlInline(html)
} else if let Some(len) = self.try_match_inline_raw_html_special() {
let html: String = self.input[self.position..self.position + len]
.iter()
.collect();
self.position += len;
Token::HtmlInline(html)
} else {
self.parse_text(ctx)?
}
}
'<' if self.is_html_comment_start() => self.parse_html_comment()?,
// Inline `<`: autolink, then an HTML tag, then the "special" raw HTML
// forms, and finally plain text.
'<' => {
if let Some(autolink) = self.try_parse_autolink() {
autolink
} else if let Some(len) = self.try_match_html_tag_len() {
let html: String = self.input[self.position..self.position + len]
.iter()
.collect();
self.position += len;
Token::HtmlInline(html)
} else if let Some(len) = self.try_match_inline_raw_html_special() {
let html: String = self.input[self.position..self.position + len]
.iter()
.collect();
self.position += len;
Token::HtmlInline(html)
} else {
self.parse_text(ctx)?
}
}
'\n' => self.parse_newline()?,
// A pipe at a line start may open a table. A `|` that is not at a line
// start is handled by the catch-all arm at the bottom.
'|' if is_line_start => {
if self.is_table_start() {
self.parse_table()?
} else {
self.parse_text(ctx)?
}
}
'^' => {
if let Some(tok) = self.try_parse_inline_footnote(ctx)? {
tok
} else {
self.parse_text(ctx)?
}
}
// `scan_math` is a read-only probe (`&self`); `parse_math` scans again
// and consumes the span.
'$' if self.scan_math().is_some() => self.parse_math(),
_ => self.parse_text(ctx)?,
};
Ok(Some(token))
}
/// Reports whether the cursor sits on a valid ATX heading opener: a run of
/// one to six `#` characters that is followed by a space, a tab, a newline,
/// or the end of the input.
fn is_atx_heading_start(&self) -> bool {
    if self.current_char() != '#' {
        return false;
    }
    let rest = self.input.get(self.position..).unwrap_or(&[]);
    let hashes = rest.iter().take_while(|&&c| c == '#').count();
    if hashes == 0 || hashes > 6 {
        return false;
    }
    // The character right after the run decides whether this is a heading.
    matches!(rest.get(hashes).copied(), None | Some(' ' | '\t' | '\n'))
}
/// Parses an ATX heading whose first `#` is at the cursor.
///
/// At most six `#` characters are consumed as the level. After
/// `skip_whitespace`, the rest of the line (up to, but excluding, the
/// newline) is passed through `strip_atx_trailing_hashes` and lexed by a
/// sub-lexer in `ParseContext::Inline` with `in_heading` set and a copy of
/// this lexer's link definitions.
///
/// # Errors
/// Propagates any `LexerError` from lexing the heading text.
fn parse_heading(&mut self) -> Result<Token, LexerError> {
    let mut hashes = 0usize;
    while self.current_char() == '#' && hashes < 6 {
        self.advance();
        hashes += 1;
    }
    self.skip_whitespace();

    // Walk to the end of the current line; the newline is left in place.
    let text_from = self.position;
    loop {
        if self.position >= self.input.len() || self.current_char() == '\n' {
            break;
        }
        self.advance();
    }
    let line: String = self.input[text_from..self.position].iter().collect();

    let mut inline_lexer = self.sub_lexer(strip_atx_trailing_hashes(&line));
    inline_lexer.in_heading = true;
    inline_lexer.definitions = self.definitions.clone();
    let children = inline_lexer.parse_with_context(ParseContext::Inline)?;
    Ok(Token::Heading(children, hashes))
}
/// Consumes the run of identical delimiter characters at the cursor and
/// returns it as a `Token::DelimRun`. No pairing is attempted here; the run
/// is emitted as-is.
fn parse_emphasis(&mut self) -> Result<Token, LexerError> {
    let marker = self.current_char();
    let mut run_len = 0;
    loop {
        let still_in_run = self.position < self.input.len() && self.current_char() == marker;
        if !still_in_run {
            break;
        }
        self.advance();
        run_len += 1;
    }
    Ok(Token::DelimRun {
        ch: marker,
        count: run_len,
    })
}
/// Parses the backtick run at the cursor as either a fenced code block or
/// an inline code span.
///
/// The run is a fence opener when it has at least three backticks, starts
/// at a block-marker position, and no further backtick appears on the rest
/// of the opener line. Otherwise the work is delegated to
/// `parse_inline_code_span_body`.
///
/// For a fence, the first whitespace-separated word of the info string
/// (after `decode_escapes_and_entities`) becomes `language`. Content lines
/// are collected until a closing fence or end of input; an unterminated
/// fence therefore runs to the end of the input.
fn parse_code(&mut self) -> Result<Token, LexerError> {
let opener_pos = self.position;
let is_block = self.is_block_marker_start();
// Column width of whatever precedes the opener on its line. A tab advances
// to the next multiple of 4; every other character counts as one column.
let opener_indent_cols = if is_block {
let mut p = opener_pos;
while p > 0 && self.input[p - 1] != '\n' {
p -= 1;
}
let mut col = 0usize;
for &c in &self.input[p..opener_pos] {
match c {
' ' => col += 1,
'\t' => col += 4 - (col % 4),
_ => col += 1,
}
}
col
} else {
0
};
// `count_backticks` consumes the opener run.
let start_backticks = self.count_backticks();
let is_fence = start_backticks >= 3
&& is_block
&& self.no_backticks_on_rest_of_line(opener_pos, start_backticks);
if !is_fence {
return Ok(self.parse_inline_code_span_body(start_backticks));
}
self.skip_whitespace();
let info_string = self.read_until_newline();
let language = decode_escapes_and_entities(
info_string.split_whitespace().next().unwrap_or(""),
);
// Step over the newline that ends the opener line.
if self.position < self.input.len() && self.current_char() == '\n' {
self.advance();
}
let mut content_lines: Vec<String> = Vec::new();
loop {
if self.position >= self.input.len() {
break;
}
let line_start = self.position;
// Measure leading whitespace; scanning stops once 4 columns are reached.
let mut col = 0usize;
let mut q = line_start;
while q < self.input.len()
&& (self.input[q] == ' ' || self.input[q] == '\t')
&& col < 4
{
if self.input[q] == ' ' {
col += 1;
} else {
col += 4 - (col % 4);
}
q += 1;
}
// Only a line with fewer than 4 columns of leading whitespace can close.
if col < 4 {
let mut close_count = 0usize;
let mut r = q;
while r < self.input.len() && self.input[r] == '`' {
close_count += 1;
r += 1;
}
if close_count >= start_backticks {
// After the closing run only spaces/tabs may follow on the line.
let mut tail = r;
while tail < self.input.len()
&& (self.input[tail] == ' ' || self.input[tail] == '\t')
{
tail += 1;
}
if tail >= self.input.len() || self.input[tail] == '\n' {
// The cursor is left on the newline (or at end of input); the
// newline after the closing fence is not consumed here.
self.position = tail;
let body = content_lines.join("\n");
return Ok(Token::Code {
language,
content: body,
block: true,
});
}
}
}
// Ordinary content line: record it (via `strip_leading_cols` with the
// opener's indent) and move to the start of the next line.
let mut p = line_start;
while p < self.input.len() && self.input[p] != '\n' {
p += 1;
}
content_lines.push(strip_leading_cols(
&self.input,
line_start,
p,
opener_indent_cols,
));
if p < self.input.len() {
self.position = p + 1;
} else {
self.position = p;
}
}
// End of input reached without a closing fence.
let body = content_lines.join("\n");
Ok(Token::Code {
language,
content: body,
block: true,
})
}
/// Returns `true` when no backtick occurs between `opener_pos + count` and
/// the end of that line (or the end of the input).
fn no_backticks_on_rest_of_line(&self, opener_pos: usize, count: usize) -> bool {
    match self.input.get(opener_pos + count..) {
        Some(rest) => rest
            .iter()
            .take_while(|&&c| c != '\n')
            .all(|&c| c != '`'),
        // Starting past the end of the input: nothing left to inspect.
        None => true,
    }
}
/// Scans the body of an inline code span, starting at the current cursor
/// position, for an opener of `opener_count` backticks.
///
/// On finding a backtick run of exactly `opener_count`, returns a
/// non-block `Token::Code` whose content has gone through
/// `strip_code_span_outer_space`. In every failure case the cursor is
/// rewound to where the body began and the opener is returned as literal
/// `Token::Text` backticks, so the body is lexed again as ordinary input.
fn parse_inline_code_span_body(&mut self, opener_count: usize) -> Token {
let body_start = self.position;
let mut content = String::new();
while self.position < self.input.len() {
let ch = self.current_char();
if ch == '\n' {
// A newline followed directly by another newline aborts the span.
if self.input.get(self.position + 1) == Some(&'\n') {
self.position = body_start;
return Token::Text("`".repeat(opener_count));
}
// Skip leading whitespace on the next line (scanning stops once 3
// columns are reached) and abort if a new block begins there.
let next_line_start = self.position + 1;
let mut p = next_line_start;
let mut cols = 0usize;
while p < self.input.len() && cols < 3 {
match self.input[p] {
' ' => {
cols += 1;
p += 1;
}
'\t' => {
cols += 4 - (cols % 4);
p += 1;
}
_ => break,
}
}
if p < self.input.len() && self.line_starts_new_block_at(p) {
self.position = body_start;
return Token::Text("`".repeat(opener_count));
}
// A line ending inside the span is recorded as a single space.
content.push(' ');
self.advance();
continue;
}
if ch == '`' {
let close_count = self.count_consecutive('`');
if close_count == opener_count {
for _ in 0..close_count {
self.advance();
}
return Token::Code {
language: String::new(),
content: strip_code_span_outer_space(content),
block: false,
};
}
// A backtick run of a different length is part of the content.
for _ in 0..close_count {
content.push('`');
self.advance();
}
continue;
}
content.push(ch);
self.advance();
}
// Reached end of input without a matching closer.
self.position = body_start;
Token::Text("`".repeat(opener_count))
}
/// Counts how many times `c` repeats starting at the cursor, without moving
/// the cursor. Returns 0 when the cursor is at or past the end of input.
fn count_consecutive(&self, c: char) -> usize {
    self.input
        .get(self.position..)
        .map_or(0, |rest| rest.iter().take_while(|&&ch| ch == c).count())
}
/// Parses a `~~…~~` strikethrough span starting at the cursor.
///
/// The whole opening tilde run is consumed, the body is lexed in
/// `ParseContext::Inline` up to the next `~`, and then up to two closing
/// tildes are consumed. If fewer than two are found, the cursor is rewound
/// to just after the opener and the opener run (plus one directly following
/// space, if any) is returned as plain text.
///
/// # Errors
/// Propagates any `LexerError` from lexing the body.
fn parse_strikethrough(&mut self) -> Result<Token, LexerError> {
    const CLOSER_LEN: usize = 2;

    let mut opener_len = 0;
    while self.current_char() == '~' {
        self.advance();
        opener_len += 1;
    }
    let rewind_to = self.position;

    let mut inner = self.parse_nested_content(|c| c == '~', ParseContext::Inline)?;

    let mut closed = 0usize;
    while closed < CLOSER_LEN && self.current_char() == '~' {
        self.advance();
        closed += 1;
    }

    if closed == CLOSER_LEN {
        resolve_emphasis(&mut inner);
        return Ok(Token::Strikethrough(inner));
    }

    // No proper closer: emit the opener literally and re-lex the body.
    self.position = rewind_to;
    let mut literal = "~".repeat(opener_len);
    if self.position < self.input.len() && self.current_char() == ' ' {
        literal.push(' ');
        self.advance();
    }
    Ok(Token::Text(literal))
}
/// Parses a `==…==` highlight span starting at the cursor.
///
/// The whole opening `=` run is consumed, the body is lexed in
/// `ParseContext::Inline` up to the next `=`, and then up to two closing
/// `=` characters are consumed. If fewer than two are found, the cursor is
/// rewound to just after the opener and the opener run (plus one directly
/// following space, if any) is returned as plain text.
///
/// # Errors
/// Propagates any `LexerError` from lexing the body.
fn parse_highlight(&mut self) -> Result<Token, LexerError> {
    const CLOSER_LEN: usize = 2;

    let mut opener_len = 0;
    while self.current_char() == '=' {
        self.advance();
        opener_len += 1;
    }
    let rewind_to = self.position;

    let mut inner = self.parse_nested_content(|c| c == '=', ParseContext::Inline)?;

    let mut closed = 0usize;
    while closed < CLOSER_LEN && self.current_char() == '=' {
        self.advance();
        closed += 1;
    }

    if closed < CLOSER_LEN {
        // No proper closer: emit the opener literally and re-lex the body.
        self.position = rewind_to;
        let mut literal = "=".repeat(opener_len);
        if self.position < self.input.len() && self.current_char() == ' ' {
            literal.push(' ');
            self.advance();
        }
        return Ok(Token::Text(literal));
    }

    resolve_emphasis(&mut inner);
    Ok(Token::Highlight(inner))
}
/// Looks ahead, without moving the cursor, for a math span that starts at
/// the cursor.
///
/// Returns `(inline, content_start, content_end, after_close)` as indices
/// into `self.input`, or `None` when no span is found.
///
/// * Two or more leading `$`: a `$$ … $$` span. The scan fails at a blank
///   line (two consecutive newlines) and skips a `$$` whose first `$` is
///   backslash-escaped.
/// * A single `$`: an inline span that may not start with whitespace, may
///   not cross a newline, and closes at an unescaped `$` that is not
///   preceded by whitespace and not followed by an ASCII digit.
fn scan_math(&self) -> Option<(bool, usize, usize, usize)> {
    if self.current_char() != '$' {
        return None;
    }
    let len = self.input.len();

    if self.count_consecutive('$') >= 2 {
        let body_from = self.position + 2;
        // Each step inspects a pair of characters, so stop one short of the end.
        for at in body_from..len.saturating_sub(1) {
            let (here, next) = (self.input[at], self.input[at + 1]);
            if here == '\n' && next == '\n' {
                return None;
            }
            if here == '$' && next == '$' && !is_backslash_escaped(&self.input, at) {
                return Some((false, body_from, at, at + 2));
            }
        }
        return None;
    }

    let first = *self.input.get(self.position + 1)?;
    if first.is_whitespace() || first == '$' {
        return None;
    }

    let body_from = self.position + 1;
    for at in body_from..len {
        match self.input[at] {
            '\n' => return None,
            '$' if !is_backslash_escaped(&self.input, at) => {
                let prev = self.input[at - 1];
                let digit_follows =
                    matches!(self.input.get(at + 1).copied(), Some(d) if d.is_ascii_digit());
                if !prev.is_whitespace() && !digit_follows {
                    return Some((true, body_from, at, at + 1));
                }
            }
            _ => {}
        }
    }
    None
}
/// Consumes the math span located by `scan_math` and returns it as
/// `Token::Math`. Content of a non-inline span is trimmed; inline content is
/// kept verbatim.
///
/// When `scan_math` finds nothing, the run of `$` at the cursor (at least
/// one character) is consumed and returned as plain text.
fn parse_math(&mut self) -> Token {
    let Some((inline, body_from, body_to, resume_at)) = self.scan_math() else {
        let dollars = self.count_consecutive('$').max(1);
        for _ in 0..dollars {
            self.advance();
        }
        return Token::Text("$".repeat(dollars));
    };

    let raw: String = self.input[body_from..body_to].iter().collect();
    self.position = resume_at;
    let content = if inline { raw } else { raw.trim().to_string() };
    Token::Math { inline, content }
}
/// Parses a `~~~` fenced code block whose opener starts at the cursor.
///
/// The first whitespace-separated word of the info string (after
/// `decode_escapes_and_entities`) becomes `language`. Content lines are
/// collected, each passed through `strip_leading_cols` with the opener's
/// indent, until a closing fence or the end of input.
///
/// A closing fence is a line with fewer than 4 columns of leading
/// whitespace, then at least as many `~` as the opener, then nothing but
/// spaces or tabs up to the end of the line. A tilde run followed by other
/// text is ordinary content, matching the check the backtick fence path
/// performs. The newline after the closing fence is consumed.
fn parse_tilde_fence(&mut self) -> Result<Token, LexerError> {
    let opener_pos = self.position;
    // Column width of whatever precedes the opener on its line. A tab
    // advances to the next multiple of 4; anything else is one column.
    let opener_indent_cols = {
        let mut p = opener_pos;
        while p > 0 && self.input[p - 1] != '\n' {
            p -= 1;
        }
        let mut col = 0usize;
        for &c in &self.input[p..opener_pos] {
            if c == '\t' {
                col += 4 - (col % 4);
            } else {
                col += 1;
            }
        }
        col
    };

    // Explicit bounds guard, as `count_backticks` does for backtick runs.
    let mut start_tildes = 0;
    while self.position < self.input.len() && self.current_char() == '~' {
        start_tildes += 1;
        self.advance();
    }

    self.skip_whitespace();
    let info_string = self.read_until_newline();
    let language = decode_escapes_and_entities(
        info_string.split_whitespace().next().unwrap_or(""),
    );
    // Step over the newline that ends the opener line.
    if self.position < self.input.len() && self.current_char() == '\n' {
        self.advance();
    }

    let mut content_lines: Vec<String> = Vec::new();
    loop {
        if self.position >= self.input.len() {
            break;
        }
        let line_start = self.position;

        // Measure leading whitespace; scanning stops once 4 columns are reached.
        let mut col = 0usize;
        let mut q = line_start;
        while q < self.input.len()
            && (self.input[q] == ' ' || self.input[q] == '\t')
            && col < 4
        {
            if self.input[q] == ' ' {
                col += 1;
            } else {
                col += 4 - (col % 4);
            }
            q += 1;
        }

        if col < 4 {
            let mut close_count = 0usize;
            let mut r = q;
            while r < self.input.len() && self.input[r] == '~' {
                close_count += 1;
                r += 1;
            }
            if close_count >= start_tildes {
                // Only trailing spaces/tabs are allowed after the closing run;
                // otherwise the line is kept as content.
                let mut tail = r;
                while tail < self.input.len()
                    && (self.input[tail] == ' ' || self.input[tail] == '\t')
                {
                    tail += 1;
                }
                if tail >= self.input.len() || self.input[tail] == '\n' {
                    self.position = tail;
                    if self.position < self.input.len() && self.current_char() == '\n' {
                        self.advance();
                    }
                    return Ok(Token::Code {
                        language,
                        content: content_lines.join("\n"),
                        block: true,
                    });
                }
            }
        }

        // Ordinary content line: record it and move to the next line start.
        let mut p = line_start;
        while p < self.input.len() && self.input[p] != '\n' {
            p += 1;
        }
        content_lines.push(strip_leading_cols(
            &self.input,
            line_start,
            p,
            opener_indent_cols,
        ));
        self.position = if p < self.input.len() { p + 1 } else { p };
    }

    // End of input reached without a closing fence.
    Ok(Token::Code {
        language,
        content: content_lines.join("\n"),
        block: true,
    })
}
/// Consumes the run of backticks at the cursor and returns its length.
/// Unlike `count_consecutive`, this moves the cursor past the run.
fn count_backticks(&mut self) -> usize {
    let mut run = 0;
    loop {
        let at_backtick = self.position < self.input.len() && self.current_char() == '`';
        if !at_backtick {
            return run;
        }
        self.advance();
        run += 1;
    }
}
/// Parses a block quote and returns `Token::BlockQuote` with its lexed body.
///
/// The cursor is first moved back to the start of the current line. Lines
/// are then gathered one at a time:
///
/// * a *marked* line has up to three leading spaces followed by `>`; the
///   marker and one following space or tab are removed and the remainder is
///   kept;
/// * an unmarked, non-blank line is accepted as a lazy continuation only
///   when the previously kept line looks like paragraph text and the new
///   line does not start a new block (`line_starts_new_block_at`).
///
/// The gathered lines are joined with `\n` and lexed by a sub-lexer in
/// `ParseContext::BlockQuote`; `suppress_setext` is set on it when any lazy
/// line was taken.
///
/// # Errors
/// Propagates any `LexerError` from lexing the body.
fn parse_blockquote(&mut self) -> Result<Token, LexerError> {
// Rewind to the first character of the current line.
while self.position > 0 && self.input[self.position - 1] != '\n' {
self.position -= 1;
}
let mut body_lines: Vec<String> = Vec::new();
let mut had_lazy = false;
loop {
if self.position >= self.input.len() || !self.is_at_line_start() {
break;
}
let line_start = self.position;
// Look past at most three leading spaces for the `>` marker.
let mut peek = line_start;
let mut leading = 0usize;
while peek < self.input.len() && self.input[peek] == ' ' && leading < 3 {
peek += 1;
leading += 1;
}
let is_marked =
peek < self.input.len() && self.input[peek] == '>';
if is_marked {
self.position = peek;
// Consume the `>` marker itself.
self.advance(); let mut body_prefix = String::new();
// Column tracker used for tab expansion; starts at 1, i.e. just past
// the marker. NOTE(review): this does not add the `leading` spaces
// before `>` — confirm that is intended.
let mut orig_col: usize = 1;
if self.position < self.input.len() {
match self.current_char() {
// One space after the marker is dropped.
' ' => {
self.advance();
orig_col += 1;
}
// A tab after the marker: one column of it is dropped and the rest
// of its span is kept as spaces.
'\t' => {
self.advance();
let span = 4 - (orig_col % 4);
for _ in 0..(span - 1) {
body_prefix.push(' ');
}
orig_col += span;
}
_ => {}
}
}
// Remaining leading whitespace is kept, with tabs expanded to spaces.
while self.position < self.input.len() {
match self.current_char() {
'\t' => {
let span = 4 - (orig_col % 4);
for _ in 0..span {
body_prefix.push(' ');
}
orig_col += span;
self.advance();
}
' ' => {
body_prefix.push(' ');
orig_col += 1;
self.advance();
}
_ => break,
}
}
let rest = self.read_until_newline();
body_lines.push(body_prefix + &rest);
if self.position < self.input.len() && self.current_char() == '\n' {
self.advance();
}
continue;
}
// Unmarked line. It can only continue a quote that already has content.
if body_lines.is_empty() {
break;
}
// A blank line (or end of input) ends the quote.
if peek >= self.input.len() || self.input[peek] == '\n' {
break;
}
// Lazy continuation requires the last kept line to look like paragraph
// text: non-blank, fewer than 4 leading spaces, and not starting with a
// run of three or more backticks or tildes.
let last_was_paragraph = body_lines
.last()
.map(|l| {
if l.trim().is_empty() {
return false;
}
let leading: usize =
l.chars().take_while(|&c| c == ' ').count();
if leading >= 4 {
return false;
}
let chars: Vec<char> = l.chars().collect();
let p = leading;
if p < chars.len()
&& (chars[p] == '`' || chars[p] == '~')
{
let marker = chars[p];
let mut cnt = 0;
while p + cnt < chars.len() && chars[p + cnt] == marker {
cnt += 1;
}
if cnt >= 3 {
return false;
}
}
true
})
.unwrap_or(false);
if !last_was_paragraph {
break;
}
if self.line_starts_new_block_at(peek) {
break;
}
// Take the whole unmarked line, leading spaces included, as lazy text.
self.position = line_start;
let lazy_line = self.read_until_newline();
body_lines.push(lazy_line);
had_lazy = true;
if self.position < self.input.len() && self.current_char() == '\n' {
self.advance();
}
}
let body_text = body_lines.join("\n");
let mut sub = self.sub_lexer(body_text);
sub.suppress_setext = had_lazy;
let body = sub.parse_with_context(ParseContext::BlockQuote)?;
Ok(Token::BlockQuote(body))
}
/// Reports whether the text at index `pos` begins a construct that would
/// interrupt the current paragraph: a list marker, an ATX heading, a
/// thematic break, or a run of three or more backticks or tildes.
///
/// Takes `&mut self` only because the cursor is moved to `pos` temporarily
/// for the heading and thematic-break probes; it is restored before
/// returning. Returns `false` when `pos` is at or past the end of input.
fn line_starts_new_block_at(&mut self, pos: usize) -> bool {
if pos >= self.input.len() {
return false;
}
let c = self.input[pos];
// Fast reject: only these characters are examined further.
match c {
'#' | '-' | '+' | '*' | '_' | '`' | '~' | '0'..='9' => {}
_ => return false,
}
// For `-`, `*` and `_` the list-marker verdict is deferred so that the
// thematic-break probe below runs first.
if self.line_starts_with_list_marker(pos) {
if !matches!(c, '-' | '*' | '_') {
return true;
}
}
// Probe helpers that read from the cursor; restore it on every path.
let savepos = self.position;
self.position = pos;
if c == '#' && self.is_atx_heading_start() {
self.position = savepos;
return true;
}
if (c == '-' || c == '*' || c == '_') && self.is_thematic_break_line() {
self.position = savepos;
return true;
}
self.position = savepos;
// Deferred list-marker check for `-` and `*`.
if matches!(c, '-' | '*') && self.line_starts_with_list_marker(pos) {
return true;
}
// Three or more backticks/tildes count as a new block.
if c == '`' || c == '~' {
let mut p = pos;
while p < self.input.len() && self.input[p] == c {
p += 1;
}
if p - pos >= 3 {
return true;
}
}
false
}
/// Chooses the parser for a `[` at the cursor.
///
/// When the next character is `^`, a footnote definition is tried first
/// (only if `is_block_start`), then a footnote reference. If neither
/// matches, or there is no `^`, a wikilink is tried and finally an
/// ordinary link.
///
/// # Errors
/// Propagates any `LexerError` from the parser that is selected.
#[inline(never)]
fn parse_bracket_dispatch(&mut self, is_block_start: bool) -> Result<Token, LexerError> {
    let caret_follows = self.input.get(self.position + 1) == Some(&'^');
    if caret_follows {
        if is_block_start {
            if let Some(definition) = self.try_parse_footnote_definition()? {
                return Ok(definition);
            }
        }
        if let Some(reference) = self.try_parse_footnote_reference()? {
            return Ok(reference);
        }
    }
    match self.try_parse_wikilink() {
        Some(link) => Ok(link),
        None => self.parse_link(),
    }
}
/// Tries to parse `[[target]]` or `[[target|label]]` at the cursor.
///
/// The closing `]]` must appear on the same line. The target is trimmed and
/// passed through `slugify`; an empty slug rejects the match. The visible
/// text is the trimmed label, falling back to the trimmed target when the
/// label is absent or empty. On success the cursor moves past the closing
/// `]]` and a `Token::Link` with URL `#<slug>` is returned; on failure the
/// cursor is left untouched and `None` is returned.
fn try_parse_wikilink(&mut self) -> Option<Token> {
    if self.input.get(self.position + 1) != Some(&'[') {
        return None;
    }

    // Find the closing `]]`; both characters of the pair must exist.
    let inner_from = self.position + 2;
    let mut cursor = inner_from;
    let inner_to = loop {
        let here = *self.input.get(cursor)?;
        let next = *self.input.get(cursor + 1)?;
        if here == '\n' {
            return None;
        }
        if here == ']' && next == ']' {
            break cursor;
        }
        cursor += 1;
    };

    // Split on the first pipe into target and optional label.
    let inner: &[char] = &self.input[inner_from..inner_to];
    let (target_chars, label_chars): (&[char], Option<&[char]>) =
        match inner.iter().position(|&c| c == '|') {
            Some(bar) => (&inner[..bar], Some(&inner[bar + 1..])),
            None => (inner, None),
        };

    let target: String = target_chars.iter().collect();
    let target = target.trim();
    let slug = slugify(target);
    if slug.is_empty() {
        return None;
    }

    let visible = label_chars
        .map(|chars| chars.iter().collect::<String>().trim().to_string())
        .filter(|label| !label.is_empty())
        .unwrap_or_else(|| target.to_string());

    self.position = inner_to + 2;
    Some(Token::Link {
        content: vec![Token::Text(visible)],
        url: format!("#{}", slug),
        title: None,
    })
}
/// Tries to parse a footnote reference `[^label]` at the cursor.
///
/// The two characters at the cursor are skipped without being inspected
/// (the caller has already seen `[^`). The label is a non-empty run of ASCII
/// alphanumerics, `_` or `-`, and must be followed directly by `]`. On
/// success the cursor moves past the `]`; otherwise it is left untouched and
/// `Ok(None)` is returned.
fn try_parse_footnote_reference(&mut self) -> Result<Option<Token>, LexerError> {
    let label_from = self.position + 2;
    let label_len = self.input.get(label_from..).map_or(0, |rest| {
        rest.iter()
            .take_while(|&&c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
            .count()
    });
    let label_to = label_from + label_len;

    if label_len == 0 || self.input.get(label_to) != Some(&']') {
        return Ok(None);
    }

    let label: String = self.input[label_from..label_to].iter().collect();
    self.position = label_to + 1;
    Ok(Some(Token::FootnoteReference(label)))
}
/// Tries to parse a footnote definition `[^label]: text` at the cursor.
///
/// The two characters at the cursor are skipped without being inspected
/// (the caller has already seen `[^`). The label is a non-empty run of ASCII
/// alphanumerics, `_` or `-`, and must be followed by `]:`. One space after
/// the colon is dropped. Following lines are appended, separated by a single
/// space, for as long as `footnote_continuation_indent` accepts them and
/// they are non-empty after the indent.
///
/// On success the cursor is placed at the end of the last line taken (on its
/// newline, or at end of input) and the body is lexed in
/// `ParseContext::Inline`. On a non-match the cursor is left untouched and
/// `Ok(None)` is returned.
///
/// # Errors
/// Propagates any `LexerError` from lexing the body.
fn try_parse_footnote_definition(&mut self) -> Result<Option<Token>, LexerError> {
let start = self.position;
// Skip `[^`.
let mut i = start + 2;
let label_start = i;
while i < self.input.len() {
let c = self.input[i];
if c.is_ascii_alphanumeric() || c == '_' || c == '-' {
i += 1;
} else {
break;
}
}
// Need a non-empty label and room for the `]:` pair.
if i == label_start || i + 1 >= self.input.len() {
return Ok(None);
}
if self.input[i] != ']' || self.input[i + 1] != ':' {
return Ok(None);
}
let label: String = self.input[label_start..i].iter().collect();
let mut content_start = i + 2;
if content_start < self.input.len() && self.input[content_start] == ' ' {
content_start += 1;
}
let mut content_end = content_start;
while content_end < self.input.len() && self.input[content_end] != '\n' {
content_end += 1;
}
let mut body: String = self.input[content_start..content_end].iter().collect();
// Gather continuation lines. `cursor` always rests on the newline that
// ends the most recently accepted line.
let mut cursor = content_end;
while cursor < self.input.len() && self.input[cursor] == '\n' {
let line_start = cursor + 1;
if line_start >= self.input.len() {
break;
}
let indent_width = footnote_continuation_indent(&self.input, line_start);
let Some(indent_width) = indent_width else {
break;
};
let body_start = line_start + indent_width;
let mut line_end = body_start;
while line_end < self.input.len() && self.input[line_end] != '\n' {
line_end += 1;
}
// A continuation line with nothing after its indent ends the footnote.
if line_end == body_start {
break;
}
body.push(' ');
body.extend(&self.input[body_start..line_end]);
cursor = line_end;
content_end = line_end;
}
let inner_tokens = if body.is_empty() {
Vec::new()
} else {
let mut sub = self.sub_lexer(body);
sub.parse_with_context(ParseContext::Inline)?
};
self.position = content_end;
Ok(Some(Token::FootnoteDefinition {
label,
content: inner_tokens,
}))
}
/// Tries to parse an inline footnote `^[body]` at the cursor.
///
/// Square brackets inside the body may nest, and a backslash skips the
/// character after it while matching brackets. A body that is empty or
/// whitespace-only, or a missing closing bracket, yields `Ok(None)` with the
/// cursor untouched. On success the body is lexed in `ParseContext::Inline`,
/// `inline_footnote_seq` is incremented to build the label, and the cursor
/// moves past the closing `]`.
///
/// # Errors
/// Propagates any `LexerError` from lexing the body.
fn try_parse_inline_footnote(
    &mut self,
    _ctx: ParseContext,
) -> Result<Option<Token>, LexerError> {
    let caret = self.position;
    if self.input.get(caret + 1) != Some(&'[') {
        return Ok(None);
    }

    // Find the bracket that balances the opening one.
    let inner_from = caret + 2;
    let mut cursor = inner_from;
    let mut open_brackets = 1usize;
    while cursor < self.input.len() {
        let ch = self.input[cursor];
        if ch == '\\' && cursor + 1 < self.input.len() {
            cursor += 2;
            continue;
        }
        if ch == '[' {
            open_brackets += 1;
        } else if ch == ']' {
            open_brackets -= 1;
            if open_brackets == 0 {
                break;
            }
        }
        cursor += 1;
    }
    if open_brackets != 0 {
        return Ok(None);
    }

    let inner: String = self.input[inner_from..cursor].iter().collect();
    if inner.trim().is_empty() {
        return Ok(None);
    }

    let mut inline_lexer = self.sub_lexer(inner);
    let content = inline_lexer.parse_with_context(ParseContext::Inline)?;

    // The sequence number is taken only after the body lexed successfully.
    let seq = self.inline_footnote_seq.get() + 1;
    self.inline_footnote_seq.set(seq);
    let label = format!("\u{1}ifn{}", seq);

    self.position = cursor + 1;
    Ok(Some(Token::InlineFootnote { label, content }))
}
/// Tries to parse a definition list starting at the cursor.
///
/// A list begins with a non-blank term line that does not start another
/// block construct (`line_starts_block_construct`) and is immediately
/// followed by a line accepted by `is_definition_marker_line`. Each entry is
/// one term line plus all directly following definition-marker lines.
/// Further entries may follow, optionally separated by one empty line, as
/// long as they meet the same conditions.
///
/// Term and definition texts are trimmed and lexed in
/// `ParseContext::Inline`. Returns `Ok(None)` with the cursor unchanged when
/// the opening conditions are not met.
///
/// # Errors
/// Propagates any `LexerError` from lexing a term or definition.
#[inline(never)]
fn try_parse_definition_list(&mut self) -> Result<Option<Token>, LexerError> {
let start = self.position;
let term_line_start = start;
// Look ahead only: find the end of the candidate term line.
let mut probe = start;
while probe < self.input.len() && self.input[probe] != '\n' {
probe += 1;
}
let term_line_end = probe;
if term_line_end == term_line_start {
return Ok(None);
}
let term_slice = &self.input[term_line_start..term_line_end];
if !term_slice.iter().any(|c| !c.is_whitespace()) {
return Ok(None);
}
if line_starts_block_construct(term_slice) {
return Ok(None);
}
// The term line must end in a newline and be followed by a marker line.
if probe >= self.input.len() || self.input[probe] != '\n' {
return Ok(None);
}
if !is_definition_marker_line(&self.input, probe + 1) {
return Ok(None);
}
self.position = start;
let mut entries: Vec<DefinitionListEntry> = Vec::new();
loop {
// Consume the term line.
let t_start = self.position;
while self.position < self.input.len() && self.current_char() != '\n' {
self.advance();
}
let term_text: String = self.input[t_start..self.position].iter().collect();
if self.position < self.input.len() {
self.advance(); }
let term_trimmed = term_text.trim();
let term_tokens = if term_trimmed.is_empty() {
Vec::new()
} else {
let mut sub = self.sub_lexer(term_trimmed.to_string());
sub.parse_with_context(ParseContext::Inline)?
};
// Consume every consecutive definition-marker line.
let mut definitions: Vec<Vec<Token>> = Vec::new();
loop {
if !is_definition_marker_line(&self.input, self.position) {
break;
}
// Skip up to three leading spaces, then the single marker character
// (already validated by `is_definition_marker_line`), then one
// optional space or tab.
let mut q = self.position;
let mut lead = 0;
while q < self.input.len() && self.input[q] == ' ' && lead < 3 {
q += 1;
lead += 1;
}
q += 1;
if q < self.input.len() && (self.input[q] == ' ' || self.input[q] == '\t') {
q += 1;
}
let d_start = q;
let mut d_end = q;
while d_end < self.input.len() && self.input[d_end] != '\n' {
d_end += 1;
}
let def_text: String = self.input[d_start..d_end].iter().collect();
let def_trimmed = def_text.trim();
let def_tokens = if def_trimmed.is_empty() {
Vec::new()
} else {
let mut sub = self.sub_lexer(def_trimmed.to_string());
sub.parse_with_context(ParseContext::Inline)?
};
definitions.push(def_tokens);
self.position = d_end;
if self.position < self.input.len() {
self.advance(); }
}
entries.push(DefinitionListEntry {
term: term_tokens,
definitions,
});
// Look ahead (without committing) for another term/definition pair,
// allowing one empty line in between.
let save = self.position;
let mut p = save;
if p < self.input.len() && self.input[p] == '\n' {
p += 1;
}
let next_term_start = p;
while p < self.input.len() && self.input[p] != '\n' {
p += 1;
}
if next_term_start == p {
break;
}
let candidate = &self.input[next_term_start..p];
if !candidate.iter().any(|c| !c.is_whitespace()) {
break;
}
if line_starts_block_construct(candidate) {
break;
}
if p >= self.input.len() || self.input[p] != '\n' {
break;
}
if !is_definition_marker_line(&self.input, p + 1) {
break;
}
// Another entry follows: step over the optional empty line and loop.
self.position = save;
if self.position < self.input.len() && self.input[self.position] == '\n' {
self.advance();
}
}
// NOTE(review): an entry is always pushed before the loop above can
// break, so this branch looks unreachable — confirm before removing.
if entries.is_empty() {
self.position = start;
return Ok(None);
}
Ok(Some(Token::DefinitionList { entries }))
}
/// Parses a link whose opening `[` is at the cursor.
///
/// The link text is lexed in `ParseContext::Inline` up to the next `]`.
/// Forms are then tried in this order:
///
/// 1. inline: `[text](destination "title")`;
/// 2. full or collapsed reference: `[text][label]` / `[text][]`, looked up
///    in `self.definitions` via `normalize_label`;
/// 3. shortcut reference: `[text]`, looked up the same way.
///
/// When nothing resolves, the brackets are emitted as literal text. If the
/// link text held only `Text` / `Newline` tokens it is folded into that same
/// `Token::Text`; otherwise the cursor is rewound to just after the opening
/// `[` (or the tokens are queued in `self.pending`) so the inner content is
/// emitted separately.
///
/// # Errors
/// Propagates any `LexerError` from lexing the link text.
fn parse_link(&mut self) -> Result<Token, LexerError> {
let bracket_pos = self.position;
// Consume the opening `[`.
self.advance(); let label_text_start = self.position;
let content = self.parse_nested_content(|c| c == ']', ParseContext::Inline)?;
let label_text_end = self.position;
// No closing bracket: this is not a link.
if self.position >= self.input.len() || self.current_char() != ']' {
let only_flat_text = content
.iter()
.all(|t| matches!(t, Token::Text(_)));
if only_flat_text {
let mut s = String::from("[");
for t in &content {
if let Token::Text(t) = t {
s.push_str(t);
}
}
return Ok(Token::Text(s));
}
// Mixed content: emit `[` now and queue the already-lexed tokens.
self.pending_hard_break = false;
for t in content {
self.pending.push_back(t);
}
return Ok(Token::Text("[".to_string()));
}
// The text already contains a link: emit the outer brackets literally
// around the inner tokens instead of nesting links.
if content
.iter()
.any(|t| matches!(t, Token::Link { .. }))
{
self.advance(); for t in content {
self.pending.push_back(t);
}
self.pending.push_back(Token::Text("]".to_string()));
return Ok(Token::Text("[".to_string()));
}
// Consume the closing `]`.
self.advance();
// Inline form. On failure the cursor returns to the `(`.
if self.position < self.input.len() && self.current_char() == '(' {
let save = self.position;
self.advance(); let (url, title) = self.read_link_destination_and_title();
if self.position < self.input.len() && self.current_char() == ')' {
self.advance(); let mut content = content;
resolve_emphasis(&mut content);
return Ok(Token::Link { content, url, title });
}
self.position = save;
}
// Raw source of the link text, used as the lookup key for collapsed and
// shortcut references.
let raw_label_text: String = self.input[label_text_start..label_text_end]
.iter()
.collect();
// Full (`[text][label]`) or collapsed (`[text][]`) reference.
if self.position < self.input.len() && self.current_char() == '[' {
let second_bracket = self.position;
self.advance(); let label_str = self.read_until_char_with_escapes(']');
let saw_closing_bracket = self.position < self.input.len()
&& self.current_char() == ']';
if saw_closing_bracket {
self.advance();
}
// An empty second label means "use the link text as the label".
let key = if label_str.trim().is_empty() {
normalize_label(&raw_label_text)
} else {
normalize_label(&label_str)
};
if let Some((url, title)) = self.definitions.get(&key).cloned() {
let mut content = content;
resolve_emphasis(&mut content);
return Ok(Token::Link { content, url, title });
}
let only_text = content
.iter()
.all(|t| matches!(t, Token::Text(_) | Token::Newline));
if label_str.trim().is_empty() {
let text_str = Token::collect_all_text(&content);
let bracket_label = if !saw_closing_bracket {
String::new()
} else {
"[]".to_string()
};
if only_text {
return Ok(Token::Text(format!(
"[{}]{}",
text_str, bracket_label
)));
}
// Rich content: emit `[` and re-lex from just after it.
self.pending_hard_break = false;
self.position = bracket_pos + 1;
return Ok(Token::Text("[".to_string()));
}
// Unresolved non-empty label: leave the second bracket for later lexing.
self.position = second_bracket;
let text_str = Token::collect_all_text(&content);
if only_text {
return Ok(Token::Text(format!("[{}]", text_str)));
}
self.pending_hard_break = false;
self.position = bracket_pos + 1;
return Ok(Token::Text("[".to_string()));
}
// Shortcut reference: `[text]` alone.
let key = normalize_label(&raw_label_text);
if let Some((url, title)) = self.definitions.get(&key).cloned() {
let mut content = content;
resolve_emphasis(&mut content);
return Ok(Token::Link { content, url, title });
}
let only_text = content
.iter()
.all(|t| matches!(t, Token::Text(_) | Token::Newline));
if only_text {
let text_str = Token::collect_all_text(&content);
return Ok(Token::Text(format!("[{}]", text_str)));
}
// Rich content that did not resolve: emit `[` and re-lex from after it.
self.pending_hard_break = false;
self.position = bracket_pos + 1;
Ok(Token::Text("[".to_string()))
}
/// Reads a link destination that is not wrapped in `<…>`, starting at the
/// cursor, and returns it with trailing whitespace trimmed.
///
/// A backslash before ASCII punctuation yields the punctuation character;
/// entities recognised by `try_decode_entity` are decoded. Parentheses are
/// tracked so balanced pairs stay inside the destination. Reading stops,
/// leaving the stopping character unconsumed, at a newline, at an unbalanced
/// `)`, or at a space/tab outside parentheses.
fn read_link_url_plain(&mut self) -> String {
    let mut url = String::new();
    let mut depth: i32 = 0;
    while self.position < self.input.len() {
        let c = self.current_char();

        // Backslash escape of ASCII punctuation.
        if c == '\\' && self.position + 1 < self.input.len() {
            let next = self.input[self.position + 1];
            if is_ascii_punctuation(next) {
                url.push(next);
                self.advance();
                self.advance();
                continue;
            }
        }

        // Entity reference.
        if c == '&' {
            if let Some((decoded, consumed)) =
                try_decode_entity(&self.input, self.position)
            {
                url.push_str(&decoded);
                for _ in 0..consumed {
                    self.advance();
                }
                continue;
            }
        }

        match c {
            '\n' => break,
            '(' => depth += 1,
            ')' => {
                if depth == 0 {
                    break;
                }
                depth -= 1;
            }
            // Whitespace outside parentheses always ends the destination,
            // whatever follows it, so no look-ahead is needed.
            ' ' | '\t' if depth == 0 => break,
            _ => {}
        }

        url.push(c);
        self.advance();
    }
    url.trim_end().to_string()
}
fn read_link_destination_and_title(&mut self) -> (String, Option<String>) {
while self.position < self.input.len()
&& (self.current_char() == ' ' || self.current_char() == '\t')
{
self.advance();
}
let url = if self.position < self.input.len() && self.current_char() == '<' {
let save_pos = self.position;
self.advance(); let mut s = String::new();
let mut ok = false;
while self.position < self.input.len() {
let c = self.current_char();
if c == '\\' && self.position + 1 < self.input.len() {
let next = self.input[self.position + 1];
if is_ascii_punctuation(next) {
s.push(next);
self.advance();
self.advance();
continue;
}
}
if c == '&' {
if let Some((decoded, consumed)) =
try_decode_entity(&self.input, self.position)
{
s.push_str(&decoded);
for _ in 0..consumed {
self.advance();
}
continue;
}
}
if c == '>' {
self.advance();
ok = true;
break;
}
if c == '<' || c == '\n' {
break;
}
s.push(c);
self.advance();
}
if ok {
s
} else {
let _ = save_pos;
s
}
} else {
self.read_link_url_plain()
};
let mut newlines_between = 0usize;
while self.position < self.input.len() {
match self.current_char() {
' ' | '\t' => self.advance(),
'\n' => {
newlines_between += 1;
if newlines_between > 1 {
break;
}
self.advance();
}
_ => break,
}
}
let title = if self.position < self.input.len() && newlines_between <= 1 {
match self.current_char() {
'"' => Some(self.read_title_delimited('"', '"')),
'\'' => Some(self.read_title_delimited('\'', '\'')),
'(' => Some(self.read_title_delimited('(', ')')),
_ => None,
}
} else {
None
};
let mut trailing_newlines = 0usize;
while self.position < self.input.len() {
match self.current_char() {
' ' | '\t' => self.advance(),
'\n' => {
trailing_newlines += 1;
if trailing_newlines > 1 {
break;
}
self.advance();
}
_ => break,
}
}
(url, title)
}
/// Reads a link title whose opening delimiter is at the cursor and returns
/// its decoded text.
///
/// The opening delimiter is consumed unconditionally; `_open` is not
/// inspected. Reading stops at `close` (which is then consumed), at a
/// newline (left in place), or at end of input. A backslash before ASCII
/// punctuation yields the punctuation character; entities recognised by
/// `try_decode_entity` are decoded.
fn read_title_delimited(&mut self, _open: char, close: char) -> String {
    self.advance();
    let mut title = String::new();
    loop {
        if self.position >= self.input.len() {
            break;
        }
        let here = self.current_char();
        if here == close || here == '\n' {
            break;
        }
        match here {
            '\\' => {
                if let Some(&escaped) = self.input.get(self.position + 1) {
                    if is_ascii_punctuation(escaped) {
                        title.push(escaped);
                        self.advance();
                        self.advance();
                        continue;
                    }
                }
            }
            '&' => {
                if let Some((decoded, consumed)) =
                    try_decode_entity(&self.input, self.position)
                {
                    title.push_str(&decoded);
                    for _ in 0..consumed {
                        self.advance();
                    }
                    continue;
                }
            }
            _ => {}
        }
        // Ordinary character, or an escape/entity that did not decode.
        title.push(here);
        self.advance();
    }
    // Consume the closing delimiter when the loop stopped on it.
    if self.position < self.input.len() && self.current_char() == close {
        self.advance();
    }
    title
}
/// Parses an image starting at `!` under the cursor.
///
/// Handles three forms once the bracketed alt text has been located:
/// * inline: `![alt](destination "title")`
/// * reference with a label: `![alt][label]` (an empty/blank label falls back
///   to the raw alt text as the lookup key)
/// * shortcut reference: `![alt]`
///
/// Reference forms are resolved through `self.definitions`; when no definition
/// matches, the consumed source is returned as a `Token::Text` reconstruction.
/// If `!` is not followed by `[`, the cursor is restored and the input is
/// parsed as plain text instead.
///
/// # Errors
/// Propagates any `LexerError` from parsing the alt text or the fallback text.
fn parse_image(&mut self) -> Result<Token, LexerError> {
    let start_pos = self.position;
    self.advance();
    if self.position >= self.input.len() || self.current_char() != '[' {
        self.position = start_pos;
        return self.parse_text(ParseContext::Inline);
    }
    self.advance();
    let alt_text_start = self.position;
    // Find the `]` that balances the opening `[`, skipping backslash-escaped
    // punctuation so escaped brackets do not affect the nesting depth.
    let alt_text_end = {
        let mut depth: i32 = 1;
        let mut p = self.position;
        while p < self.input.len() {
            match self.input[p] {
                '\\' if p + 1 < self.input.len()
                    && is_ascii_punctuation(self.input[p + 1]) =>
                {
                    p += 2;
                    continue;
                }
                '[' => depth += 1,
                ']' => {
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                _ => {}
            }
            p += 1;
        }
        if depth != 0 {
            // Unbalanced brackets: emit the literal `![` followed by the text
            // of whatever nested content can be parsed.
            let alt = self.parse_nested_content(|c| c == ']', ParseContext::Inline)?;
            let mut s = String::from("![");
            s.push_str(&Token::collect_all_text(&alt));
            return Ok(Token::Text(s));
        }
        p
    };
    // Collect the alt source once, directly into the sub-lexer's input.
    let alt_input: String = self.input[alt_text_start..alt_text_end].iter().collect();
    let mut sub_alt = self.sub_lexer(alt_input);
    sub_alt.definitions = self.definitions.clone();
    let alt = sub_alt.parse_with_context(ParseContext::Inline)?;
    // Move past the alt text and its closing `]`.
    self.position = alt_text_end;
    self.advance();
    if self.position < self.input.len() && self.current_char() == '(' {
        self.advance();
        let (url, title) = self.read_link_destination_and_title();
        if self.position < self.input.len() && self.current_char() == ')' {
            self.advance();
        }
        let mut alt = alt;
        resolve_emphasis(&mut alt);
        return Ok(Token::Image { alt, url, title });
    }
    // Reference forms key on the raw (unparsed) alt source.
    let raw_alt_text: String = self.input[alt_text_start..alt_text_end]
        .iter()
        .collect();
    if self.position < self.input.len() && self.current_char() == '[' {
        self.advance();
        let label_str = self.read_until_char_with_escapes(']');
        if self.position < self.input.len() && self.current_char() == ']' {
            self.advance();
        }
        let alt_text = Token::collect_all_text(&alt);
        let key = if label_str.trim().is_empty() {
            normalize_label(&raw_alt_text)
        } else {
            normalize_label(&label_str)
        };
        if let Some((url, title)) = self.definitions.get(&key).cloned() {
            let mut alt = alt;
            resolve_emphasis(&mut alt);
            return Ok(Token::Image { alt, url, title });
        }
        // No definition: reproduce the source as text.
        let display_label = decode_escapes_and_entities(&label_str);
        let bracket_label = if label_str.is_empty() {
            "[]".to_string()
        } else {
            format!("[{}]", display_label)
        };
        return Ok(Token::Text(format!("![{}]{}", alt_text, bracket_label)));
    }
    let key = normalize_label(&raw_alt_text);
    if let Some((url, title)) = self.definitions.get(&key).cloned() {
        let mut alt = alt;
        resolve_emphasis(&mut alt);
        return Ok(Token::Image { alt, url, title });
    }
    let alt_text = Token::collect_all_text(&alt);
    Ok(Token::Text(format!("![{}]", alt_text)))
}
/// Tries to match an HTML open or closing tag that starts at the cursor.
///
/// Nothing is consumed. On success returns the tag's length in characters,
/// counted from the `<` through the terminating `>` inclusive; returns `None`
/// when the text at the cursor is not a well-formed tag by the rules below.
///
/// * Tag name: an ASCII letter followed by ASCII alphanumerics or `-`.
/// * Closing tag (`</name`): optional spaces/tabs, then `>`.
/// * Open tag: zero or more attributes, then `>` or `/>`.
fn try_match_html_tag_len(&self) -> Option<usize> {
if self.current_char() != '<' {
return None;
}
let chars = &self.input;
let start = self.position;
let mut p = start + 1;
if p >= chars.len() {
return None;
}
let is_closing = chars[p] == '/';
if is_closing {
p += 1;
if p >= chars.len() || !chars[p].is_ascii_alphabetic() {
return None;
}
} else {
if !chars[p].is_ascii_alphabetic() {
return None;
}
}
// Consume the remainder of the tag name.
while p < chars.len()
&& (chars[p].is_ascii_alphanumeric() || chars[p] == '-')
{
p += 1;
}
if is_closing {
// A closing tag may only have spaces/tabs between the name and `>`.
while p < chars.len() && (chars[p] == ' ' || chars[p] == '\t') {
p += 1;
}
if chars.get(p) == Some(&'>') {
return Some(p - start + 1);
}
return None;
}
// Each iteration handles one attribute or the end of the tag.
loop {
let ws_start = p;
while p < chars.len()
&& (chars[p] == ' ' || chars[p] == '\t' || chars[p] == '\n')
{
p += 1;
}
if p >= chars.len() {
return None;
}
if chars[p] == '>' {
return Some(p - start + 1);
}
if chars[p] == '/' {
// Self-closing form: `/` must be immediately followed by `>`.
p += 1;
if chars.get(p) == Some(&'>') {
return Some(p - start + 1);
}
return None;
}
// An attribute must be separated from what precedes it by whitespace.
if p == ws_start {
return None;
}
// Attribute name: starts with a letter, `_` or `:`.
if !(chars[p].is_ascii_alphabetic() || chars[p] == '_' || chars[p] == ':') {
return None;
}
p += 1;
while p < chars.len()
&& (chars[p].is_ascii_alphanumeric()
|| chars[p] == '_'
|| chars[p] == ':'
|| chars[p] == '-'
|| chars[p] == '.')
{
p += 1;
}
let attr_end = p;
// Look ahead over spaces/tabs for an optional `= value`.
while p < chars.len() && (chars[p] == ' ' || chars[p] == '\t') {
p += 1;
}
if chars.get(p) == Some(&'=') {
p += 1;
while p < chars.len() && (chars[p] == ' ' || chars[p] == '\t') {
p += 1;
}
if p >= chars.len() {
return None;
}
match chars[p] {
// Double-quoted value: everything up to the next `"`.
'"' => {
p += 1;
while p < chars.len() && chars[p] != '"' {
p += 1;
}
if chars.get(p) != Some(&'"') {
return None;
}
p += 1;
}
// Single-quoted value: everything up to the next `'`.
'\'' => {
p += 1;
while p < chars.len() && chars[p] != '\'' {
p += 1;
}
if chars.get(p) != Some(&'\'') {
return None;
}
p += 1;
}
// Unquoted value: non-empty run without whitespace or any of "'=<>`.
_ => {
if "\"'=<>`".contains(chars[p]) {
return None;
}
while p < chars.len()
&& !chars[p].is_whitespace()
&& !"\"'=<>`".contains(chars[p])
{
p += 1;
}
}
}
} else {
// No value: rewind so the whitespace skipped during look-ahead is
// seen again as the separator before the next attribute.
p = attr_end;
}
}
}
/// Tries to match one of the non-tag inline raw HTML constructs at the cursor.
///
/// Recognised forms, checked in this order:
/// * `<? ... ?>`
/// * `<![CDATA[ ... ]]>`
/// * `<!` followed by an ASCII letter, up to the next `>`
///
/// Nothing is consumed. Returns the construct's length in characters, measured
/// from the `<` through its terminator, or `None` when no form matches or the
/// terminator is missing.
fn try_match_inline_raw_html_special(&self) -> Option<usize> {
    if self.current_char() != '<' {
        return None;
    }
    // Everything after the `<`; index `k` here is `input[position + 1 + k]`.
    let after_lt = match self.input.get(self.position + 1..) {
        Some(tail) if !tail.is_empty() => tail,
        _ => return None,
    };

    if after_lt[0] == '?' {
        // `<?` is 2 chars, `?>` is 2 chars, plus the offset of the closer.
        return after_lt[1..]
            .windows(2)
            .position(|w| w[0] == '?' && w[1] == '>')
            .map(|offset| offset + 4);
    }

    if after_lt.starts_with(&['!', '[', 'C', 'D', 'A', 'T', 'A', '[']) {
        // `<![CDATA[` is 9 chars, `]]>` is 3 chars, plus the offset of the closer.
        return after_lt[8..]
            .windows(3)
            .position(|w| w[0] == ']' && w[1] == ']' && w[2] == '>')
            .map(|offset| offset + 12);
    }

    if after_lt.len() >= 2 && after_lt[0] == '!' && after_lt[1].is_ascii_alphabetic() {
        // `<!X` is 3 chars, `>` is 1 char, plus the offset of the closer.
        return after_lt[2..]
            .iter()
            .position(|&c| c == '>')
            .map(|offset| offset + 4);
    }

    None
}
/// Cheap, permissive check for whether the `<` under the cursor could begin an
/// autolink.
///
/// The body between `<` and the next `>` must be non-empty and free of spaces,
/// tabs, newlines and `<`. It is accepted when it either starts with an ASCII
/// letter and contains a `:`, or contains an `@` with a non-empty part before
/// it and a `.` somewhere after it. Nothing is consumed.
fn looks_like_autolink_start(&self) -> bool {
    if self.current_char() != '<' {
        return false;
    }
    let body_start = self.position + 1;

    // Locate the closing `>`, bailing out on any character that cannot occur
    // inside an autolink body.
    let mut close = None;
    for (offset, &c) in self.input[body_start..].iter().enumerate() {
        match c {
            '>' => {
                close = Some(body_start + offset);
                break;
            }
            '\n' | ' ' | '\t' | '<' => return false,
            _ => {}
        }
    }
    let body = match close {
        Some(end) if end > body_start => &self.input[body_start..end],
        _ => return false,
    };

    // URL-like: leading letter plus a colon anywhere in the body.
    if body[0].is_ascii_alphabetic() && body.contains(&':') {
        return true;
    }

    // Email-like: something before the first `@`, and a dot after it.
    match body.iter().position(|&c| c == '@') {
        Some(at) => at > 0 && body[at + 1..].contains(&'.'),
        None => false,
    }
}
/// Tries to parse an autolink (`<scheme:...>` or `<user@host.tld>`) at the cursor.
///
/// On success the cursor is moved past the closing `>` and a `Token::Link` is
/// returned whose content is the literal body; email autolinks get a `mailto:`
/// URL. On failure `None` is returned and the cursor is left untouched.
///
/// URI rules applied (per the CommonMark autolink definition):
/// * the body may not contain spaces, `<`, or ASCII control characters;
/// * the scheme is 2–32 characters, starts with an ASCII letter and continues
///   with ASCII letters, digits, `+`, `-` or `.`, and is followed by `:`.
///
/// Email rules applied: a non-empty local part drawn from the permitted
/// character set, and a domain of dot-separated labels (1–63 alphanumeric or
/// `-` characters, not starting or ending with `-`) containing at least one dot.
fn try_parse_autolink(&mut self) -> Option<Token> {
    if self.current_char() != '<' {
        return None;
    }
    let start = self.position + 1;
    let mut p = start;
    while p < self.input.len() {
        let c = self.input[p];
        if c == '>' {
            break;
        }
        // Spaces, `<` and every ASCII control character (which covers `\n`
        // and `\t`) disqualify the autolink.
        if c == ' ' || c == '<' || c.is_ascii_control() {
            return None;
        }
        p += 1;
    }
    if self.input.get(p) != Some(&'>') {
        return None;
    }
    let body: String = self.input[start..p].iter().collect();
    if body.is_empty() {
        return None;
    }

    // The scheme is everything before the first `:`.
    let is_url_scheme = match body.find(':') {
        Some(colon) => {
            let scheme = &body[..colon];
            (2..=32).contains(&scheme.len())
                && scheme.starts_with(|c: char| c.is_ascii_alphabetic())
                && scheme
                    .chars()
                    .all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'))
        }
        None => false,
    };

    let is_email = !is_url_scheme && body.contains('@') && {
        let mut parts = body.splitn(2, '@');
        let local = parts.next().unwrap_or("");
        let domain = parts.next().unwrap_or("");
        let local_ok = !local.is_empty()
            && local.chars().all(|c| {
                c.is_ascii_alphanumeric()
                    || matches!(
                        c,
                        '.' | '!' | '#' | '$' | '%' | '&' | '\'' | '*'
                            | '+' | '/' | '=' | '?' | '^' | '_' | '`' | '{'
                            | '|' | '}' | '~' | '-'
                    )
            });
        let domain_ok = !domain.is_empty()
            && domain.split('.').all(|label| {
                !label.is_empty()
                    && label.len() <= 63
                    && !label.starts_with('-')
                    && !label.ends_with('-')
                    && label.chars().all(|c| c.is_ascii_alphanumeric() || c == '-')
            });
        local_ok && domain_ok && domain.contains('.')
    };

    if !is_url_scheme && !is_email {
        return None;
    }

    // Commit: step past the closing `>`.
    self.position = p + 1;
    let url = if is_email {
        format!("mailto:{}", body)
    } else {
        body.clone()
    };
    Some(Token::Link {
        content: vec![Token::Text(body)],
        url,
        title: None,
    })
}
/// Consumes the character under the cursor and reports it as a `Token::Newline`.
///
/// This never fails; the `Result` return type matches the other token parsers.
fn parse_newline(&mut self) -> Result<Token, LexerError> {
    self.position += 1;
    Ok(Token::Newline)
}
/// Collects a run of literal text starting at the cursor into a `Token::Text`.
///
/// The run ends at a newline, at the end of input, or where
/// `is_start_of_special_token(ctx)` reports that another token begins.
/// Backslash-escaped ASCII punctuation and entities accepted by
/// `try_decode_entity` are decoded into the text as they are encountered.
///
/// When the run stops on a newline (and the lexer is not inside a heading),
/// trailing-space / trailing-backslash hard-break markers are detected; in
/// that case the newline is consumed and `pending_hard_break` is set.
///
/// # Errors
/// Returns `LexerError::UnknownToken` if no text could be collected and the
/// cursor is already at the end of input.
fn parse_text(&mut self, ctx: ParseContext) -> Result<Token, LexerError> {
let mut content = String::new();
let start_pos = self.position;
// A single leading space is kept verbatim (except at the very start of the
// input) before the main scan begins.
if self.position > 0 && self.current_char() == ' ' {
content.push(' ');
self.advance();
}
// Tracks whether the most recently pushed character came from a backslash
// escape, so an escaped `\` at end of line is not mistaken for a hard break.
let mut last_was_escape = false;
while self.position < self.input.len() {
let ch = self.current_char();
if ch == '\\' && self.position + 1 < self.input.len() {
let next = self.input[self.position + 1];
if is_ascii_punctuation(next) {
content.push(next);
self.advance();
self.advance();
last_was_escape = true;
continue;
}
}
if ch == '&' {
if let Some((decoded, consumed)) =
try_decode_entity(&self.input, self.position)
{
content.push_str(&decoded);
for _ in 0..consumed {
self.advance();
}
last_was_escape = false;
continue;
}
}
if ch == '\n' || self.is_start_of_special_token(ctx) {
break;
}
content.push(ch);
self.advance();
last_was_escape = false;
}
if self.position < self.input.len()
&& self.current_char() == '\n'
&& !self.in_heading
{
let has_follow = self.has_content_after_newline(self.position);
// NOTE(review): CommonMark hard breaks require two or more trailing
// spaces; the literal tested here is as it appears in this file — confirm.
if content.ends_with(" ") && has_follow {
while content.ends_with(' ') {
content.pop();
}
self.advance();
self.pending_hard_break = true;
} else if !last_was_escape
&& content.ends_with('\\')
&& has_follow
{
// A literal (unescaped) trailing backslash marks a hard break.
content.pop();
self.advance();
self.pending_hard_break = true;
} else {
// No hard break: just drop trailing spaces before the newline.
while content.ends_with(' ') {
content.pop();
}
}
}
if content.is_empty() {
if self.position < self.input.len() {
// Guarantee forward progress by emitting the current character as text.
let c = self.current_char();
content.push(c);
self.advance();
} else {
let (line, column) = self.pos_to_line_col(start_pos);
return Err(LexerError::UnknownToken {
message: "Unexpected character".to_string(),
line,
column,
});
}
}
Ok(Token::Text(content))
}
/// Parses an HTML comment whose `<!--` opener begins at the cursor.
///
/// * `<!-->` yields an empty comment and `<!--->` yields the comment `-`.
/// * Otherwise the text up to the first `-->` becomes the comment body and the
///   cursor is placed after the terminator.
/// * With no terminator, everything from the opener to the end of input is
///   returned as `Token::Text` and the cursor is moved to the end.
///
/// This never returns `Err`.
fn parse_html_comment(&mut self) -> Result<Token, LexerError> {
    let opener = self.position;
    let body_start = opener + 4;
    let total = self.input.len();

    // Degenerate short forms.
    if self.input.get(body_start) == Some(&'>') {
        self.position = body_start + 1;
        return Ok(Token::HtmlComment(String::new()));
    }
    if self.input.get(body_start) == Some(&'-')
        && self.input.get(body_start + 1) == Some(&'>')
    {
        self.position = body_start + 2;
        return Ok(Token::HtmlComment("-".to_string()));
    }

    // First index at which a full `-->` fits and matches.
    let terminator = (body_start..total.saturating_sub(2)).find(|&i| {
        self.input[i] == '-' && self.input[i + 1] == '-' && self.input[i + 2] == '>'
    });

    match terminator {
        Some(end) => {
            let comment: String = self.input[body_start..end].iter().collect();
            self.position = end + 3;
            Ok(Token::HtmlComment(comment))
        }
        None => {
            let raw: String = self.input[opener..].iter().collect();
            self.position = total;
            Ok(Token::Text(raw))
        }
    }
}
/// Tries to recognise an HTML block starting at the cursor and, on success,
/// consumes it and returns it as a `Token::HtmlBlock`.
///
/// The block text always starts at `block_start`, i.e. it includes any run of
/// spaces immediately preceding the cursor. The start conditions are tried in
/// order; the first that applies decides where the block ends:
///
/// 1. `<script`, `<pre`, `<style`, `<textarea` — through the line containing a
///    matching-kind closer (see `scan_to_raw_html_block_close`).
/// 2. `<!--` — through the line containing `-->`.
/// 3. `<!` + ASCII letter — through the line containing `>`.
/// 4. `<![CDATA[` — through the line containing `]]>`.
/// 5. `<?` — through the line containing `?>`.
/// 6. A tag whose name is in `BLOCK_ELEMENT_TAG_NAMES` — up to the next blank line.
/// 7. Any other complete tag that is alone on its line, when the previous line
///    is blank or the beginning of the file — up to the next blank line.
///
/// Returns `None` (cursor unchanged) if nothing matches, or if a terminator for
/// cases 2–5 is never found. The cursor is indexed directly, so it must be
/// within the input when this is called.
fn try_parse_html_block(&mut self) -> Option<Token> {
// Rewind over leading spaces so the emitted block keeps its indentation.
let block_start = {
let mut p = self.position;
while p > 0 && self.input[p - 1] == ' ' {
p -= 1;
}
p
};
// Case 1: raw-content elements.
if self.input[self.position] == '<'
&& self.is_raw_html_block_opener_at(self.position + 1)
{
let end = self.scan_to_raw_html_block_close(self.position);
let content: String = self.input[block_start..end].iter().collect();
self.position = end;
return Some(Token::HtmlBlock(content));
}
// Case 2: comment.
if self.position + 3 < self.input.len()
&& self.input[self.position] == '<'
&& self.input[self.position + 1] == '!'
&& self.input[self.position + 2] == '-'
&& self.input[self.position + 3] == '-'
{
let end = self.scan_html_block_to_terminator(self.position, "-->")?;
let content: String = self.input[block_start..end].iter().collect();
self.position = end;
return Some(Token::HtmlBlock(content));
}
// Case 3: declaration (`<!` followed by a letter).
if self.position + 2 < self.input.len()
&& self.input[self.position] == '<'
&& self.input[self.position + 1] == '!'
&& self.input[self.position + 2].is_ascii_alphabetic()
{
let end = self.scan_html_block_to_terminator(self.position, ">")?;
let content: String = self.input[block_start..end].iter().collect();
self.position = end;
return Some(Token::HtmlBlock(content));
}
// Case 4: CDATA section.
if self.position + 8 < self.input.len()
&& self.input[self.position] == '<'
&& self.input[self.position + 1] == '!'
&& self.input[self.position + 2] == '['
&& self.input[self.position + 3] == 'C'
&& self.input[self.position + 4] == 'D'
&& self.input[self.position + 5] == 'A'
&& self.input[self.position + 6] == 'T'
&& self.input[self.position + 7] == 'A'
&& self.input[self.position + 8] == '['
{
let end = self.scan_html_block_to_terminator(self.position, "]]>")?;
let content: String = self.input[block_start..end].iter().collect();
self.position = end;
return Some(Token::HtmlBlock(content));
}
// Case 5: processing instruction.
if self.position + 1 < self.input.len()
&& self.input[self.position] == '<'
&& self.input[self.position + 1] == '?'
{
let end = self.scan_html_block_to_terminator(self.position, "?>")?;
let content: String = self.input[block_start..end].iter().collect();
self.position = end;
return Some(Token::HtmlBlock(content));
}
// Case 6: known block-level element; the block runs to the next blank line.
if self.input[self.position] == '<'
&& self.is_block_element_opener_at(self.position)
{
// Skip the remainder of the opener's own line before looking for a blank line.
let mut after_opener_line = self.position;
while after_opener_line < self.input.len()
&& self.input[after_opener_line] != '\n'
{
after_opener_line += 1;
}
if after_opener_line < self.input.len() {
after_opener_line += 1;
}
let end = self.scan_to_blank_line(after_opener_line);
let content: String = self.input[block_start..end].iter().collect();
self.position = end;
return Some(Token::HtmlBlock(content));
}
// Case 7: any other tag, only after a blank line / at the beginning of file.
if self.input[self.position] == '<' && self.previous_line_is_blank_or_bof() {
if let Some(tag_name) = self.extract_html_tag_name_at(self.position) {
let name_lower = tag_name.to_ascii_lowercase();
let is_block_element = BLOCK_ELEMENT_TAG_NAMES
.iter()
.any(|t| t.eq_ignore_ascii_case(&name_lower));
let is_raw_content = RAW_HTML_BLOCK_TAG_NAMES
.iter()
.any(|t| t.eq_ignore_ascii_case(&name_lower));
if !is_block_element && !is_raw_content {
if let Some(tag_len) = self.try_match_html_tag_len() {
let after_tag = self.position + tag_len;
// The tag must sit on a single line with nothing but
// spaces/tabs after it.
let tag_spans_newline = self.input[self.position..after_tag]
.iter()
.any(|c| *c == '\n');
if !tag_spans_newline
&& self.is_only_whitespace_to_eol(after_tag)
{
let mut after_opener_line = after_tag;
while after_opener_line < self.input.len()
&& self.input[after_opener_line] != '\n'
{
after_opener_line += 1;
}
if after_opener_line < self.input.len() {
after_opener_line += 1;
}
let end = self.scan_to_blank_line(after_opener_line);
let content: String = self.input[block_start..end].iter().collect();
self.position = end;
return Some(Token::HtmlBlock(content));
}
}
}
}
}
None
}
/// Finds the first occurrence of `terminator` at or after `start` and returns
/// the index just past the end of the line that contains it (i.e. after that
/// line's `\n`, or the input length when the line is the last one).
///
/// Returns `None` when the terminator does not occur.
fn scan_html_block_to_terminator(&self, start: usize, terminator: &str) -> Option<usize> {
    let needle: Vec<char> = terminator.chars().collect();
    let total = self.input.len();
    // Last index at which the needle could still fit.
    let last_start = total.checked_sub(needle.len())?;
    let hit = (start..=last_start)
        .find(|&at| self.input[at..at + needle.len()] == needle[..])?;
    let after = hit + needle.len();
    // Extend to the end of the terminator's line, including its newline.
    let line_end = (after..total)
        .find(|&i| self.input[i] == '\n')
        .map_or(total, |newline| newline + 1);
    Some(line_end)
}
/// Reports whether the characters starting at `pos` (the position just after a
/// `<`) spell one of the raw-content tag names, compared ASCII
/// case-insensitively, and are followed by end of input, a space, a tab, a
/// newline or `>`.
fn is_raw_html_block_opener_at(&self, pos: usize) -> bool {
    const TAGS: &[&str] = &["script", "pre", "style", "textarea"];
    TAGS.iter().any(|tag| {
        let width = tag.chars().count();
        let name_matches = self
            .input
            .get(pos..pos + width)
            .map_or(false, |candidate| {
                candidate
                    .iter()
                    .zip(tag.chars())
                    .all(|(have, want)| have.eq_ignore_ascii_case(&want))
            });
        // The name must end at a tag boundary so that longer names sharing the
        // same prefix are not accepted.
        name_matches
            && matches!(
                self.input.get(pos + width).copied(),
                None | Some(' ') | Some('\t') | Some('\n') | Some('>')
            )
    })
}
/// Extracts the lower-cased tag name of the tag beginning with `<` at `pos`.
///
/// An optional `/` after the `<` is skipped. The name must start with an ASCII
/// letter and continues over ASCII alphanumerics and `-`. Returns `None` when
/// `pos` is not a `<` or no valid name follows.
fn extract_html_tag_name_at(&self, pos: usize) -> Option<String> {
    if self.input.get(pos) != Some(&'<') {
        return None;
    }
    let name_begin = if self.input.get(pos + 1) == Some(&'/') {
        pos + 2
    } else {
        pos + 1
    };
    match self.input.get(name_begin) {
        Some(first) if first.is_ascii_alphabetic() => {}
        _ => return None,
    }
    let lowered: String = self.input[name_begin..]
        .iter()
        .take_while(|c| c.is_ascii_alphanumeric() || **c == '-')
        .map(|c| c.to_ascii_lowercase())
        .collect();
    Some(lowered)
}
/// Reports whether an open or closing tag for one of `BLOCK_ELEMENT_TAG_NAMES`
/// begins with the `<` at `pos`.
///
/// The name is compared after ASCII lower-casing and must be followed by end
/// of input, a space, a tab, a newline, `>`, or the two characters `/>`.
fn is_block_element_opener_at(&self, pos: usize) -> bool {
    if self.input.get(pos) != Some(&'<') {
        return false;
    }
    // Skip the optional `/` of a closing tag.
    let name_begin = if self.input.get(pos + 1) == Some(&'/') {
        pos + 2
    } else {
        pos + 1
    };
    let starts_with_letter = self
        .input
        .get(name_begin)
        .map_or(false, |c| c.is_ascii_alphabetic());
    if !starts_with_letter {
        return false;
    }
    let name_len = self.input[name_begin..]
        .iter()
        .take_while(|c| c.is_ascii_alphanumeric() || **c == '-')
        .count();
    let name_end = name_begin + name_len;
    let lowered: String = self.input[name_begin..name_end]
        .iter()
        .map(|c| c.to_ascii_lowercase())
        .collect();
    let is_known = BLOCK_ELEMENT_TAG_NAMES
        .iter()
        .any(|&known| known == lowered.as_str());
    if !is_known {
        return false;
    }
    match self.input.get(name_end).copied() {
        None | Some(' ') | Some('\t') | Some('\n') | Some('>') => true,
        Some('/') => self.input.get(name_end + 1).copied() == Some('>'),
        _ => false,
    }
}
/// Reports whether everything from `pos` up to (not including) the next
/// newline, or the end of input, consists solely of spaces and tabs.
///
/// An empty remainder counts as whitespace-only.
fn is_only_whitespace_to_eol(&self, pos: usize) -> bool {
    self.input
        .iter()
        .skip(pos)
        .take_while(|&&c| c != '\n')
        .all(|&c| c == ' ' || c == '\t')
}
/// Walks line by line from `start` (assumed to be a line start) and returns
/// the index at which the first blank line begins. A line is blank when it
/// holds only spaces and tabs (or nothing).
///
/// Returns the input length when no blank line is found.
fn scan_to_blank_line(&self, start: usize) -> usize {
    let total = self.input.len();
    let mut cursor = start;
    while cursor < total {
        let eol = (cursor..total)
            .find(|&i| self.input[i] == '\n')
            .unwrap_or(total);
        let blank = self.input[cursor..eol]
            .iter()
            .all(|&c| matches!(c, ' ' | '\t'));
        if blank {
            return cursor;
        }
        // Step over the newline, but never past the end of input.
        cursor = (eol + 1).min(total);
    }
    total
}
/// Walks line by line from `start` and returns the index just past the first
/// line that contains any of `</script>`, `</pre>`, `</style>` or
/// `</textarea>` (matched after lower-casing the line). The closer does not
/// have to correspond to the tag that opened the block.
///
/// Returns the input length when no such line exists.
fn scan_to_raw_html_block_close(&self, start: usize) -> usize {
    const CLOSERS: &[&str] = &["</script>", "</pre>", "</style>", "</textarea>"];
    let total = self.input.len();
    let mut cursor = start;
    while cursor < total {
        let eol = (cursor..total)
            .find(|&i| self.input[i] == '\n')
            .unwrap_or(total);
        // Start of the following line (or end of input on the last line).
        let next_line = (eol + 1).min(total);
        let lowered: String = self.input[cursor..eol]
            .iter()
            .flat_map(|c| c.to_lowercase())
            .collect();
        if CLOSERS.iter().any(|&closer| lowered.contains(closer)) {
            return next_line;
        }
        cursor = next_line;
    }
    total
}
/// Reports whether the cursor sits at the first character of a line: either at
/// index 0 or immediately after a `\n`.
fn is_at_line_start(&self) -> bool {
    match self.position.checked_sub(1) {
        None => true,
        Some(prev) => self.input.get(prev) == Some(&'\n'),
    }
}
/// Reports whether the cursor is where a block marker may start: preceded on
/// its line by nothing but at most three spaces (or it is at the very start of
/// the input).
fn is_block_marker_start(&self) -> bool {
    let mut indent = 0usize;
    // Walk backwards from the cursor towards the start of the line.
    for &c in self.input[..self.position].iter().rev() {
        match c {
            ' ' if indent < 3 => indent += 1,
            '\n' => return true,
            // A fourth space, or any other character, disqualifies the position.
            _ => return false,
        }
    }
    true
}
/// Advances the cursor over whitespace characters, stopping at a newline, at
/// the first non-whitespace character, or at the end of input.
fn skip_whitespace(&mut self) {
    while let Some(&c) = self.input.get(self.position) {
        if c == '\n' || !c.is_whitespace() {
            break;
        }
        self.position += 1;
    }
}
/// Moves the cursor forward by one character. No bounds check is performed;
/// the cursor may end up at or beyond the end of the input.
fn advance(&mut self) {
self.position += 1;
}
/// Returns the character under the cursor, or `'\0'` when the cursor is at or
/// beyond the end of the input.
fn current_char(&self) -> char {
    self.input.get(self.position).copied().unwrap_or('\0')
}
/// Consumes and returns everything from the cursor up to, but not including,
/// the next newline (or the end of input). The newline itself is not consumed.
fn read_until_newline(&mut self) -> String {
    let begin = self.position;
    if let Some(rest) = self.input.get(begin..) {
        let run = rest.iter().take_while(|&&c| c != '\n').count();
        self.position = begin + run;
    }
    self.input[begin..self.position].iter().collect()
}
/// Consumes and returns the text from the cursor up to, but not including, the
/// first unescaped `delimiter` (or the end of input).
///
/// A backslash followed by ASCII punctuation is copied to the output verbatim
/// (both characters) and never treated as the delimiter. The delimiter itself
/// is left unconsumed.
fn read_until_char_with_escapes(&mut self, delimiter: char) -> String {
    let mut collected = String::new();
    while let Some(&here) = self.input.get(self.position) {
        if here == '\\' {
            if let Some(&escaped) = self.input.get(self.position + 1) {
                if is_ascii_punctuation(escaped) {
                    // Keep the escape intact so callers can decode it later.
                    collected.push('\\');
                    collected.push(escaped);
                    self.position += 2;
                    continue;
                }
            }
        }
        if here == delimiter {
            break;
        }
        collected.push(here);
        self.position += 1;
    }
    collected
}
/// Reports whether the four characters starting at the cursor are `<!--`.
fn is_html_comment_start(&self) -> bool {
    let at = self.position;
    matches!(self.input.get(at..at + 4), Some(['<', '!', '-', '-']))
}
/// Reports whether the character under the cursor begins something other than
/// plain text, so that a text run must stop here.
///
/// `ctx` matters in two places: `]` only terminates text in
/// `ParseContext::Inline`, and an HTML comment opener only counts in
/// `ParseContext::Root`.
fn is_start_of_special_token(&self, ctx: ParseContext) -> bool {
    match self.current_char() {
        '*' | '`' | '[' => true,
        ']' => matches!(ctx, ParseContext::Inline),
        '_' => !self.is_intra_word_underscore_run(self.position),
        // `~~` and `==` need at least two consecutive markers.
        '~' => self.count_consecutive('~') >= 2,
        '=' => self.count_consecutive('=') >= 2,
        '$' => self.scan_math().is_some(),
        // `^[` and `![` both require the bracket right after the marker.
        '^' | '!' => self.input.get(self.position + 1) == Some(&'['),
        '<' => {
            (matches!(ctx, ParseContext::Root) && self.is_html_comment_start())
                || self.looks_like_autolink_start()
                || self.try_match_html_tag_len().is_some()
                || self.try_match_inline_raw_html_special().is_some()
        }
        _ => false,
    }
}
/// Converts a character index into a 1-based `(line, column)` pair.
///
/// `pos` is clamped to the input length. Each `\n` before `pos` starts a new
/// line and resets the column to 1; every other character advances the column.
pub fn pos_to_line_col(&self, pos: usize) -> (usize, usize) {
    let upto = pos.min(self.input.len());
    self.input[..upto]
        .iter()
        .fold((1usize, 1usize), |(line, col), &ch| {
            if ch == '\n' {
                (line + 1, 1)
            } else {
                (line, col + 1)
            }
        })
}
/// Reports whether the run of underscores containing `pos` is flanked on both
/// sides by alphanumeric characters (i.e. it sits inside a word).
///
/// Returns `false` when `pos` is not an underscore, or when the run touches
/// either end of the input.
fn is_intra_word_underscore_run(&self, pos: usize) -> bool {
    if self.input.get(pos) != Some(&'_') {
        return false;
    }
    // Index of the first underscore in the run.
    let run_start = self.input[..pos]
        .iter()
        .rposition(|&c| c != '_')
        .map_or(0, |last_other| last_other + 1);
    // Index just past the last underscore in the run.
    let run_end = pos
        + self.input[pos..]
            .iter()
            .take_while(|&&c| c == '_')
            .count();
    let before = run_start
        .checked_sub(1)
        .and_then(|i| self.input.get(i))
        .copied();
    let after = self.input.get(run_end).copied();
    match (before, after) {
        (Some(left), Some(right)) => left.is_alphanumeric() && right.is_alphanumeric(),
        _ => false,
    }
}
/// Reports whether the character immediately before the cursor is one of the
/// closing/marker characters `` ` ``, `)`, `]`, `>`, `*`, `_`, `~`, `=` or `$`.
/// Always `false` at the start of the input.
fn is_after_special_token(&self) -> bool {
    match self.position.checked_sub(1) {
        None => false,
        Some(prev) => matches!(
            self.input[prev],
            '`' | ')' | ']' | '>' | '*' | '_' | '~' | '=' | '$'
        ),
    }
}
/// Given the index `pos` of a newline, reports whether the following line has
/// any content, i.e. whether a character other than space or tab appears
/// before the next newline or the end of input.
fn has_content_after_newline(&self, pos: usize) -> bool {
    self.input
        .iter()
        .skip(pos + 1)
        .find(|&&c| c != ' ' && c != '\t')
        .map_or(false, |&first| first != '\n')
}
/// Checks for a horizontal rule made of three or more consecutive `-`
/// characters at the cursor, followed only by spaces/tabs up to the end of the
/// line.
///
/// On a match the cursor is moved to just past the dashes (trailing whitespace
/// and the newline are left unconsumed) and `Ok(true)` is returned. Otherwise
/// the cursor is untouched and `Ok(false)` is returned. This never returns `Err`.
fn check_horizontal_rule(&mut self) -> Result<bool, LexerError> {
    if self.current_char() != '-' {
        return Ok(false);
    }
    let dashes = self.input[self.position..]
        .iter()
        .take_while(|&&c| c == '-')
        .count();
    if dashes < 3 {
        return Ok(false);
    }
    let after_dashes = self.position + dashes;
    let rest_is_blank = self.input[after_dashes..]
        .iter()
        .take_while(|&&c| c != '\n')
        .all(|&c| c == ' ' || c == '\t');
    if !rest_is_blank {
        return Ok(false);
    }
    self.position = after_dashes;
    Ok(true)
}
/// Reports whether the line starting at the cursor is a thematic break: up to
/// three leading spaces, then at least three occurrences of the same marker
/// (`-`, `*` or `_`) with only spaces and tabs interspersed, up to the end of
/// the line. Nothing is consumed.
fn is_thematic_break_line(&self) -> bool {
    let indent = self
        .input
        .iter()
        .skip(self.position)
        .take(3)
        .take_while(|&&c| c == ' ')
        .count();
    let first = self.position + indent;
    let marker = match self.input.get(first).copied() {
        Some(c) if matches!(c, '-' | '*' | '_') => c,
        _ => return false,
    };
    let mut seen = 0usize;
    for &c in self.input[first..].iter().take_while(|&&c| c != '\n') {
        if c == marker {
            seen += 1;
        } else if c != ' ' && c != '\t' {
            // Anything other than the marker or blank space spoils the line.
            return false;
        }
    }
    seen >= 3
}
/// Advances the cursor past the rest of the current line, including its
/// terminating newline when there is one.
fn consume_current_line(&mut self) {
    while let Some(&c) = self.input.get(self.position) {
        self.position += 1;
        if c == '\n' {
            break;
        }
    }
}
/// Looks ahead from the cursor to decide whether the upcoming paragraph lines
/// are terminated by a setext underline, without consuming anything.
///
/// Returns `Some(1)` for an `=` underline, `Some(2)` for a `-` underline, and
/// `None` when:
/// * `suppress_setext` is set;
/// * the first line (after up to three leading spaces) starts another block:
///   a bullet or ordered list marker, an ATX heading, or a block quote;
/// * a blank line or the end of input is reached before an underline;
/// * a later line starts a list item, block quote, ATX heading or code fence;
/// * more than 100 lines have been scanned.
fn peek_setext_level(&self) -> Option<usize> {
    if self.suppress_setext {
        return None;
    }
    // Skip up to three leading spaces on the first line.
    let scan_start = {
        let mut p = self.position;
        let mut leading = 0usize;
        while p < self.input.len() && self.input[p] == ' ' && leading < 3 {
            p += 1;
            leading += 1;
        }
        p
    };
    if scan_start < self.input.len() {
        let c = self.input[scan_start];
        // Bullet list marker followed by whitespace, a newline or end of input.
        if c == '-' || c == '+' || c == '*' {
            if let Some(&n) = self.input.get(scan_start + 1) {
                if n == ' ' || n == '\t' || n == '\n' {
                    return None;
                }
            } else {
                return None;
            }
        }
        // Ordered list marker: digits, then `.` or `)`, then whitespace/newline.
        if c.is_ascii_digit() {
            let mut q = scan_start;
            while q < self.input.len() && self.input[q].is_ascii_digit() {
                q += 1;
            }
            if q < self.input.len() && (self.input[q] == '.' || self.input[q] == ')') {
                if let Some(&n) = self.input.get(q + 1) {
                    if n == ' ' || n == '\t' || n == '\n' {
                        return None;
                    }
                }
            }
        }
        // ATX heading: one to six `#` followed by whitespace, a newline or end of input.
        if c == '#' {
            let mut q = scan_start;
            let mut hashes = 0usize;
            while q < self.input.len() && self.input[q] == '#' {
                hashes += 1;
                q += 1;
            }
            if (1..=6).contains(&hashes) {
                if q >= self.input.len() {
                    return None;
                }
                let n = self.input[q];
                if n == ' ' || n == '\t' || n == '\n' {
                    return None;
                }
            }
        }
        if c == '>' {
            return None;
        }
    }
    let mut p = self.position;
    let mut lines_seen = 0usize;
    loop {
        // Scan the current content line; it must contain something non-blank.
        let mut has_content = false;
        while p < self.input.len() && self.input[p] != '\n' {
            if !self.input[p].is_whitespace() {
                has_content = true;
            }
            p += 1;
        }
        if !has_content {
            return None;
        }
        lines_seen += 1;
        if p >= self.input.len() {
            return None;
        }
        p += 1;
        let next_line_start = p;
        // Inspect the following line, allowing up to three leading spaces.
        let mut leading = 0usize;
        while p < self.input.len() && self.input[p] == ' ' && leading < 3 {
            p += 1;
            leading += 1;
        }
        let underline_char = match self.input.get(p) {
            Some(&'=') => Some('='),
            Some(&'-') => Some('-'),
            _ => None,
        };
        if let Some(ch) = underline_char {
            // An underline is a run of one marker followed only by spaces/tabs.
            let mut count = 0usize;
            let mut q = p;
            while q < self.input.len() && self.input[q] == ch {
                count += 1;
                q += 1;
            }
            if count > 0 {
                let mut r = q;
                while r < self.input.len()
                    && (self.input[r] == ' ' || self.input[r] == '\t')
                {
                    r += 1;
                }
                if r >= self.input.len() || self.input[r] == '\n' {
                    return Some(if ch == '=' { 1 } else { 2 });
                }
            }
        }
        if p >= self.input.len() {
            return None;
        }
        // Not an underline: stop if the line starts another block instead of
        // continuing the paragraph.
        let c = self.input[p];
        if matches!(c, '-' | '+' | '*') {
            if let Some(&n) = self.input.get(p + 1) {
                if n == ' ' || n == '\t' {
                    return None;
                }
            }
        }
        if c.is_ascii_digit() {
            let mut q = p;
            while q < self.input.len() && self.input[q].is_ascii_digit() {
                q += 1;
            }
            if q < self.input.len() && (self.input[q] == '.' || self.input[q] == ')') {
                if let Some(&n) = self.input.get(q + 1) {
                    if n == ' ' || n == '\t' {
                        return None;
                    }
                }
            }
        }
        if c == '>' || c == '#' || c == '`' || c == '~' {
            return None;
        }
        // Treat the line as further paragraph content and keep scanning.
        p = next_line_start;
        if lines_seen > 100 {
            return None;
        }
    }
}
/// Consumes a setext heading starting at the cursor: one or more content lines
/// followed by an underline of `=` (level 1) or `-` (any other level).
///
/// Lines are gathered until an underline line is found (which is consumed
/// together with its newline) or the input ends. The gathered lines are joined
/// with `\n`, trimmed, and parsed as inline content by a sub-lexer with
/// `in_heading` set.
///
/// The first line is always treated as content, even when it looks like an
/// underline itself (e.g. `==` followed by `==`); a heading needs at least one
/// content line, so only a later line can be the underline.
///
/// # Errors
/// Propagates any `LexerError` from parsing the heading's inline content.
fn consume_setext_heading(&mut self, level: usize) -> Result<Token, LexerError> {
    let underline_char = if level == 1 { '=' } else { '-' };
    let mut content_lines: Vec<String> = Vec::new();
    loop {
        let line_start = self.position;
        while self.position < self.input.len() && self.current_char() != '\n' {
            self.advance();
        }
        let line: String = self.input[line_start..self.position].iter().collect();
        let trimmed = line.trim_start_matches(' ');
        let indent = line.len() - trimmed.len();
        // Underline: at most three leading spaces, a run of the marker, then
        // nothing but spaces/tabs.
        let is_underline = indent <= 3
            && trimmed.starts_with(underline_char)
            && trimmed
                .trim_start_matches(underline_char)
                .chars()
                .all(|c| c == ' ' || c == '\t');
        if is_underline && !content_lines.is_empty() {
            if self.position < self.input.len() && self.current_char() == '\n' {
                self.advance();
            }
            break;
        }
        content_lines.push(line);
        if self.position < self.input.len() && self.current_char() == '\n' {
            self.advance();
        } else {
            break;
        }
    }
    let joined = content_lines.join("\n");
    let mut sub = self.sub_lexer(joined.trim().to_string());
    sub.in_heading = true;
    sub.definitions = self.definitions.clone();
    let content = sub.parse_with_context(ParseContext::Inline)?;
    Ok(Token::Heading(content, level))
}
/// Checks whether an ordered list marker (`1.`, `23)` …) begins at the cursor
/// and returns its number, without consuming anything.
///
/// The marker is one to nine ASCII digits followed by `.` or `)`. After the
/// delimiter there must be a space or tab; an immediate newline, carriage
/// return or end of input is only accepted when the previous token was a list
/// item or the previous line is blank / the beginning of the file.
fn check_ordered_list_marker(&mut self) -> Option<usize> {
    let digits: String = self
        .input
        .iter()
        .skip(self.position)
        .take_while(|c| c.is_ascii_digit())
        .collect();
    if digits.is_empty() || digits.len() > 9 {
        return None;
    }
    let delimiter_at = self.position + digits.len();
    if !matches!(
        self.input.get(delimiter_at).copied(),
        Some('.') | Some(')')
    ) {
        return None;
    }
    let follows_ok = match self.input.get(delimiter_at + 1).copied() {
        Some(' ') | Some('\t') => true,
        // An empty item is only allowed in list-friendly surroundings.
        None | Some('\n') | Some('\r') => {
            self.last_emitted_list_item || self.previous_line_is_blank_or_bof()
        }
        Some(_) => false,
    };
    if !follows_ok {
        return None;
    }
    digits.parse::<usize>().ok()
}
/// Parses one list item, guarding against runaway nesting.
///
/// The nesting depth is raised for the duration of the call to
/// `parse_list_item_inner` and restored afterwards, whether or not that call
/// succeeds.
///
/// # Errors
/// Returns the lexer's nesting error when entering this item would exceed
/// `MAX_PARSE_DEPTH`, and otherwise propagates any error from the inner parser.
fn parse_list_item(
    &mut self,
    ordered: bool,
    parent_ctx: ParseContext,
) -> Result<Token, LexerError> {
    let entered = self.depth.saturating_add(1);
    if entered > MAX_PARSE_DEPTH {
        // Too deep: leave the recorded depth as it was and report the error.
        return Err(self.nesting_error());
    }
    self.depth = entered;
    let outcome = self.parse_list_item_inner(ordered, parent_ctx);
    self.depth -= 1;
    outcome
}
fn parse_list_item_inner(
&mut self,
ordered: bool,
parent_ctx: ParseContext,
) -> Result<Token, LexerError> {
let marker_col = {
let mut p = self.position;
while p > 0 && self.input[p - 1] != '\n' {
p -= 1;
}
let mut col = 0usize;
for &c in &self.input[p..self.position] {
match c {
' ' => col += 1,
'\t' => col += 4 - (col % 4),
_ => col += 1,
}
}
col
};
let mut number = None;
let marker_char: char;
if !ordered {
marker_char = self.current_char();
self.advance();
} else {
number = self.check_ordered_list_marker();
while self.position < self.input.len() && self.current_char().is_ascii_digit() {
self.advance();
}
marker_char = if self.position < self.input.len()
&& (self.current_char() == '.' || self.current_char() == ')')
{
let m = self.current_char();
self.advance();
m
} else {
'.'
};
}
let marker_width = if ordered {
let n = number.unwrap_or(1);
let mut digits = 1usize;
let mut tmp = n;
while tmp >= 10 {
tmp /= 10;
digits += 1;
}
digits + 1 } else {
1 };
let mut probe = self.position;
let mut spaces_after = 0usize;
while probe < self.input.len()
&& (self.input[probe] == ' ' || self.input[probe] == '\t')
{
if self.input[probe] == ' ' {
spaces_after += 1;
} else {
spaces_after += 4 - (spaces_after % 4);
}
probe += 1;
}
let following_is_eol = probe >= self.input.len() || self.input[probe] == '\n';
let separator = if following_is_eol {
1 } else if spaces_after >= 1 && spaces_after <= 4 {
spaces_after
} else {
1
};
let content_offset = marker_col + marker_width + separator;
let first_line_is_indented_code = spaces_after >= 5 && !following_is_eol;
if !first_line_is_indented_code {
self.skip_whitespace();
}
let mut checked: Option<bool> = None;
if self.position + 2 < self.input.len()
&& self.input[self.position] == '['
&& self.input[self.position + 2] == ']'
&& (self.position + 3 >= self.input.len()
|| self.input[self.position + 3] == ' '
|| self.input[self.position + 3] == '\t'
|| self.input[self.position + 3] == '\n')
{
match self.input[self.position + 1] {
' ' => {
checked = Some(false);
self.position += 3;
self.skip_whitespace();
}
'x' | 'X' => {
checked = Some(true);
self.position += 3;
self.skip_whitespace();
}
_ => {}
}
}
let mut content = Vec::new();
if first_line_is_indented_code {
let line_end = (self.position..self.input.len())
.find(|&i| self.input[i] == '\n')
.unwrap_or(self.input.len());
let mut col = marker_col + marker_width;
let mut expanded = String::new();
let mut i = self.position;
while i < line_end {
match self.input[i] {
'\t' => {
let span = 4 - (col % 4);
for _ in 0..span {
expanded.push(' ');
}
col += span;
i += 1;
}
' ' => {
expanded.push(' ');
col += 1;
i += 1;
}
_ => break,
}
}
while i < line_end {
expanded.push(self.input[i]);
i += 1;
}
let stripped: String = expanded.chars().skip(separator).collect();
self.position = line_end;
let mut sub = self.sub_lexer(stripped);
let sub_tokens = sub.parse_with_context(ParseContext::Root)?;
content.extend(sub_tokens);
}
let mut first_line_handled = first_line_is_indented_code;
if !first_line_handled
&& self.position < self.input.len()
&& self.current_char() != '\n'
{
let ch = self.current_char();
if self.is_thematic_break_line() {
self.consume_current_line();
content.push(Token::HorizontalRule);
first_line_handled = true;
} else if ch == '#' && self.is_atx_heading_start() {
content.push(self.parse_heading()?);
first_line_handled = true;
} else if (ch == '`' || ch == '~') && self.count_consecutive(ch) >= 3 {
let line_end = (self.position..self.input.len())
.find(|&i| self.input[i] == '\n')
.unwrap_or(self.input.len());
let first_line: String = self.input[self.position..line_end]
.iter()
.collect();
self.position = if line_end < self.input.len() {
line_end + 1
} else {
line_end
};
let rest = self.collect_list_item_block_content(content_offset);
let full = if rest.is_empty() {
first_line
} else {
format!("{}\n{}", first_line, rest)
};
let mut sub = self.sub_lexer(full);
let sub_tokens = sub.parse_with_context(ParseContext::Root)?;
content.extend(sub_tokens);
first_line_handled = true;
} else if ch == '>' {
let line_end = (self.position..self.input.len())
.find(|&i| self.input[i] == '\n')
.unwrap_or(self.input.len());
let first_line: String = self.input[self.position..line_end]
.iter()
.collect();
self.position = if line_end < self.input.len() {
line_end + 1
} else {
line_end
};
let rest = self.collect_list_item_block_content(content_offset);
let mut full = if rest.is_empty() {
first_line
} else {
format!("{}\n{}", first_line, rest)
};
loop {
if self.position >= self.input.len() {
break;
}
let lz_start = self.position;
let lz_end = (lz_start..self.input.len())
.find(|&i| self.input[i] == '\n')
.unwrap_or(self.input.len());
if self.input[lz_start..lz_end]
.iter()
.all(|&c| c == ' ' || c == '\t')
{
break;
}
let mut cols = 0usize;
let mut q = lz_start;
while q < lz_end
&& (self.input[q] == ' ' || self.input[q] == '\t')
{
if self.input[q] == ' ' {
cols += 1;
} else {
cols += 4 - (cols % 4);
}
q += 1;
}
if cols >= content_offset {
break;
}
if self.line_starts_new_block_at(q) {
break;
}
if !full.is_empty() {
full.push('\n');
}
for c in &self.input[lz_start..lz_end] {
full.push(*c);
}
self.position = if lz_end < self.input.len() {
lz_end + 1
} else {
lz_end
};
}
let mut sub = self.sub_lexer(full);
let sub_tokens = sub.parse_with_context(ParseContext::Root)?;
content.extend(sub_tokens);
first_line_handled = true;
} else if (ch == '-' || ch == '+') && self.is_list_marker(ch) {
content.push(self.parse_list_item(false, parent_ctx)?);
first_line_handled = true;
} else if ch == '*' && self.is_list_marker('*') {
content.push(self.parse_list_item(false, parent_ctx)?);
first_line_handled = true;
} else if ch.is_ascii_digit() && self.check_ordered_list_marker().is_some() {
content.push(self.parse_list_item(true, parent_ctx)?);
first_line_handled = true;
}
}
if !first_line_handled {
while self.position < self.input.len() && self.current_char() != '\n' {
if let Some(token) = self.next_token(ParseContext::ListItem)? {
content.push(token);
}
}
}
if self.position < self.input.len() && self.current_char() == '\n' {
self.advance();
}
if !first_line_handled
&& !content.is_empty()
&& content
.iter()
.all(|t| !matches!(t, Token::HorizontalRule | Token::Heading(_, _)))
{
let next_line_start = self.position;
let mut p = next_line_start;
let mut indent_cols = 0usize;
while p < self.input.len() && self.input[p] == ' ' {
p += 1;
indent_cols += 1;
}
if indent_cols >= content_offset
&& p < self.input.len()
&& (self.input[p] == '=' || self.input[p] == '-')
{
let underline_char = self.input[p];
let underline_start = p;
while p < self.input.len() && self.input[p] == underline_char {
p += 1;
}
let run_len = p - underline_start;
let mut tail = p;
while tail < self.input.len()
&& (self.input[tail] == ' ' || self.input[tail] == '\t')
{
tail += 1;
}
let ends_line =
tail >= self.input.len() || self.input[tail] == '\n';
if run_len >= 1 && ends_line {
let level = if underline_char == '=' { 1 } else { 2 };
let inner = std::mem::take(&mut content);
content.push(Token::Heading(inner, level));
self.position = if tail < self.input.len() {
tail + 1
} else {
tail
};
}
}
}
loop {
if self.position >= self.input.len() {
break;
}
if !self.is_at_line_start() {
break;
}
let line_start = self.position;
let cur_indent = self.get_current_indent();
let mut after_indent = line_start;
while after_indent < self.input.len()
&& (self.input[after_indent] == ' ' || self.input[after_indent] == '\t')
{
after_indent += 1;
}
if after_indent >= self.input.len() || self.input[after_indent] == '\n' {
let item_has_content = content.iter().any(|t| !matches!(t, Token::Newline));
if !item_has_content {
break;
}
let mut p = line_start;
while p < self.input.len() {
let line_end = (p..self.input.len())
.find(|&i| self.input[i] == '\n')
.unwrap_or(self.input.len());
let only_ws = self.input[p..line_end]
.iter()
.all(|c| *c == ' ' || *c == '\t');
if !only_ws {
break;
}
if line_end >= self.input.len() {
p = line_end;
break;
}
p = line_end + 1;
}
if p >= self.input.len() {
break;
}
let mut next_indent = 0usize;
let mut q = p;
while q < self.input.len() {
match self.input[q] {
' ' => {
next_indent += 1;
q += 1;
}
'\t' => {
next_indent += 4 - (next_indent % 4);
q += 1;
}
_ => break,
}
}
if next_indent < content_offset {
break;
}
self.position = p;
content.push(Token::Newline);
content.push(Token::Newline);
let raw = self.collect_list_item_block_content(content_offset);
if !raw.is_empty() {
let mut sub = self.sub_lexer(raw);
let sub_tokens =
sub.parse_with_context(ParseContext::Root)?;
content.extend(sub_tokens);
}
continue;
}
let is_marker_line = self.line_starts_with_list_marker(after_indent);
let next_ch = self.input[after_indent];
if cur_indent >= content_offset {
if is_marker_line {
self.position = after_indent;
match next_ch {
'-' | '+' => {
if !self.check_horizontal_rule()? {
content.push(self.parse_list_item(false, parent_ctx)?);
continue;
}
self.position = line_start;
break;
}
'*' => {
if self.is_list_marker('*') {
content.push(self.parse_list_item(false, parent_ctx)?);
continue;
}
self.position = line_start;
break;
}
'0'..='9' => {
if self.check_ordered_list_marker().is_some() {
content.push(self.parse_list_item(true, parent_ctx)?);
continue;
}
self.position = line_start;
}
_ => {}
}
}
let item_has_content =
content.iter().any(|t| !matches!(t, Token::Newline));
let starts_block = next_ch == '>'
|| ((next_ch == '`' || next_ch == '~') && {
let mut p = after_indent;
while p < self.input.len() && self.input[p] == next_ch {
p += 1;
}
p - after_indent >= 3
})
|| !item_has_content;
if !is_marker_line && starts_block {
let rest = self.collect_list_item_block_content(content_offset);
if !rest.is_empty() {
let mut sub = self.sub_lexer(rest);
let sub_tokens =
sub.parse_with_context(ParseContext::Root)?;
content.extend(sub_tokens);
}
continue;
}
} else {
if is_marker_line && cur_indent < 4 {
break;
}
if next_ch == '#' {
let savepos = self.position;
self.position = after_indent;
let is_atx = self.is_atx_heading_start();
self.position = savepos;
if is_atx {
break;
}
}
if next_ch == '>' {
break;
}
let savepos = self.position;
self.position = after_indent;
let is_hr = self.is_thematic_break_line();
self.position = savepos;
if is_hr {
break;
}
}
self.position = after_indent;
content.push(Token::Newline);
while self.position < self.input.len() && self.current_char() != '\n' {
if let Some(tok) = self.next_token(ParseContext::Inline)? {
content.push(tok);
}
}
if self.position < self.input.len() && self.current_char() == '\n' {
self.advance();
}
}
let mut content = content;
resolve_emphasis(&mut content);
Ok(Token::ListItem {
content,
ordered,
number,
marker: marker_char,
checked,
loose: false,
})
}
/// Reports whether the text beginning at `pos` looks like a list marker.
///
/// Two shapes are accepted:
/// * a bullet: one of `-`, `+`, `*`;
/// * an ordered marker: one or more ASCII digits followed by `.` or `)`.
///
/// In both cases the marker must be followed by a space, a tab, a newline,
/// or the end of the input. Returns `false` when `pos` is out of range.
fn line_starts_with_list_marker(&self, pos: usize) -> bool {
    let chars = &self.input;
    // A marker only counts when whitespace, a newline, or EOF comes next.
    let gap_follows = |at: usize| -> bool {
        chars
            .get(at)
            .map_or(true, |&c| matches!(c, ' ' | '\t' | '\n'))
    };
    match chars.get(pos).copied() {
        Some('-') | Some('+') | Some('*') => gap_follows(pos + 1),
        Some(first) if first.is_ascii_digit() => {
            // Skip the whole digit run, then expect the delimiter.
            let digit_count = chars[pos..]
                .iter()
                .take_while(|c| c.is_ascii_digit())
                .count();
            let delimiter_at = pos + digit_count;
            match chars.get(delimiter_at).copied() {
                Some('.') | Some(')') => gap_follows(delimiter_at + 1),
                // Digits running to EOF or followed by anything else.
                _ => false,
            }
        }
        _ => false,
    }
}
/// Cheap lookahead used to decide whether a table begins at the current
/// position.
///
/// Returns `true` when the current line is terminated by a newline and the
/// line after it contains at least one `-`. The second line is not
/// otherwise validated, so this is a heuristic rather than a full check of
/// the delimiter row.
fn is_table_start(&self) -> bool {
    let rest = match self.input.get(self.position..) {
        Some(rest) => rest,
        None => return false,
    };
    // Without a newline there is no second line to inspect.
    let header_end = match rest.iter().position(|&c| c == '\n') {
        Some(idx) => idx,
        None => return false,
    };
    rest[header_end + 1..]
        .iter()
        .take_while(|&&c| c != '\n')
        .any(|&c| c == '-')
}
/// Parses a pipe table whose header row starts at the current position.
///
/// Layout expected in the input:
/// 1. a header line, split on `|` into header cells;
/// 2. a delimiter line, split on `|` into alignment markers
///    (`:--` left, `:-:` centre, `--:` right, anything else left);
/// 3. zero or more body lines, read until a blank line or end of input.
///
/// Leading and trailing `|` characters of each line are dropped before
/// splitting, and every cell is trimmed and then lexed on its own through a
/// sub-lexer in `ParseContext::TableCell`.
///
/// The returned `aligns` vector always has exactly one entry per header
/// cell: it is padded with `Left` or truncated as needed. Body rows are
/// kept at whatever width they were written with.
///
/// # Errors
/// Propagates any `LexerError` raised while lexing a cell.
fn parse_table(&mut self) -> Result<Token, LexerError> {
    // Splits one table line into trimmed cell sources, ignoring outer pipes.
    fn split_cells(line: &str) -> impl Iterator<Item = &str> {
        line.trim_matches('|').split('|').map(str::trim)
    }

    let header_line = self.read_until_newline();
    // Only look at the current character while one is available, matching
    // the end-of-input guard used by the other line-oriented parsers here.
    if self.position < self.input.len() && self.current_char() == '\n' {
        self.advance();
    }

    let align_line = self.read_until_newline();
    let mut aligns: Vec<TableAlignment> = split_cells(&align_line)
        .map(|spec| match (spec.starts_with(':'), spec.ends_with(':')) {
            (true, true) => TableAlignment::Center,
            (false, true) => TableAlignment::Right,
            // A leading colon only, or no colon at all, both mean left.
            _ => TableAlignment::Left,
        })
        .collect();
    // The delimiter row may be the last line of the input.
    if self.position < self.input.len() && self.current_char() == '\n' {
        self.advance();
    }

    let mut headers = Vec::new();
    for cell in split_cells(&header_line) {
        let mut cell_lexer = self.sub_lexer(cell.to_string());
        headers.push(cell_lexer.parse_with_context(ParseContext::TableCell)?);
    }

    let mut rows = Vec::new();
    while self.position < self.input.len() {
        let line = self.read_until_newline();
        // A blank line terminates the table; its newline is left in place.
        if line.trim().is_empty() {
            break;
        }
        let mut row_tokens = Vec::new();
        for cell in split_cells(&line) {
            let mut cell_lexer = self.sub_lexer(cell.to_string());
            row_tokens.push(cell_lexer.parse_with_context(ParseContext::TableCell)?);
        }
        rows.push(row_tokens);
        // The final row may end at end of input without a newline.
        if self.position < self.input.len() && self.current_char() == '\n' {
            self.advance();
        }
    }

    // One alignment per header column: pad with `Left` or drop the excess.
    aligns.resize(headers.len(), TableAlignment::Left);

    Ok(Token::Table {
        headers,
        aligns,
        rows,
    })
}
/// Reports whether the line before the current one is blank, or whether
/// there is no previous line at all.
///
/// Spaces and tabs immediately before the current position are skipped
/// first. The result is:
/// * `true` if that skipping reaches the beginning of the input;
/// * `false` if the character found is not a newline (the position is in
///   the middle of a line that already has content);
/// * otherwise, whether the preceding line consists solely of spaces and
///   tabs (an empty line counts as blank).
fn previous_line_is_blank_or_bof(&self) -> bool {
    let is_blank = |c: &char| matches!(*c, ' ' | '\t');
    let before = &self.input[..self.position];

    // Walk back over the indentation of the current line.
    let last_solid = match before.iter().rposition(|c| !is_blank(c)) {
        Some(idx) => idx,
        // Nothing but whitespace back to the start of the input.
        None => return true,
    };
    if before[last_solid] != '\n' {
        return false;
    }

    // Everything between the previous newline (or BOF) and this newline.
    let earlier = &before[..last_solid];
    let prev_line_start = earlier
        .iter()
        .rposition(|&c| c == '\n')
        .map_or(0, |idx| idx + 1);
    earlier[prev_line_start..].iter().all(is_blank)
}
/// Reports whether an indented code block may begin at the current line.
///
/// This is allowed when the previous line is blank (or absent), or when the
/// most recently emitted token was not paragraph text.
fn can_start_indented_code(&self) -> bool {
    self.previous_line_is_blank_or_bof() || !self.last_emitted_was_paragraph_text
}
/// Gathers the raw continuation lines that belong to a list item.
///
/// Starting at the current position, consecutive lines are consumed while
/// they are indented by at least `content_offset` columns (tabs advance to
/// the next multiple of four). Each accepted line is passed through
/// `strip_leading_cols` with `content_offset` before being stored.
///
/// A blank line is kept (as an empty string) only if the next non-blank
/// line is itself indented by at least `content_offset`; otherwise
/// collection stops in front of the blank line.
///
/// `self.position` is left at the start of the first line that was not
/// consumed. The collected lines are returned joined with `\n`.
fn collect_list_item_block_content(&mut self, content_offset: usize) -> String {
    // Measures the run of spaces/tabs at `start`; returns the column width
    // and the index of the first character after the run.
    fn leading_indent(chars: &[char], start: usize) -> (usize, usize) {
        let mut cols = 0usize;
        let mut idx = start;
        while let Some(&c) = chars.get(idx) {
            match c {
                ' ' => cols += 1,
                '\t' => cols += 4 - (cols % 4),
                _ => break,
            }
            idx += 1;
        }
        (cols, idx)
    }

    let total = self.input.len();
    let mut collected: Vec<String> = Vec::new();

    while self.position < total {
        let line_start = self.position;
        let (indent, text_start) = leading_indent(&self.input, line_start);

        if text_start >= total || self.input[text_start] == '\n' {
            // Blank line: look past any further blank lines to see whether
            // the item continues afterwards.
            let after_blank = if text_start < total {
                text_start + 1
            } else {
                text_start
            };
            let mut probe = after_blank;
            let mut continues = false;
            while probe < total {
                let (next_indent, next_text) = leading_indent(&self.input, probe);
                if next_text >= total {
                    break;
                }
                if self.input[next_text] == '\n' {
                    probe = next_text + 1;
                    continue;
                }
                continues = next_indent >= content_offset;
                break;
            }
            if !continues {
                break;
            }
            collected.push(String::new());
            self.position = after_blank;
            continue;
        }

        // A non-blank line that is not indented enough ends the item.
        if indent < content_offset {
            break;
        }

        let line_end = (line_start..total)
            .find(|&i| self.input[i] == '\n')
            .unwrap_or(total);
        collected.push(strip_leading_cols(
            &self.input,
            line_start,
            line_end,
            content_offset,
        ));
        if line_end >= total {
            self.position = line_end;
            break;
        }
        self.position = line_end + 1;
    }

    collected.join("\n")
}
/// Consumes an indented code block and returns it as a `Token::Code` with
/// an empty language and `block: true`.
///
/// Lines are taken while the lexer is at a line start and the line is
/// indented by at least four columns; up to four columns of leading
/// spaces/tabs are removed from each and the remainder is copied verbatim.
///
/// A line that contains only spaces/tabs (fewer than four columns) followed
/// by a newline is kept as an empty line when the next non-blank line is
/// indented by four or more columns. Any other under-indented line ends
/// the block and is left unconsumed.
///
/// Leading and trailing newlines are trimmed from the collected text.
fn parse_indented_code_block(&mut self) -> Token {
    // Indent width of the first non-blank line at or after `from`, or
    // `None` when only blank lines (or nothing) remain.
    fn next_text_indent(chars: &[char], mut from: usize) -> Option<usize> {
        loop {
            let mut cols = 0usize;
            let mut idx = from;
            while let Some(&c) = chars.get(idx) {
                match c {
                    ' ' => cols += 1,
                    '\t' => cols += 4 - (cols % 4),
                    _ => break,
                }
                idx += 1;
            }
            match chars.get(idx).copied() {
                None => return None,
                Some('\n') => from = idx + 1,
                Some(_) => return Some(cols),
            }
        }
    }

    let mut body = String::new();
    while self.is_at_line_start() {
        if self.get_current_indent() < 4 {
            let line_start = self.position;
            let mut ws_end = line_start;
            while ws_end < self.input.len()
                && matches!(self.input[ws_end], ' ' | '\t')
            {
                ws_end += 1;
            }
            let blank_with_newline =
                ws_end < self.input.len() && self.input[ws_end] == '\n';
            // Interior blank line: keep it only if code resumes afterwards.
            if blank_with_newline
                && matches!(next_text_indent(&self.input, ws_end + 1), Some(n) if n >= 4)
            {
                body.push('\n');
                self.position = ws_end + 1;
                continue;
            }
            self.position = line_start;
            break;
        }

        // Remove at most four columns of indentation from this line.
        let mut stripped_cols = 0usize;
        while stripped_cols < 4 && self.position < self.input.len() {
            let width = match self.current_char() {
                ' ' => 1,
                '\t' => 4 - (stripped_cols % 4),
                _ => break,
            };
            if stripped_cols + width > 4 {
                break;
            }
            stripped_cols += width;
            self.advance();
        }

        // Copy the rest of the line, including its newline when present.
        while self.position < self.input.len() && self.current_char() != '\n' {
            body.push(self.current_char());
            self.advance();
        }
        if self.position < self.input.len() && self.current_char() == '\n' {
            body.push('\n');
            self.advance();
        }
    }

    Token::Code {
        language: String::new(),
        content: body.trim_matches('\n').to_string(),
        block: true,
    }
}
/// Returns the indentation width, in columns, of the whitespace run that
/// starts at the current position.
///
/// A space counts as one column; a tab advances to the next multiple of
/// four. Counting stops at the first character that is neither, or at the
/// end of the input. The lexer position is not changed.
pub fn get_current_indent(&self) -> usize {
    let mut width = 0usize;
    for &c in self.input.iter().skip(self.position) {
        match c {
            ' ' => width += 1,
            '\t' => width += 4 - (width % 4),
            _ => break,
        }
    }
    width
}
/// Reports whether the current character is `marker` used as a list bullet.
///
/// * Followed by a space or tab: always a marker.
/// * Followed by `\n`, `\r`, or the end of the input (an empty item): a
///   marker only if the last emitted token was a list item or the previous
///   line is blank / absent.
/// * Followed by anything else: not a marker.
fn is_list_marker(&self, marker: char) -> bool {
    if self.current_char() != marker {
        return false;
    }
    match self.input.get(self.position + 1).copied() {
        Some(' ') | Some('\t') => true,
        // An empty item needs supporting context to count as a marker.
        Some('\n') | Some('\r') | None => {
            self.last_emitted_list_item || self.previous_line_is_blank_or_bof()
        }
        Some(_) => false,
    }
}
}