use crate::options::ParserOptions;
use crate::syntax::SyntaxKind;
use rowan::GreenNodeBuilder;
use crate::parser::utils::container_stack::{
Container, ContainerStack, leading_indent, leading_indent_from,
};
use crate::parser::utils::helpers::{strip_newline, trim_end_newlines};
use crate::parser::utils::list_item_buffer::ListItemBuffer;
/// The marker that introduces a list item.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum ListMarker {
/// A bullet item; carries the bullet character that was written (`*`, `+` or `-`).
Bullet(char),
/// An ordered item; the payload describes the numbering scheme.
Ordered(OrderedMarker),
}
/// Numbering scheme of an ordered list marker.
///
/// Variants that carry a `style` record which delimiter surrounded the
/// number (see [`ListDelimiter`]).
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum OrderedMarker {
/// Decimal digits, e.g. `1.`, `2)` or `(3)`. `number` holds the digits as written.
Decimal {
number: String,
style: ListDelimiter,
},
/// The `#.` auto-numbering marker.
Hash,
/// A single ASCII lowercase letter, e.g. `a.`.
LowerAlpha {
letter: char,
style: ListDelimiter,
},
/// A single ASCII uppercase letter, e.g. `A.`.
UpperAlpha {
letter: char,
style: ListDelimiter,
},
/// A lowercase roman numeral, e.g. `iv.`. `numeral` holds the letters as written.
LowerRoman {
numeral: String,
style: ListDelimiter,
},
/// An uppercase roman numeral, e.g. `IV.`. `numeral` holds the letters as written.
UpperRoman {
numeral: String,
style: ListDelimiter,
},
/// An example-list marker `(@)` or `(@label)`; `label` is `None` when no label was given.
Example {
label: Option<String>,
},
}
/// Delimiter written around the number of an ordered list marker.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ListDelimiter {
/// Trailing period, as in `1.`.
Period,
/// Trailing right parenthesis, as in `1)`.
RightParen,
/// Enclosing parentheses, as in `(1)`.
Parens,
}
/// Result of successfully recognising a list marker at the start of a line.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct ListMarkerMatch {
/// The kind of marker that was recognised.
pub(crate) marker: ListMarker,
/// Length of the marker text itself (without surrounding whitespace).
/// Callers use it both to slice the line and to advance the column.
pub(crate) marker_len: usize,
/// Columns of whitespace attributed to the gap between marker and content.
pub(crate) spaces_after_cols: usize,
/// Bytes of whitespace to consume from the line after the marker.
pub(crate) spaces_after_bytes: usize,
/// `true` when one column of spacing is counted after the marker although
/// no byte is consumed for it.
pub(crate) virtual_marker_space: bool,
}
/// Everything needed to emit one list item line into the syntax tree.
#[derive(Debug, Clone, Copy)]
pub(in crate::parser) struct ListItemEmissionInput<'a> {
/// The line being emitted: indentation, marker, spacing, then item content.
pub content: &'a str,
/// Length of the marker text inside `content`.
pub marker_len: usize,
/// Columns of whitespace counted between marker and content.
pub spaces_after_cols: usize,
/// Bytes of whitespace consumed between marker and content.
pub spaces_after_bytes: usize,
/// Column at which the marker starts.
pub indent_cols: usize,
/// Number of leading bytes of `content` that are indentation.
pub indent_bytes: usize,
/// Carried through to the `ListItem` container that is pushed for this item.
pub virtual_marker_space: bool,
}
/// Measures a roman numeral at the start of `text`.
///
/// Only letters of the requested case are considered (`IVXLCDM` when
/// `uppercase` is set, `ivxlcdm` otherwise). Returns the number of bytes the
/// numeral occupies, or `None` when `text` does not start with an acceptable
/// numeral. A candidate is rejected when:
///
/// * it is empty;
/// * it is a single letter other than `I`, `V` or `X`;
/// * `I`, `X` or `C` repeats more than three times in a row, or `V`, `L` or
///   `D` repeats at all;
/// * a smaller digit precedes a larger one outside the subtractive pairs
///   `IV`, `IX`, `XL`, `XC`, `CD` and `CM`.
fn try_parse_roman_numeral(text: &str, uppercase: bool) -> Option<usize> {
    /// Numeric value of an uppercase roman digit; anything else is worth 0.
    fn value_of(digit: u8) -> u32 {
        match digit {
            b'I' => 1,
            b'V' => 5,
            b'X' => 10,
            b'L' => 50,
            b'C' => 100,
            b'D' => 500,
            b'M' => 1000,
            _ => 0,
        }
    }

    let alphabet: &[u8] = if uppercase { b"IVXLCDM" } else { b"ivxlcdm" };
    let len = text
        .bytes()
        .take_while(|candidate| alphabet.contains(candidate))
        .count();
    let digits = &text.as_bytes()[..len];

    match digits {
        [] => return None,
        // A lone letter is only treated as roman for the most common numerals.
        [only] if !matches!(only.to_ascii_uppercase(), b'I' | b'V' | b'X') => return None,
        _ => {}
    }

    // V, L and D may never appear twice in a row.
    let doubled_five = digits.windows(2).any(|pair| {
        pair[0] == pair[1] && matches!(pair[0].to_ascii_uppercase(), b'V' | b'L' | b'D')
    });
    // I, X and C may appear at most three times in a row.
    let quadrupled_unit = digits.windows(4).any(|run| {
        run.iter().all(|&digit| digit == run[0])
            && matches!(run[0].to_ascii_uppercase(), b'I' | b'X' | b'C')
    });
    if doubled_five || quadrupled_unit {
        return None;
    }

    // Whenever the value rises from one digit to the next, the two digits
    // must form one of the recognised subtractive pairs.
    let rises_are_subtractive = digits.windows(2).all(|pair| {
        let current = pair[0].to_ascii_uppercase();
        let following = pair[1].to_ascii_uppercase();
        value_of(current) >= value_of(following)
            || matches!(
                (current, following),
                (b'I', b'V')
                    | (b'I', b'X')
                    | (b'X', b'L')
                    | (b'X', b'C')
                    | (b'C', b'D')
                    | (b'C', b'M')
            )
    });

    if rises_are_subtractive { Some(len) } else { None }
}
/// Decides how much of the whitespace following a list marker belongs to the
/// marker rather than to the item content.
///
/// `after_marker` is the text directly after the marker and `marker_end_col`
/// the column at which that text starts. Returns
/// `(spaces_after_cols, spaces_after_bytes, virtual_marker_space)`.
///
/// Normally all leading whitespace (as measured by `leading_indent_from`) is
/// attributed to the marker. When real content follows at least five columns
/// of whitespace, only a single column is attributed to the marker so the
/// remaining whitespace stays with the content. That column consumes one byte
/// when it is a space or a tab spanning exactly one column; otherwise no byte
/// is consumed and the space is reported as virtual.
fn marker_spaces_after(after_marker: &str, marker_end_col: usize) -> (usize, usize, bool) {
    let (ws_cols, ws_bytes) = leading_indent_from(after_marker, marker_end_col);
    let content_follows = !trim_end_newlines(&after_marker[ws_bytes..]).is_empty();

    if !(content_follows && ws_cols >= 5) {
        return (ws_cols, ws_bytes, false);
    }

    let consumed = match after_marker.as_bytes().first() {
        Some(b' ') => 1,
        // With 4-column tab stops, a tab is one column wide only when it
        // starts on the last column before a stop.
        Some(b'\t') if marker_end_col % 4 == 3 => 1,
        _ => 0,
    };
    (1, consumed, consumed == 0)
}
pub(crate) fn try_parse_list_marker(line: &str, config: &ParserOptions) -> Option<ListMarkerMatch> {
let line = trim_end_newlines(line);
let (_indent_cols, indent_bytes) = leading_indent(line);
let trimmed = &line[indent_bytes..];
if let Some(ch) = trimmed.chars().next()
&& matches!(ch, '*' | '+' | '-')
{
let after_marker = &trimmed[1..];
let trimmed_after = after_marker.trim_start();
let is_task = trimmed_after.starts_with('[')
&& trimmed_after.len() >= 3
&& matches!(
trimmed_after.chars().nth(1),
Some(' ') | Some('x') | Some('X')
)
&& trimmed_after.chars().nth(2) == Some(']');
if after_marker.starts_with(' ')
|| after_marker.starts_with('\t')
|| after_marker.is_empty()
|| is_task
{
let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
marker_spaces_after(after_marker, _indent_cols + 1);
return Some(ListMarkerMatch {
marker: ListMarker::Bullet(ch),
marker_len: 1,
spaces_after_cols,
spaces_after_bytes,
virtual_marker_space,
});
}
}
if config.extensions.fancy_lists
&& let Some(after_marker) = trimmed.strip_prefix("#.")
&& (after_marker.starts_with(' ')
|| after_marker.starts_with('\t')
|| after_marker.is_empty())
{
let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
marker_spaces_after(after_marker, _indent_cols + 2);
return Some(ListMarkerMatch {
marker: ListMarker::Ordered(OrderedMarker::Hash),
marker_len: 2,
spaces_after_cols,
spaces_after_bytes,
virtual_marker_space,
});
}
if config.extensions.example_lists
&& let Some(rest) = trimmed.strip_prefix("(@")
{
let label_end = rest
.chars()
.take_while(|c| c.is_alphanumeric() || *c == '_' || *c == '-')
.count();
if rest.len() > label_end && rest.chars().nth(label_end) == Some(')') {
let label = if label_end > 0 {
Some(rest[..label_end].to_string())
} else {
None
};
let after_marker = &rest[label_end + 1..];
if after_marker.starts_with(' ')
|| after_marker.starts_with('\t')
|| after_marker.is_empty()
{
let marker_len = 2 + label_end + 1; let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
marker_spaces_after(after_marker, _indent_cols + marker_len);
return Some(ListMarkerMatch {
marker: ListMarker::Ordered(OrderedMarker::Example { label }),
marker_len,
spaces_after_cols,
spaces_after_bytes,
virtual_marker_space,
});
}
}
}
if let Some(rest) = trimmed.strip_prefix('(') {
if config.extensions.fancy_lists {
let digit_count = rest.chars().take_while(|c| c.is_ascii_digit()).count();
if digit_count > 0
&& rest.len() > digit_count
&& rest.chars().nth(digit_count) == Some(')')
{
let number = &rest[..digit_count];
let after_marker = &rest[digit_count + 1..];
if after_marker.starts_with(' ')
|| after_marker.starts_with('\t')
|| after_marker.is_empty()
{
let marker_len = 2 + digit_count;
let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
marker_spaces_after(after_marker, _indent_cols + marker_len);
return Some(ListMarkerMatch {
marker: ListMarker::Ordered(OrderedMarker::Decimal {
number: number.to_string(),
style: ListDelimiter::Parens,
}),
marker_len,
spaces_after_cols,
spaces_after_bytes,
virtual_marker_space,
});
}
}
}
if config.extensions.fancy_lists {
if let Some(len) = try_parse_roman_numeral(rest, false)
&& rest.len() > len
&& rest.as_bytes()[len] == b')'
{
let after_marker = &rest[len + 1..];
if after_marker.starts_with(' ')
|| after_marker.starts_with('\t')
|| after_marker.is_empty()
{
let marker_len = len + 2;
let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
marker_spaces_after(after_marker, _indent_cols + marker_len);
return Some(ListMarkerMatch {
marker: ListMarker::Ordered(OrderedMarker::LowerRoman {
numeral: rest[..len].to_string(),
style: ListDelimiter::Parens,
}),
marker_len,
spaces_after_cols,
spaces_after_bytes,
virtual_marker_space,
});
}
}
if let Some(len) = try_parse_roman_numeral(rest, true)
&& rest.len() > len
&& rest.as_bytes()[len] == b')'
{
let after_marker = &rest[len + 1..];
if after_marker.starts_with(' ')
|| after_marker.starts_with('\t')
|| after_marker.is_empty()
{
let marker_len = len + 2;
let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
marker_spaces_after(after_marker, _indent_cols + marker_len);
return Some(ListMarkerMatch {
marker: ListMarker::Ordered(OrderedMarker::UpperRoman {
numeral: rest[..len].to_string(),
style: ListDelimiter::Parens,
}),
marker_len,
spaces_after_cols,
spaces_after_bytes,
virtual_marker_space,
});
}
}
if let Some(ch) = rest.chars().next()
&& ch.is_ascii_lowercase()
&& rest.len() > 1
&& rest.chars().nth(1) == Some(')')
{
let after_marker = &rest[2..];
if after_marker.starts_with(' ')
|| after_marker.starts_with('\t')
|| after_marker.is_empty()
{
let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
marker_spaces_after(after_marker, _indent_cols + 3);
return Some(ListMarkerMatch {
marker: ListMarker::Ordered(OrderedMarker::LowerAlpha {
letter: ch,
style: ListDelimiter::Parens,
}),
marker_len: 3,
spaces_after_cols,
spaces_after_bytes,
virtual_marker_space,
});
}
}
if let Some(ch) = rest.chars().next()
&& ch.is_ascii_uppercase()
&& rest.len() > 1
&& rest.chars().nth(1) == Some(')')
{
let after_marker = &rest[2..];
if after_marker.starts_with(' ')
|| after_marker.starts_with('\t')
|| after_marker.is_empty()
{
let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
marker_spaces_after(after_marker, _indent_cols + 3);
return Some(ListMarkerMatch {
marker: ListMarker::Ordered(OrderedMarker::UpperAlpha {
letter: ch,
style: ListDelimiter::Parens,
}),
marker_len: 3,
spaces_after_cols,
spaces_after_bytes,
virtual_marker_space,
});
}
}
}
}
let digit_count = trimmed.chars().take_while(|c| c.is_ascii_digit()).count();
if digit_count > 0 && trimmed.len() > digit_count {
if config.dialect == crate::Dialect::CommonMark && digit_count > 9 {
return None;
}
let number = &trimmed[..digit_count];
let delim = trimmed.chars().nth(digit_count);
let (style, marker_len) = match delim {
Some('.') => (ListDelimiter::Period, digit_count + 1),
Some(')') => (ListDelimiter::RightParen, digit_count + 1),
_ => return None,
};
if style == ListDelimiter::RightParen
&& !config.extensions.fancy_lists
&& config.dialect != crate::Dialect::CommonMark
{
return None;
}
let after_marker = &trimmed[marker_len..];
if after_marker.starts_with(' ')
|| after_marker.starts_with('\t')
|| after_marker.is_empty()
{
let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
marker_spaces_after(after_marker, _indent_cols + marker_len);
return Some(ListMarkerMatch {
marker: ListMarker::Ordered(OrderedMarker::Decimal {
number: number.to_string(),
style,
}),
marker_len,
spaces_after_cols,
spaces_after_bytes,
virtual_marker_space,
});
}
}
if config.extensions.fancy_lists {
if let Some(len) = try_parse_roman_numeral(trimmed, false)
&& trimmed.len() > len
&& let delim = trimmed.as_bytes()[len]
&& (delim == b'.' || delim == b')')
{
let style = if delim == b'.' {
ListDelimiter::Period
} else {
ListDelimiter::RightParen
};
let marker_len = len + 1;
let after_marker = &trimmed[marker_len..];
if after_marker.starts_with(' ')
|| after_marker.starts_with('\t')
|| after_marker.is_empty()
{
let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
marker_spaces_after(after_marker, _indent_cols + marker_len);
return Some(ListMarkerMatch {
marker: ListMarker::Ordered(OrderedMarker::LowerRoman {
numeral: trimmed[..len].to_string(),
style,
}),
marker_len,
spaces_after_cols,
spaces_after_bytes,
virtual_marker_space,
});
}
}
if let Some(len) = try_parse_roman_numeral(trimmed, true)
&& trimmed.len() > len
&& let delim = trimmed.as_bytes()[len]
&& (delim == b'.' || delim == b')')
{
let style = if delim == b'.' {
ListDelimiter::Period
} else {
ListDelimiter::RightParen
};
let marker_len = len + 1;
let after_marker = &trimmed[marker_len..];
let min_spaces = if delim == b'.' && len == 1 { 2 } else { 1 };
let (effective_cols, _) = leading_indent_from(after_marker, _indent_cols + marker_len);
if (after_marker.starts_with(' ')
|| after_marker.starts_with('\t')
|| after_marker.is_empty())
&& (after_marker.is_empty() || effective_cols >= min_spaces)
{
let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
marker_spaces_after(after_marker, _indent_cols + marker_len);
return Some(ListMarkerMatch {
marker: ListMarker::Ordered(OrderedMarker::UpperRoman {
numeral: trimmed[..len].to_string(),
style,
}),
marker_len,
spaces_after_cols,
spaces_after_bytes,
virtual_marker_space,
});
}
}
if let Some(ch) = trimmed.chars().next()
&& ch.is_ascii_lowercase()
&& trimmed.len() > 1
&& let Some(delim) = trimmed.chars().nth(1)
&& (delim == '.' || delim == ')')
{
let style = if delim == '.' {
ListDelimiter::Period
} else {
ListDelimiter::RightParen
};
let marker_len = 2;
let after_marker = &trimmed[marker_len..];
if after_marker.starts_with(' ')
|| after_marker.starts_with('\t')
|| after_marker.is_empty()
{
let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
marker_spaces_after(after_marker, _indent_cols + marker_len);
return Some(ListMarkerMatch {
marker: ListMarker::Ordered(OrderedMarker::LowerAlpha { letter: ch, style }),
marker_len,
spaces_after_cols,
spaces_after_bytes,
virtual_marker_space,
});
}
}
if let Some(ch) = trimmed.chars().next()
&& ch.is_ascii_uppercase()
&& trimmed.len() > 1
&& let Some(delim) = trimmed.chars().nth(1)
&& (delim == '.' || delim == ')')
{
let style = if delim == '.' {
ListDelimiter::Period
} else {
ListDelimiter::RightParen
};
let marker_len = 2;
let after_marker = &trimmed[marker_len..];
let min_spaces = if delim == '.' { 2 } else { 1 };
let (effective_cols, _) = leading_indent_from(after_marker, _indent_cols + marker_len);
if (after_marker.starts_with(' ') || after_marker.starts_with('\t'))
&& effective_cols >= min_spaces
{
let (spaces_after_cols, spaces_after_bytes, virtual_marker_space) =
marker_spaces_after(after_marker, _indent_cols + marker_len);
return Some(ListMarkerMatch {
marker: ListMarker::Ordered(OrderedMarker::UpperAlpha { letter: ch, style }),
marker_len,
spaces_after_cols,
spaces_after_bytes,
virtual_marker_space,
});
}
}
}
None
}
pub(crate) fn markers_match(a: &ListMarker, b: &ListMarker, dialect: crate::Dialect) -> bool {
match (a, b) {
(ListMarker::Bullet(ca), ListMarker::Bullet(cb)) => match dialect {
crate::Dialect::CommonMark => ca == cb,
_ => true,
},
(ListMarker::Ordered(OrderedMarker::Hash), ListMarker::Ordered(OrderedMarker::Hash)) => {
true
}
(
ListMarker::Ordered(OrderedMarker::Decimal { style: s1, .. }),
ListMarker::Ordered(OrderedMarker::Decimal { style: s2, .. }),
) => s1 == s2,
(
ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s1, .. }),
ListMarker::Ordered(OrderedMarker::LowerAlpha { style: s2, .. }),
) => s1 == s2,
(
ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s1, .. }),
ListMarker::Ordered(OrderedMarker::UpperAlpha { style: s2, .. }),
) => s1 == s2,
(
ListMarker::Ordered(OrderedMarker::LowerRoman { style: s1, .. }),
ListMarker::Ordered(OrderedMarker::LowerRoman { style: s2, .. }),
) => s1 == s2,
(
ListMarker::Ordered(OrderedMarker::UpperRoman { style: s1, .. }),
ListMarker::Ordered(OrderedMarker::UpperRoman { style: s2, .. }),
) => s1 == s2,
(
ListMarker::Ordered(OrderedMarker::Example { .. }),
ListMarker::Ordered(OrderedMarker::Example { .. }),
) => true, _ => false,
}
}
/// Opens a `LIST_ITEM` node and emits the tokens that precede its content.
///
/// Emits, in order: the indentation (if any) as `WHITESPACE`, the marker as
/// `LIST_MARKER`, the spacing after the marker as `WHITESPACE` (if any bytes
/// are consumed and they lie within the line), and a `TASK_CHECKBOX` token
/// when the content starts with `[ ]`, `[x]` or `[X]` followed by
/// whitespace. The `LIST_ITEM` node is left open.
///
/// Returns the column at which the item content starts together with the
/// remaining text of the line (after any checkbox), which the caller buffers.
pub(in crate::parser) fn emit_list_item(
    builder: &mut GreenNodeBuilder<'static>,
    item: &ListItemEmissionInput<'_>,
) -> (usize, String) {
    let line = item.content;
    let marker_start = item.indent_bytes;
    let marker_end = marker_start + item.marker_len;
    let body_start = marker_end + item.spaces_after_bytes;

    builder.start_node(SyntaxKind::LIST_ITEM.into());
    if marker_start > 0 {
        builder.token(SyntaxKind::WHITESPACE.into(), &line[..marker_start]);
    }
    builder.token(
        SyntaxKind::LIST_MARKER.into(),
        &line[marker_start..marker_end],
    );
    if item.spaces_after_bytes > 0 && body_start <= line.len() {
        builder.token(SyntaxKind::WHITESPACE.into(), &line[marker_end..body_start]);
    }

    let content_col = item.indent_cols + item.marker_len + item.spaces_after_cols;
    if body_start >= line.len() {
        return (content_col, String::new());
    }

    let rest = &line[body_start..];
    // A task checkbox only counts when whitespace follows the closing bracket.
    let has_checkbox = matches!(
        rest.as_bytes(),
        [b'[', b' ' | b'x' | b'X', b']', next, ..] if (*next as char).is_whitespace()
    );
    let buffered = if has_checkbox {
        builder.token(SyntaxKind::TASK_CHECKBOX.into(), &rest[..3]);
        &rest[3..]
    } else {
        rest
    };
    (content_col, buffered.to_string())
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::options::ParserOptions;

    /// Bullets are recognised whether a space or a tab follows the marker.
    #[test]
    fn detects_bullet_markers() {
        let config = ParserOptions::default();
        for line in ["* item", "*\titem"] {
            assert!(try_parse_list_marker(line, &config).is_some());
        }
    }

    /// With `fancy_lists` enabled, lowercase letters followed by `.` or `)`
    /// are list markers.
    #[test]
    fn detects_fancy_alpha_markers() {
        let mut config = ParserOptions::default();
        config.extensions.fancy_lists = true;

        let cases = [
            ("a. item", "a. should parse"),
            ("b. item", "b. should parse"),
            ("c. item", "c. should parse"),
            ("a) item", "a) should parse"),
            ("b) item", "b) should parse"),
        ];
        for (line, message) in cases {
            assert!(try_parse_list_marker(line, &config).is_some(), "{message}");
        }
    }
}
/// Ordered markers of the same scheme and delimiter match regardless of
/// their value; a different delimiter breaks the match.
#[test]
fn markers_match_fancy_lists() {
    let lower_alpha = |letter: char, style: ListDelimiter| {
        ListMarker::Ordered(OrderedMarker::LowerAlpha { letter, style })
    };
    let lower_roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };

    let a_period = lower_alpha('a', ListDelimiter::Period);
    let b_period = lower_alpha('b', ListDelimiter::Period);
    assert!(
        markers_match(&a_period, &b_period, crate::Dialect::Pandoc),
        "a. and b. should match"
    );

    let i_period = lower_roman("i");
    let ii_period = lower_roman("ii");
    assert!(
        markers_match(&i_period, &ii_period, crate::Dialect::Pandoc),
        "i. and ii. should match"
    );

    let a_paren = lower_alpha('a', ListDelimiter::RightParen);
    assert!(
        !markers_match(&a_period, &a_paren, crate::Dialect::Pandoc),
        "a. and a) should not match"
    );
}
/// Pandoc treats all bullet characters as interchangeable, whereas
/// CommonMark requires the same character.
#[test]
fn markers_match_bullet_dialect_split() {
    let bullets_match = |left: char, right: char, dialect: crate::Dialect| {
        markers_match(
            &ListMarker::Bullet(left),
            &ListMarker::Bullet(right),
            dialect,
        )
    };

    assert!(bullets_match('-', '+', crate::Dialect::Pandoc));
    assert!(bullets_match('-', '-', crate::Dialect::CommonMark));
    assert!(!bullets_match('-', '+', crate::Dialect::CommonMark));
    assert!(!bullets_match('*', '-', crate::Dialect::CommonMark));
}
/// With `fancy_lists` enabled, the lowercase roman numerals four through ten
/// are recognised as list markers.
#[test]
fn detects_complex_roman_numerals() {
    let mut config = ParserOptions::default();
    config.extensions.fancy_lists = true;

    for numeral in ["iv", "v", "vi", "vii", "viii", "ix", "x"] {
        let line = format!("{numeral}. item");
        assert!(
            try_parse_list_marker(&line, &config).is_some(),
            "{numeral}. should parse"
        );
    }
}
/// Example markers parse with and without a label when `example_lists` is
/// enabled, and are rejected when it is disabled.
#[test]
fn detects_example_list_markers() {
    let mut config = ParserOptions::default();
    config.extensions.example_lists = true;

    for marker in ["(@)", "(@foo)", "(@my_label)", "(@test-123)"] {
        let line = format!("{marker} item");
        assert!(
            try_parse_list_marker(&line, &config).is_some(),
            "{marker} should parse"
        );
    }

    let disabled_config = ParserOptions {
        extensions: crate::options::Extensions {
            example_lists: false,
            ..Default::default()
        },
        ..Default::default()
    };
    let parsed_when_disabled = try_parse_list_marker("(@) item", &disabled_config);
    assert!(
        parsed_when_disabled.is_none(),
        "(@) should not parse when extension disabled"
    );
}
/// Two open roman lists sit one column either side of the new marker's
/// indent; the outer list, whose base indent is not smaller than the
/// marker's, must be chosen.
#[test]
fn deep_ordered_prefers_nearest_enclosing_indent_over_nearest_below() {
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    let lower_roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };
    let open_list = |base_indent_cols: usize| Container::List {
        marker: lower_roman("ii"),
        base_indent_cols,
        has_blank_between_items: false,
    };

    let mut containers = ContainerStack::new();
    containers.push(open_list(8));
    containers.push(Container::ListItem {
        content_col: 11,
        buffer: crate::parser::utils::list_item_buffer::ListItemBuffer::new(),
        marker_only: false,
        virtual_marker_space: false,
    });
    containers.push(open_list(6));

    let level = find_matching_list_level(
        &containers,
        &lower_roman("iii"),
        7,
        crate::Dialect::Pandoc,
    );
    assert_eq!(level, Some(0));
}
/// When an open list has exactly the new marker's indent, that list is
/// chosen even if another candidate is nearby.
#[test]
fn deep_ordered_matches_exact_indent_when_available() {
    use crate::parser::utils::container_stack::{Container, ContainerStack};

    let lower_roman = |numeral: &str| {
        ListMarker::Ordered(OrderedMarker::LowerRoman {
            numeral: numeral.to_string(),
            style: ListDelimiter::Period,
        })
    };

    let mut containers = ContainerStack::new();
    for base_indent_cols in [8, 6] {
        containers.push(Container::List {
            marker: lower_roman("ii"),
            base_indent_cols,
            has_blank_between_items: false,
        });
    }

    let level = find_matching_list_level(
        &containers,
        &lower_roman("iii"),
        6,
        crate::Dialect::Pandoc,
    );
    assert_eq!(level, Some(1));
}
/// A line such as `- *` consists of a bullet whose only content is another
/// bullet. It must parse as a list item containing a nested list with a
/// single, empty item (no `PLAIN` child).
#[test]
fn parses_nested_bullet_list_from_single_marker() {
use crate::parse;
use crate::syntax::SyntaxKind;
let config = ParserOptions::default();
// Each case pairs the input with a label used in failure messages.
for (input, desc) in [("- *\n", "- *"), ("- +\n", "- +"), ("- -\n", "- -")] {
let tree = parse(input, Some(config.clone()));
assert_eq!(
tree.kind(),
SyntaxKind::DOCUMENT,
"{desc}: root should be DOCUMENT"
);
// Walk DOCUMENT -> LIST -> LIST_ITEM -> LIST -> LIST_ITEM, failing with a
// descriptive message at the first missing level.
let outer_list = tree
.children()
.find(|n| n.kind() == SyntaxKind::LIST)
.unwrap_or_else(|| panic!("{desc}: should have outer LIST node"));
let outer_item = outer_list
.children()
.find(|n| n.kind() == SyntaxKind::LIST_ITEM)
.unwrap_or_else(|| panic!("{desc}: should have outer LIST_ITEM"));
let nested_list = outer_item
.children()
.find(|n| n.kind() == SyntaxKind::LIST)
.unwrap_or_else(|| {
panic!(
"{desc}: outer LIST_ITEM should contain nested LIST, got: {:?}",
outer_item.children().map(|n| n.kind()).collect::<Vec<_>>()
)
});
let nested_item = nested_list
.children()
.find(|n| n.kind() == SyntaxKind::LIST_ITEM)
.unwrap_or_else(|| panic!("{desc}: nested LIST should have LIST_ITEM"));
// The nested item is empty, so it must not contain a PLAIN node.
let has_plain = nested_item
.children()
.any(|n| n.kind() == SyntaxKind::PLAIN);
assert!(
!has_plain,
"{desc}: nested LIST_ITEM should not have PLAIN node (should be empty)"
);
}
}
/// Returns `true` when at least one list is open anywhere on the container
/// stack.
pub(in crate::parser) fn in_list(containers: &ContainerStack) -> bool {
    for container in containers.stack.iter() {
        if let Container::List { .. } = container {
            return true;
        }
    }
    false
}
/// Returns `true` when a list is open somewhere after (i.e. nested inside)
/// the first block quote on the container stack.
pub(in crate::parser) fn in_blockquote_list(containers: &ContainerStack) -> bool {
    containers
        .stack
        .iter()
        // Ignore everything that comes before the first block quote.
        .skip_while(|container| !matches!(container, Container::BlockQuote { .. }))
        .any(|container| matches!(container, Container::List { .. }))
}
/// Finds the open list that a new item with `marker` at column `indent_cols`
/// should join.
///
/// The container stack is scanned from the innermost container outwards and
/// the scan stops at the first block quote. A list is a candidate when its
/// marker matches (see `markers_match`) and its base indent is compatible
/// with `indent_cols`. Returns the index of the chosen list within
/// `containers.stack`, or `None` when no candidate exists.
///
/// Selection among candidates:
/// * a candidate with exactly the same indent is returned immediately;
/// * otherwise, for an ordered marker indented by at least four columns, the
///   candidate whose base indent is at or beyond `indent_cols` by the
///   smallest amount is preferred;
/// * otherwise the candidate with the smallest indent distance wins; on a
///   tie, a candidate whose base indent does not exceed `indent_cols`
///   replaces one whose base indent does.
pub(in crate::parser) fn find_matching_list_level(
containers: &ContainerStack,
marker: &ListMarker,
indent_cols: usize,
dialect: crate::Dialect,
) -> Option<usize> {
// (stack index, indent distance, base indent <= indent_cols)
let mut best_match: Option<(usize, usize, bool)> = None;
let is_deep_ordered = matches!(marker, ListMarker::Ordered(_)) && indent_cols >= 4;
// (stack index, base indent - indent_cols); only tracked for deep ordered markers.
let mut best_above_match: Option<(usize, usize)> = None;
for (i, c) in containers.stack.iter().enumerate().rev() {
// Lists outside an enclosing block quote are never considered.
if matches!(c, Container::BlockQuote { .. }) {
break;
}
if let Container::List {
marker: list_marker,
base_indent_cols,
..
} = c
&& markers_match(marker, list_marker, dialect)
{
// Indent compatibility depends on whether either side is indented by
// four or more columns.
let matches = if indent_cols >= 4 && *base_indent_cols >= 4 {
match (marker, list_marker) {
// Ordered lists tolerate up to three columns in either direction.
(ListMarker::Ordered(_), ListMarker::Ordered(_)) => {
indent_cols.abs_diff(*base_indent_cols) <= 3
}
// Other lists only tolerate the item being up to three columns deeper.
_ => indent_cols >= *base_indent_cols && indent_cols <= base_indent_cols + 3,
}
} else if indent_cols >= 4 || *base_indent_cols >= 4 {
// Exactly one side is deep: only ordered lists may still match.
match (marker, list_marker) {
(ListMarker::Ordered(_), ListMarker::Ordered(_)) => {
indent_cols.abs_diff(*base_indent_cols) <= 3
}
_ => false,
}
} else {
// Both shallow: any marker kind tolerates up to three columns.
indent_cols.abs_diff(*base_indent_cols) <= 3
};
if matches {
let distance = indent_cols.abs_diff(*base_indent_cols);
let base_leq_indent = *base_indent_cols <= indent_cols;
// Track the closest candidate whose base indent is at or beyond the
// marker's indent.
if is_deep_ordered
&& matches!(
(marker, list_marker),
(ListMarker::Ordered(_), ListMarker::Ordered(_))
)
&& *base_indent_cols >= indent_cols
{
let delta = *base_indent_cols - indent_cols;
if best_above_match.is_none_or(|(_, best_delta)| delta < best_delta) {
best_above_match = Some((i, delta));
}
}
// Keep the closest candidate; a tie only replaces the current best when
// the new candidate's base indent does not exceed the marker's indent
// and the current best's does.
if let Some((_, best_dist, best_base_leq)) = best_match {
if distance < best_dist
|| (distance == best_dist && base_leq_indent && !best_base_leq)
{
best_match = Some((i, distance, base_leq_indent));
}
} else {
best_match = Some((i, distance, base_leq_indent));
}
// An exact indent match cannot be improved upon.
if distance == 0 {
return Some(i);
}
}
}
}
// The "at or beyond" candidate takes precedence over the plain closest one.
if let Some((index, _)) = best_above_match {
return Some(index);
}
best_match.map(|(i, _, _)| i)
}
/// Opens a new `LIST` node and emits its first item.
///
/// When `indent_to_emit` is given it is emitted as a `WHITESPACE` token
/// before the list node is opened. The list is recorded on the container
/// stack with `item.indent_cols` as its base indent, then the item is
/// emitted and finished (which may open further nested containers).
pub(in crate::parser) fn start_nested_list(
    containers: &mut ContainerStack,
    builder: &mut GreenNodeBuilder<'static>,
    marker: &ListMarker,
    item: &ListItemEmissionInput<'_>,
    indent_to_emit: Option<&str>,
    config: &ParserOptions,
) {
    match indent_to_emit {
        Some(indent) => builder.token(SyntaxKind::WHITESPACE.into(), indent),
        None => {}
    }

    builder.start_node(SyntaxKind::LIST.into());
    let list = Container::List {
        marker: marker.clone(),
        base_indent_cols: item.indent_cols,
        has_blank_between_items: false,
    };
    containers.push(list);

    let (item_content_col, item_text) = emit_list_item(builder, item);
    finish_list_item_with_optional_nested(
        containers,
        builder,
        item_content_col,
        item_text,
        item.virtual_marker_space,
        config,
    );
}
/// Checks whether the content of a list item line is nothing but a single
/// bullet character.
///
/// The content starts after the line's indentation, the marker
/// (`marker_len`) and the spacing after it (`spaces_after_bytes`). The line
/// ending and surrounding whitespace are ignored. Returns the bullet
/// character (`*`, `+` or `-`) when it is the only thing left, `None`
/// otherwise.
pub(in crate::parser) fn is_content_nested_bullet_marker(
    content: &str,
    marker_len: usize,
    spaces_after_bytes: usize,
) -> Option<char> {
    let (_, indent_bytes) = leading_indent(content);
    let body_start = indent_bytes + marker_len + spaces_after_bytes;
    if body_start >= content.len() {
        return None;
    }

    let (body, _) = strip_newline(&content[body_start..]);
    let mut body_chars = body.trim().chars();
    match (body_chars.next(), body_chars.next()) {
        (Some(bullet @ ('*' | '+' | '-')), None) => Some(bullet),
        _ => None,
    }
}
/// Emits a list item whose only content is a nested, empty bullet item.
///
/// Produces `LIST_ITEM(indent?, marker, spacing?, LIST(LIST_ITEM(nested
/// marker, newline?)))`. The nested list and nested item are closed here;
/// the outer `LIST_ITEM` stays open and is recorded on the container stack.
pub(in crate::parser) fn add_list_item_with_nested_empty_list(
    containers: &mut ContainerStack,
    builder: &mut GreenNodeBuilder<'static>,
    item: &ListItemEmissionInput<'_>,
    nested_marker: char,
) {
    let line = item.content;
    let marker_start = item.indent_bytes;
    let marker_end = marker_start + item.marker_len;
    let body_start = marker_end + item.spaces_after_bytes;

    // Outer item: indentation, marker and the spacing after it.
    builder.start_node(SyntaxKind::LIST_ITEM.into());
    if marker_start > 0 {
        builder.token(SyntaxKind::WHITESPACE.into(), &line[..marker_start]);
    }
    builder.token(
        SyntaxKind::LIST_MARKER.into(),
        &line[marker_start..marker_end],
    );
    if item.spaces_after_bytes > 0 && body_start <= line.len() {
        builder.token(SyntaxKind::WHITESPACE.into(), &line[marker_end..body_start]);
    }

    // Nested list holding a single item that has a marker but no content.
    builder.start_node(SyntaxKind::LIST.into());
    builder.start_node(SyntaxKind::LIST_ITEM.into());
    let mut marker_buf = [0u8; 4];
    builder.token(
        SyntaxKind::LIST_MARKER.into(),
        nested_marker.encode_utf8(&mut marker_buf),
    );
    if body_start < line.len() {
        let remaining = &line[body_start..];
        // Skip the first byte of the remaining text and keep only the line ending.
        if remaining.len() > 1 {
            let newline = strip_newline(&remaining[1..]).1;
            if !newline.is_empty() {
                builder.token(SyntaxKind::NEWLINE.into(), newline);
            }
        }
    }
    builder.finish_node();
    builder.finish_node();

    containers.push(Container::ListItem {
        content_col: item.indent_cols + item.marker_len + item.spaces_after_cols,
        buffer: ListItemBuffer::new(),
        marker_only: false,
        virtual_marker_space: item.virtual_marker_space,
    });
}
/// Emits a list item into the currently open list.
///
/// The item's leading tokens are emitted first; the remaining text of the
/// line is then handed on so that a nested list or block quote starting on
/// the same line can be opened, or the text buffered on the new item.
pub(in crate::parser) fn add_list_item(
    containers: &mut ContainerStack,
    builder: &mut GreenNodeBuilder<'static>,
    item: &ListItemEmissionInput<'_>,
    config: &ParserOptions,
) {
    let emitted = emit_list_item(builder, item);
    log::trace!(
        "add_list_item: content={:?}, text_to_buffer={:?}",
        item.content,
        emitted.1
    );
    let (item_content_col, item_text) = emitted;
    finish_list_item_with_optional_nested(
        containers,
        builder,
        item_content_col,
        item_text,
        item.virtual_marker_space,
        config,
    );
}
/// Records a freshly emitted list item on the container stack, opening any
/// container that starts on the same line as the item's marker.
///
/// `text_to_buffer` is the text that follows the item's marker and
/// `content_col` the column at which it starts. Three outcomes are possible:
///
/// 1. The text itself starts with a list marker followed by content (and is
///    not a thematic break): a nested list is opened and this function
///    recurses on the nested item's text.
/// 2. The text starts with a single `>` (and is not a thematic break): a
///    block quote is opened inside the item. Its content may in turn start a
///    nested list (handled recursively) or is appended as a paragraph line.
/// 3. Otherwise the text is stored in the item's buffer for later processing.
fn finish_list_item_with_optional_nested(
containers: &mut ContainerStack,
builder: &mut GreenNodeBuilder<'static>,
content_col: usize,
text_to_buffer: String,
virtual_marker_space: bool,
config: &ParserOptions,
) {
// Text such as `* * *` would otherwise be mistaken for nested list markers.
let buffered_is_thematic_break =
super::horizontal_rules::try_parse_horizontal_rule(trim_end_newlines(&text_to_buffer))
.is_some();
if !buffered_is_thematic_break
&& let Some(inner_match) = try_parse_list_marker(&text_to_buffer, config)
{
let inner_content_start = inner_match.marker_len + inner_match.spaces_after_bytes;
let after_inner =
trim_end_newlines(text_to_buffer.get(inner_content_start..).unwrap_or(""));
// Only open a nested list when the inner marker is followed by content.
if !after_inner.is_empty() {
// The outer item gets an empty buffer; its text lives in the nested list.
containers.push(Container::ListItem {
content_col,
buffer: ListItemBuffer::new(),
marker_only: false, virtual_marker_space,
});
builder.start_node(SyntaxKind::LIST.into());
// The nested list's base indent is the outer item's content column.
containers.push(Container::List {
marker: inner_match.marker.clone(),
base_indent_cols: content_col,
has_blank_between_items: false,
});
let inner_item = ListItemEmissionInput {
content: text_to_buffer.as_str(),
marker_len: inner_match.marker_len,
spaces_after_cols: inner_match.spaces_after_cols,
spaces_after_bytes: inner_match.spaces_after_bytes,
indent_cols: content_col,
indent_bytes: 0,
virtual_marker_space: inner_match.virtual_marker_space,
};
let (inner_content_col, inner_text_to_buffer) = emit_list_item(builder, &inner_item);
// Recurse: the nested item's text may itself open further containers.
finish_list_item_with_optional_nested(
containers,
builder,
inner_content_col,
inner_text_to_buffer,
inner_match.virtual_marker_space,
config,
);
return;
}
}
// A single `>` opens a block quote inside the item; `>>` is not handled here.
if !buffered_is_thematic_break
&& text_to_buffer.starts_with('>')
&& !text_to_buffer.starts_with(">>")
{
let bytes = text_to_buffer.as_bytes();
// One optional space after `>` belongs to the block quote marker.
let has_trailing_space = bytes.get(1).copied() == Some(b' ');
let content_offset = if has_trailing_space { 2 } else { 1 };
let remaining = &text_to_buffer[content_offset..];
containers.push(Container::ListItem {
content_col,
buffer: ListItemBuffer::new(),
marker_only: false,
virtual_marker_space,
});
builder.start_node(SyntaxKind::BLOCK_QUOTE.into());
builder.token(SyntaxKind::BLOCK_QUOTE_MARKER.into(), ">");
if has_trailing_space {
builder.token(SyntaxKind::WHITESPACE.into(), " ");
}
containers.push(Container::BlockQuote {});
let trimmed = trim_end_newlines(remaining);
let inner_is_thematic_break =
super::horizontal_rules::try_parse_horizontal_rule(trimmed).is_some();
// The quoted text may itself start a list, under the same conditions as above.
if !inner_is_thematic_break
&& let Some(inner_match) = try_parse_list_marker(remaining, config)
{
let inner_content_start = inner_match.marker_len + inner_match.spaces_after_bytes;
let after_inner = trim_end_newlines(remaining.get(inner_content_start..).unwrap_or(""));
if !after_inner.is_empty() {
// Columns inside the quote are offset by the width of the `>` marker.
let bq_content_col = content_col + content_offset;
builder.start_node(SyntaxKind::LIST.into());
containers.push(Container::List {
marker: inner_match.marker.clone(),
base_indent_cols: bq_content_col,
has_blank_between_items: false,
});
let inner_item = ListItemEmissionInput {
content: remaining,
marker_len: inner_match.marker_len,
spaces_after_cols: inner_match.spaces_after_cols,
spaces_after_bytes: inner_match.spaces_after_bytes,
indent_cols: bq_content_col,
indent_bytes: 0,
virtual_marker_space: inner_match.virtual_marker_space,
};
let (inner_content_col, inner_text_to_buffer) =
emit_list_item(builder, &inner_item);
finish_list_item_with_optional_nested(
containers,
builder,
inner_content_col,
inner_text_to_buffer,
inner_match.virtual_marker_space,
config,
);
return;
}
}
// Plain quoted text becomes the first line of a paragraph in the quote.
if !trimmed.is_empty() {
crate::parser::blocks::paragraphs::start_paragraph_if_needed(containers, builder);
crate::parser::blocks::paragraphs::append_paragraph_line(
containers, builder, remaining, config,
);
}
return;
}
// Ordinary item: buffer the text. An item whose text is only whitespace is
// flagged as marker-only.
let marker_only = text_to_buffer.trim().is_empty();
let mut buffer = ListItemBuffer::new();
if !text_to_buffer.is_empty() {
buffer.push_text(text_to_buffer);
}
containers.push(Container::ListItem {
content_col,
buffer,
marker_only,
virtual_marker_space,
});
}