use crate::grammar::blocks::{
cm_fenced_code_block::fenced_code_block, cm_heading::heading, cm_html_blocks::html_comment,
cm_thematic_break::thematic_break,
};
use crate::grammar::shared::{count_indentation, Span};
use nom::{
bytes::complete::take,
character::complete::{digit1, line_ending, one_of},
combinator::opt,
IResult, Input, Parser,
};
/// The marker that introduces a list item.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ListMarker {
    /// A bullet marker; carries the bullet character that was matched
    /// (`-`, `+` or `*`).
    Bullet(char),
    /// An ordered marker such as `1.` or `2)`.
    Ordered {
        /// The numeric value written before the delimiter.
        number: u32,
        /// The character that follows the digits (`.` or `)`).
        delimiter: char,
    },
}
pub fn detect_list_marker(input: Span) -> IResult<Span, (ListMarker, usize)> {
let start = input;
let leading_spaces = input
.fragment()
.chars()
.take_while(|&c| c == ' ' || c == '\t')
.take(3)
.fold(0, |acc, c| {
if c == ' ' {
acc + 1
} else {
acc + 4 - (acc % 4)
} });
let space_bytes = input
.fragment()
.chars()
.take_while(|&c| c == ' ' || c == '\t')
.take(3)
.count();
let (input, _) = if space_bytes > 0 {
take(space_bytes)(input)?
} else {
(input, Span::new(""))
};
if let Ok((after_marker, digits)) = digit1::<Span, nom::error::Error<Span>>(input) {
let number_str = digits.fragment();
if number_str.len() > 9 {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::TooLarge,
)));
}
let number: u32 = number_str.parse().map_err(|_| {
nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
})?;
if let Ok((after_delim, delimiter)) =
one_of::<_, _, nom::error::Error<Span>>(".)")(after_marker)
{
let after_delim_fragment = after_delim.fragment();
let has_space_or_tab = !after_delim_fragment.is_empty()
&& (after_delim_fragment.starts_with(' ')
|| after_delim_fragment.starts_with('\t'));
let is_end_of_line = after_delim_fragment.is_empty()
|| after_delim_fragment.starts_with('\n')
|| after_delim_fragment.starts_with('\r');
if has_space_or_tab || is_end_of_line {
let marker_width = leading_spaces + number_str.len() + 1;
let mut spaces_after = 0;
let current_column = marker_width;
for ch in after_delim_fragment.chars() {
if ch != ' ' && ch != '\t' {
break;
}
let space_width = if ch == ' ' {
1
} else {
4 - ((current_column + spaces_after) % 4)
};
if spaces_after + space_width > 4 {
break;
}
spaces_after += space_width;
}
let content_indent = marker_width + spaces_after;
let marker = ListMarker::Ordered { number, delimiter };
return Ok((after_delim, (marker, content_indent)));
}
}
}
if let Ok((after_marker, bullet_char)) = one_of::<_, _, nom::error::Error<Span>>("-+*")(input) {
let after_marker_fragment = after_marker.fragment();
let has_space_or_tab = !after_marker_fragment.is_empty()
&& (after_marker_fragment.starts_with(' ') || after_marker_fragment.starts_with('\t'));
let is_end_of_line = after_marker_fragment.is_empty()
|| after_marker_fragment.starts_with('\n')
|| after_marker_fragment.starts_with('\r');
if has_space_or_tab || is_end_of_line {
let marker_width = leading_spaces + 1;
let mut spaces_after = 0;
let current_column = marker_width;
for ch in after_marker_fragment.chars() {
if ch != ' ' && ch != '\t' {
break;
}
let space_width = if ch == ' ' {
1
} else {
4 - ((current_column + spaces_after) % 4)
};
if spaces_after + space_width > 4 {
break;
}
spaces_after += space_width;
}
let content_indent = marker_width + spaces_after;
let marker = ListMarker::Bullet(bullet_char);
return Ok((after_marker, (marker, content_indent)));
}
}
Err(nom::Err::Error(nom::error::Error::new(
start,
nom::error::ErrorKind::Tag,
)))
}
pub fn list_item(
input: Span,
expected_marker_type: Option<ListMarker>,
) -> IResult<Span, (ListMarker, Span, bool, usize)> {
let (after_marker, (marker, content_indent)) = detect_list_marker(input)?;
let marker_indent = count_indentation(input.fragment());
if let Some(expected) = expected_marker_type {
let matches = matches!(
(&marker, &expected),
(ListMarker::Bullet(_), ListMarker::Bullet(_))
| (ListMarker::Ordered { .. }, ListMarker::Ordered { .. })
);
if !matches {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)));
}
}
let content_start = after_marker; let content_start_offset = content_start.location_offset();
let mut remaining = after_marker;
let mut content_end_offset = remaining.location_offset();
let mut has_blank_lines = false;
let mut last_was_blank = false;
let mut is_first_line = true;
let mut in_fenced_code = false;
let mut fence_char: Option<char> = None;
let mut fence_indent: usize = 0;
const MAX_LINES: usize = 10000;
let mut line_count = 0;
loop {
line_count += 1;
if line_count > MAX_LINES {
log::warn!("List item exceeded MAX_LINES");
break;
}
if remaining.fragment().is_empty() {
break;
}
let current_line_end = remaining
.fragment()
.find('\n')
.unwrap_or(remaining.fragment().len());
let current_line = &remaining.fragment()[..current_line_end];
let is_blank = current_line.trim().is_empty();
if is_first_line {
is_first_line = false;
if is_blank {
let skip_len = if current_line_end < remaining.fragment().len() {
current_line_end + 1 } else {
current_line_end };
if skip_len > 0 {
let (new_remaining, _) = take(skip_len)(remaining)?;
content_end_offset = new_remaining.location_offset();
remaining = new_remaining;
}
if !remaining.fragment().is_empty() {
let next_line_end = remaining
.fragment()
.find('\n')
.unwrap_or(remaining.fragment().len());
let next_line = &remaining.fragment()[..next_line_end];
let next_indent = count_indentation(next_line);
if next_indent < 4 {
if detect_list_marker(remaining).is_ok() {
break;
}
}
}
continue;
}
last_was_blank = false;
let line_indent = count_indentation(current_line);
let trimmed_line = current_line.trim_start();
if (trimmed_line.starts_with("```") || trimmed_line.starts_with("~~~"))
&& trimmed_line.len() >= 3
{
let ch = trimmed_line.chars().next().unwrap();
let fence_len = trimmed_line.chars().take_while(|&c| c == ch).count();
if fence_len >= 3 {
log::debug!("list_item: first line starts fenced code block");
in_fenced_code = true;
fence_char = Some(ch);
fence_indent = line_indent;
}
}
let (new_remaining, _) =
nom::bytes::complete::take_while(|c| c != '\n' && c != '\r')(remaining)?;
let (new_remaining, _) = opt(line_ending).parse(new_remaining)?;
content_end_offset = new_remaining.location_offset();
remaining = new_remaining;
continue;
}
if is_blank {
let skip_len = if current_line_end < remaining.fragment().len() {
current_line_end + 1
} else {
current_line_end
};
if skip_len < remaining.fragment().len() {
let after_blank = &remaining.fragment()[skip_len..];
let next_line_end = after_blank.find('\n').unwrap_or(after_blank.len());
let next_line = &after_blank[..next_line_end];
let next_line_indent = count_indentation(next_line);
if next_line_indent < 4 {
let next_line_span = remaining.take_from(skip_len);
if detect_list_marker(next_line_span).is_ok() {
break;
}
if html_comment(next_line_span).is_ok() {
break;
}
}
}
let should_include_blank = if skip_len < remaining.fragment().len() {
let mut search_offset = skip_len;
let mut found_non_blank = false;
let mut next_non_blank_indent = 0;
while search_offset < remaining.fragment().len() {
let search_text = &remaining.fragment()[search_offset..];
let line_end = search_text.find('\n').unwrap_or(search_text.len());
let line = &search_text[..line_end];
if !line.trim().is_empty() {
found_non_blank = true;
next_non_blank_indent = count_indentation(line);
break;
}
search_offset += line_end + 1;
if search_offset > remaining.fragment().len() {
break;
}
}
if !found_non_blank {
false } else {
next_non_blank_indent >= content_indent
}
} else {
false };
if !should_include_blank {
break;
}
if !in_fenced_code {
has_blank_lines = true;
}
last_was_blank = true;
let (new_remaining, _) = take(skip_len)(remaining)?;
content_end_offset = new_remaining.location_offset();
remaining = new_remaining;
continue;
}
let line_indent = count_indentation(current_line);
let trimmed_line = current_line.trim_start();
if !in_fenced_code {
if (trimmed_line.starts_with("```") || trimmed_line.starts_with("~~~"))
&& trimmed_line.len() >= 3
{
let ch = trimmed_line.chars().next().unwrap();
let fence_len = trimmed_line.chars().take_while(|&c| c == ch).count();
if fence_len >= 3 {
log::debug!("list_item: entering fenced code block");
in_fenced_code = true;
fence_char = Some(ch);
fence_indent = line_indent;
}
}
} else if let Some(fc) = fence_char {
if trimmed_line.starts_with(fc) {
let close_fence_len = trimmed_line.chars().take_while(|&c| c == fc).count();
if close_fence_len >= 3 && line_indent <= fence_indent + content_indent {
log::debug!("list_item: exiting fenced code block");
in_fenced_code = false;
fence_char = None;
}
}
}
if line_indent < 4 && detect_list_marker(remaining).is_ok() && line_indent <= marker_indent
{
break;
}
let min_indent = content_indent;
if line_indent >= min_indent {
last_was_blank = false;
let skip_len = if current_line_end < remaining.fragment().len() {
current_line_end + 1
} else {
current_line_end
};
let (new_remaining, _) = take(skip_len)(remaining)?;
content_end_offset = new_remaining.location_offset();
remaining = new_remaining;
continue;
}
if !last_was_blank {
if line_indent < 4 && detect_list_marker(remaining).is_ok() {
break;
}
if thematic_break(remaining).is_ok() {
break;
}
if heading(remaining).is_ok() {
break;
}
if fenced_code_block(remaining).is_ok() {
break;
}
last_was_blank = false;
let skip_len = if current_line_end < remaining.fragment().len() {
current_line_end + 1
} else {
current_line_end
};
let (new_remaining, _) = take(skip_len)(remaining)?;
content_end_offset = new_remaining.location_offset();
remaining = new_remaining;
continue;
}
break;
}
let content_length = content_end_offset - content_start_offset;
let content = content_start.take(content_length);
let after_content = content_start.take_from(content_length);
Ok((
after_content,
(marker, content, has_blank_lines, content_indent),
))
}
/// One entry of the vector returned by [`list`], in field order:
///
/// 0. the item's [`ListMarker`];
/// 1. the item's content span (everything consumed after the marker);
/// 2. the blank-line flag reported by [`list_item`] for this item;
/// 3. whether blank lines were skipped between this item and the next parsed
///    item (always `false` for the last item);
/// 4. the item's content indent as reported by [`list_item`].
pub type ListItemData<'a> = (ListMarker, Span<'a>, bool, bool, usize);
/// Parses a run of consecutive list items of the same kind.
///
/// The first item is parsed with no constraint on its marker; every following
/// item must have the same marker kind (bullet vs. ordered) as the first one.
/// Blank lines between items are skipped and recorded on the *preceding* item
/// (field 3 of [`ListItemData`]). Parsing stops at end of input, when the next
/// non-blank line is not a matching list item, or after `MAX_ITEMS` items.
///
/// Blank lines that were skipped while looking for a next item stay consumed
/// even when no further item is found.
///
/// # Errors
///
/// Fails only when the first item cannot be parsed.
pub fn list(input: Span) -> IResult<Span, Vec<ListItemData>> {
    let (mut remaining, (first_marker, first_content, first_has_blank, first_indent)) =
        list_item(input, None)?;
    let mut items = vec![(
        first_marker,
        first_content,
        first_has_blank,
        false,
        first_indent,
    )];

    // Safety valve against pathological input.
    const MAX_ITEMS: usize = 1000;
    let mut last_offset = 0;

    loop {
        if items.len() >= MAX_ITEMS {
            log::warn!("List exceeded MAX_ITEMS");
            break;
        }
        if remaining.fragment().is_empty() {
            break;
        }

        // Skip blank lines that separate this item from the next one.
        //
        // Line lengths are byte counts, so the input is advanced with the
        // byte-indexed `take_from`; nom's `take(n)` parser counts chars and
        // would miscount on lines made of multi-byte whitespace.
        let mut has_blank_before_next = false;
        loop {
            let fragment: &str = remaining.fragment();
            if fragment.is_empty() {
                break;
            }
            let line_end = fragment.find('\n').unwrap_or(fragment.len());
            if !fragment[..line_end].trim().is_empty() {
                break;
            }
            has_blank_before_next = true;
            let skip_len = (line_end + 1).min(fragment.len());
            remaining = remaining.take_from(skip_len);
        }

        // Guard against a parser that makes no progress between iterations.
        let current_offset = remaining.location_offset();
        if current_offset == last_offset {
            log::error!("List parser stuck at offset {}", current_offset);
            break;
        }
        last_offset = current_offset;

        if remaining.fragment().is_empty() {
            break;
        }

        match list_item(remaining, Some(first_marker)) {
            Ok((new_remaining, (marker, content, has_blank, item_content_indent))) => {
                log::debug!("Parsed list item");
                if has_blank_before_next {
                    // The blank line belongs to the gap *after* the previous item.
                    if let Some(previous) = items.last_mut() {
                        previous.3 = true;
                    }
                }
                items.push((marker, content, has_blank, false, item_content_indent));
                remaining = new_remaining;
            }
            Err(_) => {
                log::debug!("Failed to parse next list item");
                break;
            }
        }
    }

    log::debug!("List parsing complete, {} items", items.len());
    Ok((remaining, items))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A dash followed by a space is recognised as a bullet marker.
    #[test]
    fn smoke_test_detect_bullet_marker() {
        let (_, (marker, _)) = detect_list_marker(Span::new("- Item")).unwrap();
        assert_eq!(marker, ListMarker::Bullet('-'));
    }

    /// Digits followed by a dot and a space are recognised as an ordered marker.
    #[test]
    fn smoke_test_detect_ordered_marker() {
        let (_, (marker, _)) = detect_list_marker(Span::new("1. Item")).unwrap();
        assert_eq!(
            marker,
            ListMarker::Ordered {
                number: 1,
                delimiter: '.'
            }
        );
    }

    /// A one-line item yields its marker and its text.
    #[test]
    fn smoke_test_list_item_single_line() {
        let (_, (marker, content, _, _)) = list_item(Span::new("- Item content"), None).unwrap();
        assert_eq!(marker, ListMarker::Bullet('-'));
        assert!(content.fragment().contains("Item content"));
    }

    /// A following line is folded into the same item.
    #[test]
    fn smoke_test_list_item_multiline() {
        let (_, (_, content, _, _)) = list_item(Span::new("- Line 1\n Line 2"), None).unwrap();
        for expected in ["Line 1", "Line 2"] {
            assert!(content.fragment().contains(expected));
        }
    }

    /// A single item produces a one-element list.
    #[test]
    fn smoke_test_list_single_item() {
        let (_, items) = list(Span::new("- Item")).unwrap();
        assert_eq!(items.len(), 1);
    }

    /// Consecutive bullet items are collected into one list.
    #[test]
    fn smoke_test_list_multiple_items() {
        let (_, items) = list(Span::new("- Item 1\n- Item 2\n- Item 3")).unwrap();
        assert_eq!(items.len(), 3);
    }

    /// Consecutive ordered items are collected into one list.
    #[test]
    fn smoke_test_ordered_list() {
        let (_, items) = list(Span::new("1. First\n2. Second\n3. Third")).unwrap();
        assert_eq!(items.len(), 3);
    }

    /// A blank line between items is recorded on the item before it.
    #[test]
    fn smoke_test_list_with_blank_lines() {
        let (_, items) = list(Span::new("- Item 1\n\n- Item 2")).unwrap();
        assert_eq!(items.len(), 2);
        assert!(items[0].3);
    }

    /// An unindented paragraph line continues the preceding item.
    #[test]
    fn smoke_test_list_lazy_continuation() {
        let (_, items) = list(Span::new("- Item 1\nLazy line\n- Item 2")).unwrap();
        assert_eq!(items.len(), 2);
        assert!(items[0].1.fragment().contains("Lazy line"));
    }

    /// A bullet character glued to text is not a list marker.
    #[test]
    fn smoke_test_detect_marker_fails_without_space() {
        assert!(detect_list_marker(Span::new("-Item")).is_err());
    }
}