use crate::grammar::shared::{count_indentation, Span};
use nom::{
bytes::complete::take_while,
character::complete::{line_ending, not_line_ending},
combinator::opt,
IResult, Input, Parser,
};
use crate::grammar::blocks::cm_list::detect_list_marker;
pub fn paragraph(input: Span) -> IResult<Span, Span> {
log::debug!(
"Parsing paragraph from: {:?}",
crate::logic::logger::safe_preview(input.fragment(), 40)
);
let original_input = input;
let indentation = count_indentation(input.fragment());
if indentation >= 4 {
return Err(nom::Err::Error(nom::error::Error::new(
original_input,
nom::error::ErrorKind::Tag,
)));
}
let (after_ws, _) = take_while(|c| c == ' ' || c == '\t')(original_input)?;
let (after_first, first_line) = not_line_ending(after_ws)?;
if first_line.fragment().chars().all(|c| c == ' ' || c == '\t') {
return Err(nom::Err::Error(nom::error::Error::new(
original_input,
nom::error::ErrorKind::Tag,
)));
}
let mut last_line_end = first_line.location_offset() + first_line.fragment().len();
let (mut input, _) = opt(line_ending).parse(after_first)?;
loop {
let (after_spaces, spaces) =
match take_while::<_, _, nom::error::Error<Span>>(|c| c == ' ')(input) {
Ok(result) => result,
Err(_) => break,
};
if spaces.fragment().len() <= 3 {
let trimmed = after_spaces.fragment().trim_start();
if trimmed.starts_with('#') {
let hash_count = trimmed.chars().take_while(|&c| c == '#').count();
if (1..=6).contains(&hash_count) {
if hash_count == trimmed.len()
|| trimmed
.chars()
.nth(hash_count)
.map(|c| c.is_whitespace())
.unwrap_or(false)
{
break;
}
}
}
if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
let fence_char = trimmed.chars().next().unwrap();
let fence_count = trimmed.chars().take_while(|&c| c == fence_char).count();
if fence_count >= 3 {
break;
}
}
if trimmed.starts_with('>') {
break;
}
if detect_list_marker(after_spaces).is_ok() {
let marker_chars: Vec<char> = trimmed.chars().take(5).collect();
if marker_chars
.first()
.map(|c| *c == '-' || *c == '*' || *c == '+')
.unwrap_or(false)
{
break;
} else if marker_chars
.first()
.map(|c| c.is_ascii_digit())
.unwrap_or(false)
{
if trimmed.starts_with("1.") || trimmed.starts_with("1)") {
break;
}
}
}
}
let (after_line, line) =
match not_line_ending::<Span, nom::error::Error<Span>>(after_spaces) {
Ok(result) => result,
Err(_) => break,
};
if line.fragment().chars().all(|c| c == ' ' || c == '\t') {
break;
}
last_line_end = line.location_offset() + line.fragment().len();
match line_ending::<Span, nom::error::Error<Span>>(after_line) {
Ok((after_newline, _)) => {
input = after_newline;
}
Err(_) => {
input = after_line;
break;
}
}
}
let leading_ws_len = original_input.fragment().len() - after_ws.fragment().len();
let start_offset = original_input.location_offset() + leading_ws_len;
let content_len = last_line_end - start_offset;
let para_span = original_input.take_from(leading_ws_len).take(content_len);
log::debug!(
"Parsed paragraph: {:?}",
crate::logic::logger::safe_preview(para_span.fragment(), 40)
);
Ok((input, para_span))
}
#[cfg(test)]
mod tests {
    //! Smoke tests for `paragraph`: single and multi-line input, termination
    //! at blank lines and interrupting blocks, and indentation handling.
    use super::*;

    /// A lone line without a trailing newline is returned unchanged.
    #[test]
    fn smoke_test_paragraph_single_line() {
        let (_, para) = paragraph(Span::new("Hello world")).expect("single line should parse");
        assert_eq!(*para.fragment(), "Hello world");
    }

    /// Consecutive non-blank lines are joined into one paragraph span.
    #[test]
    fn smoke_test_paragraph_multiline() {
        let source = "Line one\nLine two\nLine three";
        let (_, para) = paragraph(Span::new(source)).expect("multi-line input should parse");
        assert_eq!(*para.fragment(), source);
    }

    /// A blank line terminates the paragraph and is left in the remainder.
    #[test]
    fn smoke_test_paragraph_ends_at_blank() {
        let (rest, para) = paragraph(Span::new("First para\nSecond line\n\nNext para"))
            .expect("paragraph before blank line should parse");
        assert_eq!(*para.fragment(), "First para\nSecond line");
        assert!(rest.fragment().trim_start().starts_with("Next para"));
    }

    /// Leading whitespace is dropped from the first line only.
    #[test]
    fn smoke_test_paragraph_with_leading_spaces() {
        let (_, para) = paragraph(Span::new(" Indented para\n Continued"))
            .expect("lightly indented paragraph should parse");
        assert_eq!(*para.fragment(), "Indented para\n Continued");
    }

    /// An ATX heading on the next line ends the paragraph.
    #[test]
    fn smoke_test_paragraph_interrupted_by_heading() {
        let (rest, para) =
            paragraph(Span::new("Para text\n# Heading")).expect("paragraph should parse");
        assert_eq!(*para.fragment(), "Para text");
        assert!(rest.fragment().starts_with("# Heading"));
    }

    /// A code fence on the next line ends the paragraph.
    #[test]
    fn smoke_test_paragraph_interrupted_by_fence() {
        let (rest, para) =
            paragraph(Span::new("Para text\n```\ncode\n```")).expect("paragraph should parse");
        assert_eq!(*para.fragment(), "Para text");
        assert!(rest.fragment().starts_with("```"));
    }

    /// Four leading spaces are rejected by the paragraph parser.
    #[test]
    fn smoke_test_paragraph_fails_with_4_spaces() {
        // Exactly four spaces: the threshold at which `paragraph` refuses the line.
        assert!(paragraph(Span::new("    Code block")).is_err());
    }

    /// A continuation line indented by four spaces stays inside the paragraph.
    #[test]
    fn smoke_test_paragraph_lazy_continuation() {
        let (_, para) = paragraph(Span::new("First line\n    Lazy indented\nThird line"))
            .expect("paragraph with indented continuation should parse");
        assert!(para.fragment().contains("Lazy indented"));
    }
}