use crate::grammar::shared::Span;
use nom::{
branch::alt,
bytes::complete::take_while,
character::complete::{char as nom_char, line_ending, not_line_ending},
combinator::opt,
IResult, Input, Parser,
};
pub fn fenced_code_block(input: Span) -> IResult<Span, (Option<String>, Span)> {
log::debug!(
"Parsing fenced code block from: {:?}",
crate::logic::logger::safe_preview(input.fragment(), 20)
);
let original_input = input;
let (input, leading_spaces) = take_while(|c| c == ' ').parse(input)?;
if leading_spaces.fragment().len() > 3 {
return Err(nom::Err::Error(nom::error::Error::new(
original_input,
nom::error::ErrorKind::Tag,
)));
}
let (input, fence_char) = alt((nom_char('`'), nom_char('~'))).parse(input)?;
let (input, fence_count) = {
let mut count = 1; let mut current = input;
while let Ok((remaining, _)) = nom_char::<_, nom::error::Error<Span>>(fence_char)(current) {
count += 1;
current = remaining;
}
if count < 3 {
return Err(nom::Err::Error(nom::error::Error::new(
original_input,
nom::error::ErrorKind::Tag,
)));
}
(current, count)
};
let (input, info_line) = not_line_ending(input)?;
let info_string = info_line.fragment().trim();
if fence_char == '`' && info_string.contains('`') {
return Err(nom::Err::Error(nom::error::Error::new(
original_input,
nom::error::ErrorKind::Tag,
)));
}
let language = if !info_string.is_empty() {
Some(
info_string
.split_whitespace()
.next()
.unwrap_or("")
.to_string(),
)
} else {
None
};
let (mut input, _) = line_ending(input)?;
let content_start = input.location_offset();
let mut content_end = content_start;
let mut found_closing = false;
loop {
let check_input = input;
if let Ok((after_spaces, spaces)) =
take_while::<_, _, nom::error::Error<Span>>(|c| c == ' ')(check_input)
{
if spaces.fragment().len() <= 3 {
if let Ok((after_fence_start, _)) =
nom_char::<_, nom::error::Error<Span>>(fence_char)(after_spaces)
{
let mut close_count = 1;
let mut current = after_fence_start;
while let Ok((remaining, _)) =
nom_char::<_, nom::error::Error<Span>>(fence_char)(current)
{
close_count += 1;
current = remaining;
}
if close_count >= fence_count {
if let Ok((after_line, rest)) =
not_line_ending::<_, nom::error::Error<Span>>(current)
{
if rest.fragment().trim().is_empty() {
found_closing = true;
input = after_line;
let _ = opt(line_ending).parse(input)?;
break;
}
}
}
}
}
}
match not_line_ending::<Span, nom::error::Error<Span>>(input) {
Ok((after_line, line)) => {
content_end = line.location_offset() + line.fragment().len();
match line_ending::<Span, nom::error::Error<Span>>(after_line) {
Ok((after_newline, _)) => {
content_end += 1; input = after_newline;
}
Err(_) => {
input = after_line;
break;
}
}
}
Err(_) => {
break;
}
}
}
if !found_closing {
log::debug!("Unclosed fenced code block");
}
let content_len = content_end.saturating_sub(content_start);
let offset_from_original = content_start - original_input.location_offset();
let content_span = if content_len > 0
&& offset_from_original + content_len <= original_input.fragment().len()
{
let mut span = original_input
.take_from(offset_from_original)
.take(content_len);
if span.fragment().ends_with('\n') {
let len = span.fragment().len();
span = span.take(len.saturating_sub(1));
}
span
} else {
original_input.take_from(offset_from_original).take(0usize)
};
log::debug!(
"Parsed fenced code block with language={:?}, content length={}",
language,
content_span.fragment().len()
);
Ok((input, (language, content_span)))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` as a fenced code block and returns `(language, content)`,
    /// panicking when the parser rejects the input.
    fn parse_block(src: &str) -> (Option<String>, &str) {
        let (_, (lang, content)) =
            fenced_code_block(Span::new(src)).expect("input should parse as a fenced code block");
        (lang, *content.fragment())
    }

    #[test]
    fn smoke_test_fenced_basic_backticks() {
        let (lang, content) = parse_block("```\ncode\n```\n");
        assert_eq!(lang, None);
        assert_eq!(content, "code");
    }

    #[test]
    fn smoke_test_fenced_with_language() {
        let (lang, content) = parse_block("```rust\nfn main() {}\n```\n");
        assert_eq!(lang, Some("rust".to_string()));
        assert_eq!(content, "fn main() {}");
    }

    #[test]
    fn smoke_test_fenced_language_is_first_info_word() {
        let (lang, content) = parse_block("```rust ignore\ncode\n```\n");
        assert_eq!(lang, Some("rust".to_string()));
        assert_eq!(content, "code");
    }

    #[test]
    fn smoke_test_fenced_tildes() {
        let (lang, content) = parse_block("~~~\ncode\n~~~\n");
        assert_eq!(lang, None);
        assert_eq!(content, "code");
    }

    #[test]
    fn smoke_test_fenced_tilde_info_may_contain_backtick() {
        let (lang, content) = parse_block("~~~a`b\ncode\n~~~\n");
        assert_eq!(lang, Some("a`b".to_string()));
        assert_eq!(content, "code");
    }

    #[test]
    fn smoke_test_fenced_longer_closing() {
        let (_, content) = parse_block("```\ncode\n`````\n");
        assert_eq!(content, "code");
    }

    #[test]
    fn smoke_test_fenced_unclosed() {
        let (_, content) = parse_block("```\ncode\n");
        assert_eq!(content, "code");
    }

    #[test]
    fn smoke_test_fenced_empty_block() {
        let (lang, content) = parse_block("```\n```\n");
        assert_eq!(lang, None);
        assert_eq!(content, "");
    }

    #[test]
    fn smoke_test_fenced_nested_fences() {
        // The inner three-backtick fences are shorter than the opening fence,
        // so they are content rather than a closing fence.
        let (_, content) = parse_block("````\n```\ncode\n```\n````\n");
        assert!(content.contains("```"));
        assert_eq!(content, "```\ncode\n```");
    }

    #[test]
    fn smoke_test_fenced_three_space_indent_allowed() {
        let (_, content) = parse_block("   ```\ncode\n   ```\n");
        assert_eq!(content, "code");
    }

    #[test]
    fn smoke_test_fenced_four_space_indent_fails() {
        let result = fenced_code_block(Span::new("    ```\ncode\n```\n"));
        assert!(result.is_err());
    }

    #[test]
    fn smoke_test_fenced_less_than_three_fails() {
        let result = fenced_code_block(Span::new("``\ncode\n``\n"));
        assert!(result.is_err());
    }

    #[test]
    fn smoke_test_fenced_backtick_in_info_fails() {
        let result = fenced_code_block(Span::new("```rust`lang\ncode\n```\n"));
        assert!(result.is_err());
    }
}