use super::shared::{opt_span, GrammarSpan};
use crate::parser::ast::{Node, NodeKind};
use nom::bytes::complete::take;
use nom::IResult;
use nom::Input;
use nom::Parser;
pub fn parse_text(input: GrammarSpan) -> IResult<GrammarSpan, Node> {
let text_fragment = input.fragment();
let next_autolink_literal =
super::gfm_autolink_literal_parser::find_next_autolink_literal_start(text_fragment)
.unwrap_or(text_fragment.len());
let next_emoji_shortcode =
super::marco_emoji_shortcode_parser::find_next_emoji_shortcode_start(text_fragment)
.unwrap_or(text_fragment.len());
let next_platform_mention =
super::marco_platform_mentions_parser::find_next_platform_mention_start(text_fragment)
.unwrap_or(text_fragment.len());
let next_special = text_fragment
.char_indices()
.find_map(|(idx, ch)| match ch {
'*' | '_' | '`' | '[' | '<' | '!' | '&' | '\n' | '\\' | '$' => Some(idx),
'^' | '~' | '˅' => Some(idx),
'=' => {
if text_fragment[idx..].starts_with("==") {
Some(idx)
} else {
None
}
}
'-' => {
if text_fragment[idx..].starts_with("--") {
Some(idx)
} else {
None
}
}
_ => None,
})
.unwrap_or(text_fragment.len());
if next_autolink_literal == 0 {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
if next_emoji_shortcode == 0 {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
if next_platform_mention == 0 {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
let next_special = next_special
.min(next_autolink_literal)
.min(next_emoji_shortcode)
.min(next_platform_mention);
if next_special == 0 {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
let mut text_len = next_special;
if next_special < text_fragment.len() && text_fragment[next_special..].starts_with('\n') {
let mut trailing_spaces = 0;
for ch in text_fragment[..next_special].chars().rev() {
if ch == ' ' {
trailing_spaces += 1;
} else {
break;
}
}
if trailing_spaces >= 2 {
text_len = next_special - trailing_spaces;
}
}
if text_len == 0 {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
let text_content = input.take(text_len);
let rest = input.take_from(text_len);
let span = opt_span(text_content);
let node = Node {
kind: NodeKind::Text(text_content.fragment().to_string()),
span,
children: Vec::new(),
};
Ok((rest, node))
}
/// Consumes one leading character — or a whole run of leading backticks — and
/// returns it as a literal `NodeKind::Text` node.
///
/// If the input starts with a backtick, every consecutive leading backtick is
/// consumed as a single text node. Otherwise exactly one character is
/// consumed, whatever its UTF-8 width.
///
/// # Errors
///
/// Returns `nom::Err::Error` with `ErrorKind::Eof` when the input is empty.
pub fn parse_special_as_text(input: GrammarSpan) -> IResult<GrammarSpan, Node> {
    let text_fragment = input.fragment();
    if text_fragment.is_empty() {
        return Err(nom::Err::Error(nom::error::Error::new(
            input,
            nom::error::ErrorKind::Eof,
        )));
    }

    // `take` counts `char`s (not bytes) on string input, so the argument must
    // be a character count. Passing the UTF-8 byte length of a multi-byte
    // character such as `˅` would swallow the following character as well,
    // or fail with `Eof` when it is the last character of the input.
    let char_count = if text_fragment.starts_with('`') {
        text_fragment.chars().take_while(|&c| c == '`').count()
    } else {
        1
    };

    let (rest, text_content) = take(char_count).parse(input)?;
    let span = opt_span(text_content);
    let node = Node {
        kind: NodeKind::Text(text_content.fragment().to_string()),
        span,
        children: Vec::new(),
    };
    Ok((rest, node))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the string held by a `Text` node, panicking on any other kind
    /// so that a wrong node kind can never make a test pass silently.
    fn expect_text(node: &Node) -> &str {
        match &node.kind {
            NodeKind::Text(text) => text.as_str(),
            _ => panic!("Expected Text node"),
        }
    }

    #[test]
    fn smoke_test_parse_text_basic() {
        let input = GrammarSpan::new("Hello World*");
        let result = parse_text(input);
        assert!(result.is_ok(), "Failed to parse plain text");
        let (rest, node) = result.unwrap();
        assert_eq!(rest.fragment(), &"*");
        assert_eq!(expect_text(&node), "Hello World");
    }

    #[test]
    fn smoke_test_parse_text_up_to_special() {
        let input = GrammarSpan::new("text with `code`");
        let result = parse_text(input);
        assert!(result.is_ok());
        let (rest, node) = result.unwrap();
        assert_eq!(rest.fragment(), &"`code`");
        assert_eq!(expect_text(&node), "text with ");
    }

    #[test]
    fn smoke_test_parse_text_trailing_spaces() {
        // Two trailing spaces before the newline: `parse_text` only leaves the
        // spaces in the remaining input when there are at least two of them.
        let input = GrammarSpan::new("text  \n");
        let result = parse_text(input);
        assert!(result.is_ok());
        let (rest, node) = result.unwrap();
        assert_eq!(rest.fragment(), &"  \n");
        assert_eq!(expect_text(&node), "text");
    }

    #[test]
    fn smoke_test_parse_text_starts_with_special() {
        let input = GrammarSpan::new("*emphasis*");
        let result = parse_text(input);
        assert!(
            result.is_err(),
            "Should not parse text starting with special char"
        );
    }

    #[test]
    fn smoke_test_parse_special_as_text_asterisk() {
        let input = GrammarSpan::new("* not emphasis");
        let result = parse_special_as_text(input);
        assert!(result.is_ok(), "Failed to parse special as text");
        let (rest, node) = result.unwrap();
        assert_eq!(rest.fragment(), &" not emphasis");
        assert_eq!(expect_text(&node), "*");
    }

    #[test]
    fn smoke_test_parse_special_as_text_backticks() {
        let input = GrammarSpan::new("```not code");
        let result = parse_special_as_text(input);
        assert!(result.is_ok());
        let (rest, node) = result.unwrap();
        assert_eq!(rest.fragment(), &"not code");
        assert_eq!(expect_text(&node), "```");
    }

    #[test]
    fn smoke_test_parse_text_position() {
        let input = GrammarSpan::new("Hello*");
        let result = parse_text(input);
        assert!(result.is_ok());
        let (_, node) = result.unwrap();
        assert!(node.span.is_some(), "Text should have position info");
        let span = node.span.unwrap();
        assert_eq!(span.start.offset, 0);
        assert_eq!(span.end.offset, 5);
    }
}