use crate::ast::node::TextSpan;
use crate::ast::value::PropertyValue;
/// Literal `font_weight` value written onto spans that carry the bold mark.
const BOLD_WEIGHT: &str = "700";
/// Literal `highlight` value written onto spans that carry the highlight mark.
const HIGHLIGHT_DEFAULT: &str = "#fff59d";
/// The combination of inline marks that applies to a run of text.
///
/// Every flag defaults to `false`; two runs with equal `MarkSet`s can share a
/// single output span.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct MarkSet {
    /// Text is bold.
    bold: bool,
    /// Text is italic.
    italic: bool,
    /// Text is underlined.
    underline: bool,
    /// Text is struck through.
    strikethrough: bool,
    /// Text is highlighted.
    highlight: bool,
    /// Text is an inline code span.
    code: bool,
}

impl MarkSet {
    /// Materialises these marks as a [`TextSpan`] holding `text`.
    ///
    /// Active marks become `Some(..)` on the matching span field and inactive
    /// marks stay `None`. Bold and highlight use the `BOLD_WEIGHT` and
    /// `HIGHLIGHT_DEFAULT` literals. `link` is stored on the span unchanged,
    /// and the fields this parser never produces are left as `None`.
    fn span(&self, text: String, link: Option<String>) -> TextSpan {
        // Boolean marks are encoded as `Some(true)` when set, `None` otherwise.
        let flag = |enabled: bool| enabled.then_some(true);
        let font_weight = self
            .bold
            .then(|| PropertyValue::Literal(BOLD_WEIGHT.to_owned()));
        let highlight = self
            .highlight
            .then(|| PropertyValue::Literal(HIGHLIGHT_DEFAULT.to_owned()));

        TextSpan {
            text,
            fill: None,
            font_weight,
            italic: flag(self.italic),
            underline: flag(self.underline),
            strikethrough: flag(self.strikethrough),
            vertical_align: None,
            footnote_ref: None,
            data_ref: None,
            data_format: None,
            highlight,
            code: flag(self.code),
            link,
        }
    }
}
/// Kind of inline mark that a delimiter marker toggles.
///
/// The lexer in this file produces `Bold` for `**` / `__`, `Strikethrough`
/// for `~~`, `Underline` for `++`, `Highlight` for `==`, and `Italic` for a
/// single `*` or `_`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Delim {
Bold, Italic, Strikethrough, Underline, Highlight, }
/// One lexical unit of an inline run, as produced by `lex`.
#[derive(Debug, Clone)]
enum Token {
/// Literal text (escape sequences already decoded by the lexer).
Text(String),
/// Raw contents of a backtick code span, without the backticks.
Code(String),
/// A link whose label has already been parsed into spans.
Link(Vec<TextSpan>),
/// An emphasis delimiter whose role is decided by `resolve_markers`.
Marker {
/// Which mark this delimiter toggles.
delim: Delim,
/// The delimiter exactly as written; used as text if it stays unresolved.
literal: String,
/// True when the delimiter is immediately followed by a non-whitespace char.
can_open: bool,
/// True when the delimiter is immediately preceded by a non-whitespace char.
can_close: bool,
/// Whether the marker opens a span, closes one, or is still undecided.
role: MarkerRole,
},
}
/// Resolution state of a `Token::Marker`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MarkerRole {
/// Not paired (yet); such markers are turned back into literal text.
Unresolved,
/// Paired as the opening delimiter of a span.
Open,
/// Paired as the closing delimiter of a span.
Close,
}
/// Parses inline markup in `input` and returns the resulting styled spans.
///
/// The input is processed as a sequence of `char`s and starts outside of any
/// link. Empty input produces an empty vector.
pub fn parse_inline_markdown(input: &str) -> Vec<TextSpan> {
    let source = input.chars().collect::<Vec<char>>();
    let mut spans = Vec::new();
    // Top-level text is not inside a link, hence `None`.
    parse_run(&source, None, &mut spans);
    spans
}
/// Parses one run of characters and appends its spans to `out`.
///
/// Runs the three stages in order: lex, resolve markers, build spans. `link`
/// is the URL attached to every span built from this run (`None` outside
/// links). The lexer calls back into this function for link labels.
fn parse_run(chars: &[char], link: Option<String>, out: &mut Vec<TextSpan>) {
    let resolved = {
        let mut lexed = lex(chars);
        resolve_markers(&mut lexed);
        lexed
    };
    build_spans(&resolved, link, out);
}
fn lex(chars: &[char]) -> Vec<Token> {
let mut tokens: Vec<Token> = Vec::new();
let mut buf = String::new();
let mut i: usize = 0;
while i < chars.len() {
let Some(&c) = chars.get(i) else { break };
if c == '\\' {
match chars.get(i + 1) {
Some(&next) if is_escapable(next) => {
buf.push(next);
i += 2;
continue;
}
_ => {
buf.push('\\');
i += 1;
continue;
}
}
}
if c == '`' {
if let Some(end) = find_code_close(chars, i + 1) {
flush_text(&mut buf, &mut tokens);
let raw: String = chars.get(i + 1..end).unwrap_or(&[]).iter().collect();
tokens.push(Token::Code(raw));
i = end + 1;
continue;
}
buf.push('`');
i += 1;
continue;
}
if c == '[' {
if let Some((label, url, next)) = try_parse_link(chars, i) {
flush_text(&mut buf, &mut tokens);
let label_chars: Vec<char> = label.chars().collect();
let mut label_spans: Vec<TextSpan> = Vec::new();
parse_run(&label_chars, Some(url), &mut label_spans);
tokens.push(Token::Link(label_spans));
i = next;
continue;
}
buf.push('[');
i += 1;
continue;
}
if let Some((delim, lit)) = match_two_char(chars, i) {
flush_text(&mut buf, &mut tokens);
let (can_open, can_close) = flanking(chars, i, 2);
tokens.push(Token::Marker {
delim,
literal: lit,
can_open,
can_close,
role: MarkerRole::Unresolved,
});
i += 2;
continue;
}
if c == '*' || c == '_' {
flush_text(&mut buf, &mut tokens);
let (can_open, can_close) = flanking(chars, i, 1);
tokens.push(Token::Marker {
delim: Delim::Italic,
literal: c.to_string(),
can_open,
can_close,
role: MarkerRole::Unresolved,
});
i += 1;
continue;
}
buf.push(c);
i += 1;
}
flush_text(&mut buf, &mut tokens);
tokens
}
/// Moves any buffered text out of `buf` into a new `Token::Text`.
///
/// Does nothing when `buf` is empty; otherwise `buf` is left empty.
fn flush_text(buf: &mut String, tokens: &mut Vec<Token>) {
    if buf.is_empty() {
        return;
    }
    let text = std::mem::take(buf);
    tokens.push(Token::Text(text));
}
/// Reports whether the delimiter of `width` chars at `i` may open and/or close.
///
/// Returns `(can_open, can_close)`: a delimiter can open when the character
/// right after it exists and is not whitespace, and can close when the
/// character right before it exists and is not whitespace.
fn flanking(chars: &[char], i: usize, width: usize) -> (bool, bool) {
    let is_solid = |slot: Option<&char>| slot.is_some_and(|ch| !ch.is_whitespace());
    // `checked_sub` yields `None` at the start of the input.
    let left = i.checked_sub(1).and_then(|prev| chars.get(prev));
    let right = chars.get(i + width);
    (is_solid(right), is_solid(left))
}
/// Pairs opening and closing markers in `tokens` and demotes the rest to text.
///
/// Tokens are scanned left to right while a stack tracks the indices of
/// markers that may still open a span. A marker that can close is paired with
/// the nearest stacked opener that has the same [`Delim`] *and* the same
/// literal delimiter, so `*a_` or `**a__` stay literal instead of being paired
/// across different delimiter characters. Openers stacked above the matched
/// one are dropped from the stack and therefore remain unresolved, which keeps
/// the resolved pairs properly nested.
///
/// Afterwards every marker that is still `Unresolved` is replaced in place by
/// a `Token::Text` containing its literal, so no input characters are lost.
fn resolve_markers(tokens: &mut [Token]) {
    /// True when both indices hold markers written with the same delimiter.
    fn is_pair(tokens: &[Token], open_idx: usize, close_idx: usize) -> bool {
        match (tokens.get(open_idx), tokens.get(close_idx)) {
            (
                Some(Token::Marker {
                    delim: open_delim,
                    literal: open_literal,
                    ..
                }),
                Some(Token::Marker {
                    delim: close_delim,
                    literal: close_literal,
                    ..
                }),
            ) => open_delim == close_delim && open_literal == close_literal,
            _ => false,
        }
    }

    let mut open_stack: Vec<usize> = Vec::new();
    for idx in 0..tokens.len() {
        let (can_open, can_close) = match tokens.get(idx) {
            Some(Token::Marker {
                can_open,
                can_close,
                ..
            }) => (*can_open, *can_close),
            _ => continue,
        };

        // Search from the top of the stack for the innermost compatible opener.
        let matched = if can_close {
            open_stack
                .iter()
                .copied()
                .enumerate()
                .rev()
                .find(|&(_, open_idx)| is_pair(&*tokens, open_idx, idx))
        } else {
            None
        };

        if let Some((stack_pos, open_idx)) = matched {
            // Discards the matched opener and every opener nested inside it.
            open_stack.truncate(stack_pos);
            set_role(tokens, open_idx, MarkerRole::Open);
            set_role(tokens, idx, MarkerRole::Close);
            continue;
        }

        // A marker that could not close may still open a later span.
        if can_open {
            open_stack.push(idx);
        }
    }

    // Anything left unresolved is ordinary text; move the literal, no clone.
    for tok in tokens.iter_mut() {
        let demoted = match &mut *tok {
            Token::Marker {
                literal,
                role: MarkerRole::Unresolved,
                ..
            } => Some(std::mem::take(literal)),
            _ => None,
        };
        if let Some(literal) = demoted {
            *tok = Token::Text(literal);
        }
    }
}
/// Sets the role of the marker at `idx`.
///
/// Silently does nothing when `idx` is out of range or the token there is not
/// a marker.
fn set_role(tokens: &mut [Token], idx: usize, new_role: MarkerRole) {
    let Some(Token::Marker { role, .. }) = tokens.get_mut(idx) else {
        return;
    };
    *role = new_role;
}
/// Converts resolved `tokens` into spans appended to `out`.
///
/// A stack of active delimiters is maintained while walking the tokens; `Open`
/// markers push onto it and `Close` markers pop from it. Text and code tokens
/// are emitted with the marks currently on the stack, and `link` is attached
/// to every span built here.
///
/// Link tokens arrive with their label already parsed into spans. Those spans
/// additionally inherit the marks that are open around the link, so that
/// `**[x](u)**` yields a bold link the same way surrounding marks apply to
/// code spans. Marks set inside the label are kept as they are.
fn build_spans(tokens: &[Token], link: Option<String>, out: &mut Vec<TextSpan>) {
    let mut sink = SpanSink::new(link);
    let mut stack: Vec<Delim> = Vec::new();
    for tok in tokens {
        match tok {
            Token::Text(t) => {
                for ch in t.chars() {
                    sink.push_char(&stack, ch);
                }
            }
            Token::Code(raw) => {
                let mut marks = sink.marks_from_stack(&stack);
                marks.code = true;
                sink.push_span(marks.span(raw.clone(), sink.link.clone()));
            }
            Token::Link(spans) => {
                // Template span holding only the marks open around the link.
                let inherited = sink.marks_from_stack(&stack).span(String::new(), None);
                for s in spans {
                    let mut span = s.clone();
                    if span.font_weight.is_none() {
                        span.font_weight = inherited.font_weight.clone();
                    }
                    if span.italic.is_none() {
                        span.italic = inherited.italic;
                    }
                    if span.underline.is_none() {
                        span.underline = inherited.underline;
                    }
                    if span.strikethrough.is_none() {
                        span.strikethrough = inherited.strikethrough;
                    }
                    if span.highlight.is_none() {
                        span.highlight = inherited.highlight.clone();
                    }
                    sink.push_span(span);
                }
            }
            Token::Marker { delim, role, .. } => match role {
                MarkerRole::Open => stack.push(*delim),
                MarkerRole::Close => {
                    stack.pop();
                }
                // Unresolved markers were already rewritten to text upstream.
                MarkerRole::Unresolved => {}
            },
        }
    }
    sink.finish(out);
}
/// Accumulates output spans, coalescing neighbours that share all attributes.
struct SpanSink {
    /// URL attached to every span this sink builds from text or code.
    link: Option<String>,
    /// Spans emitted so far, in order.
    spans: Vec<TextSpan>,
    /// Marks of the text currently being buffered.
    pending_marks: MarkSet,
    /// Buffered text that has not been turned into a span yet.
    pending_text: String,
    /// True while `pending_text` / `pending_marks` describe a live buffer.
    have_pending: bool,
}

impl SpanSink {
    /// Creates an empty sink whose spans will carry `link`.
    fn new(link: Option<String>) -> Self {
        Self {
            link,
            spans: Vec::new(),
            pending_marks: MarkSet::default(),
            pending_text: String::new(),
            have_pending: false,
        }
    }

    /// Computes the marks implied by the delimiters currently on `stack`.
    fn marks_from_stack(&self, stack: &[Delim]) -> MarkSet {
        stack.iter().fold(MarkSet::default(), |mut marks, delim| {
            match delim {
                Delim::Bold => marks.bold = true,
                Delim::Italic => marks.italic = true,
                Delim::Strikethrough => marks.strikethrough = true,
                Delim::Underline => marks.underline = true,
                Delim::Highlight => marks.highlight = true,
            }
            marks
        })
    }

    /// Buffers `c`, starting a new buffer whenever the active marks change.
    fn push_char(&mut self, stack: &[Delim], c: char) {
        let marks = self.marks_from_stack(stack);
        let continues_buffer = self.have_pending && marks == self.pending_marks;
        if !continues_buffer {
            self.flush_pending();
            self.pending_marks = marks;
            self.have_pending = true;
        }
        self.pending_text.push(c);
    }

    /// Emits a ready-made span after flushing buffered text; empty spans are dropped.
    fn push_span(&mut self, span: TextSpan) {
        if span.text.is_empty() {
            return;
        }
        self.flush_pending();
        self.emit(span);
    }

    /// Turns the buffered text, if any, into a span and emits it.
    fn flush_pending(&mut self) {
        if !self.have_pending {
            return;
        }
        self.have_pending = false;
        let text = std::mem::take(&mut self.pending_text);
        let marks = std::mem::take(&mut self.pending_marks);
        if text.is_empty() {
            return;
        }
        let span = marks.span(text, self.link.clone());
        self.emit(span);
    }

    /// Flushes the buffer and moves all collected spans to the end of `out`.
    fn finish(mut self, out: &mut Vec<TextSpan>) {
        self.flush_pending();
        out.extend(self.spans);
    }

    /// Appends `span`, or extends the previous span when the two are mergeable.
    fn emit(&mut self, span: TextSpan) {
        match self.spans.last_mut() {
            Some(last) if spans_mergeable(last, &span) => last.text.push_str(&span.text),
            _ => self.spans.push(span),
        }
    }
}
/// Returns true when `a` and `b` agree on every attribute other than `text`,
/// i.e. when their text can be concatenated into a single span.
fn spans_mergeable(a: &TextSpan, b: &TextSpan) -> bool {
    let same_appearance = a.fill == b.fill
        && a.font_weight == b.font_weight
        && a.italic == b.italic
        && a.underline == b.underline
        && a.strikethrough == b.strikethrough
        && a.vertical_align == b.vertical_align;
    if !same_appearance {
        return false;
    }

    let same_references = a.footnote_ref == b.footnote_ref
        && a.data_ref == b.data_ref
        && a.data_format == b.data_format;
    if !same_references {
        return false;
    }

    a.highlight == b.highlight && a.code == b.code && a.link == b.link
}
/// Returns true when a backslash in front of `c` forms an escape sequence,
/// i.e. when `c` is one of the characters with syntactic meaning here.
fn is_escapable(c: char) -> bool {
    const ESCAPABLE: [char; 9] = ['*', '_', '~', '=', '+', '`', '[', ']', '\\'];
    ESCAPABLE.contains(&c)
}
/// Recognises a two-character delimiter starting at index `i`.
///
/// Returns the delimiter kind together with its literal text, or `None` when
/// fewer than two characters remain or the pair is not a known delimiter.
fn match_two_char(chars: &[char], i: usize) -> Option<(Delim, String)> {
    let (first, second) = match chars.get(i..)? {
        [first, second, ..] => (*first, *second),
        _ => return None,
    };
    // Every two-character delimiter is a doubled character.
    if first != second {
        return None;
    }
    let delim = match first {
        '*' | '_' => Delim::Bold,
        '~' => Delim::Strikethrough,
        '+' => Delim::Underline,
        '=' => Delim::Highlight,
        _ => return None,
    };
    let mut literal = String::with_capacity(2);
    literal.push(first);
    literal.push(second);
    Some((delim, literal))
}
/// Finds the index of the first backtick at or after `start`.
///
/// Returns `None` when there is no such backtick, including when `start` is
/// past the end of `chars`.
fn find_code_close(chars: &[char], start: usize) -> Option<usize> {
    chars
        .iter()
        .enumerate()
        .skip(start)
        .find(|&(_, &ch)| ch == '`')
        .map(|(index, _)| index)
}
/// Tries to read a `[label](url)` link whose `[` is at index `open`.
///
/// On success returns `(label, url, next)` where `next` is the index just past
/// the closing `)`. The label ends at the first `]` that is not preceded by a
/// backslash; backslash pairs are kept verbatim in the label so they can be
/// decoded when the label is lexed. The `(` must follow the `]` immediately,
/// and the URL is everything up to the first `)`, taken verbatim.
///
/// Returns `None` when `open` is not a `[` or any part of the form is missing.
fn try_parse_link(chars: &[char], open: usize) -> Option<(String, String, usize)> {
    if chars.get(open).copied() != Some('[') {
        return None;
    }

    let mut label = String::new();
    let mut cursor = open + 1;
    // Running off the end of the input means the label was never closed.
    let label_end = loop {
        match *chars.get(cursor)? {
            ']' => break cursor,
            '\\' => {
                label.push('\\');
                cursor += 1;
                // Keep the escaped character too so a `\]` does not end the label.
                if let Some(&escaped) = chars.get(cursor) {
                    label.push(escaped);
                    cursor += 1;
                }
            }
            other => {
                label.push(other);
                cursor += 1;
            }
        }
    };

    if chars.get(label_end + 1).copied() != Some('(') {
        return None;
    }

    let url_start = label_end + 2;
    let tail = chars.get(url_start..)?;
    let url_len = tail.iter().position(|&ch| ch == ')')?;
    let url: String = tail.iter().take(url_len).collect();
    Some((label, url, url_start + url_len + 1))
}
// Unit tests for the inline parser. All tests go through the public entry
// point `parse_inline_markdown` and inspect the resulting spans.
#[cfg(test)]
mod tests {
use super::*;
// Concatenates the text of all spans, ignoring their marks.
fn texts(spans: &[TextSpan]) -> String {
spans.iter().map(|s| s.text.as_str()).collect()
}
// Expected `font_weight` of a bold span.
fn bold() -> Option<PropertyValue> {
Some(PropertyValue::Literal(BOLD_WEIGHT.to_owned()))
}
// Expected `highlight` of a highlighted span.
fn hl() -> Option<PropertyValue> {
Some(PropertyValue::Literal(HIGHLIGHT_DEFAULT.to_owned()))
}
// --- Basic input and single marks ---
#[test]
fn empty_input_yields_no_spans() {
assert!(parse_inline_markdown("").is_empty());
}
#[test]
fn plain_text_single_span() {
let s = parse_inline_markdown("hello world");
assert_eq!(s.len(), 1);
assert_eq!(s[0].text, "hello world");
assert_eq!(s[0].font_weight, None);
assert_eq!(s[0].italic, None);
}
#[test]
fn bold_star_and_underscore() {
for src in ["**bold**", "__bold__"] {
let s = parse_inline_markdown(src);
assert_eq!(s.len(), 1, "src={src}");
assert_eq!(s[0].text, "bold");
assert_eq!(s[0].font_weight, bold());
}
}
#[test]
fn italic_star_and_underscore() {
for src in ["*it*", "_it_"] {
let s = parse_inline_markdown(src);
assert_eq!(s.len(), 1, "src={src}");
assert_eq!(s[0].text, "it");
assert_eq!(s[0].italic, Some(true));
assert_eq!(s[0].font_weight, None);
}
}
#[test]
fn strikethrough() {
let s = parse_inline_markdown("~~gone~~");
assert_eq!(s.len(), 1);
assert_eq!(s[0].text, "gone");
assert_eq!(s[0].strikethrough, Some(true));
}
#[test]
fn underline() {
let s = parse_inline_markdown("++under++");
assert_eq!(s.len(), 1);
assert_eq!(s[0].text, "under");
assert_eq!(s[0].underline, Some(true));
}
#[test]
fn highlight_uses_default_color() {
let s = parse_inline_markdown("==mark==");
assert_eq!(s.len(), 1);
assert_eq!(s[0].text, "mark");
assert_eq!(s[0].highlight, hl());
}
// --- Code spans ---
#[test]
fn code_span_basic() {
let s = parse_inline_markdown("`fn main()`");
assert_eq!(s.len(), 1);
assert_eq!(s[0].text, "fn main()");
assert_eq!(s[0].code, Some(true));
}
// Delimiters and backslashes inside a code span must come through untouched.
#[test]
fn code_span_is_verbatim_no_inner_parsing() {
let s = parse_inline_markdown("`**not bold** \\n _x_`");
assert_eq!(s.len(), 1);
assert_eq!(s[0].text, "**not bold** \\n _x_");
assert_eq!(s[0].code, Some(true));
assert_eq!(s[0].font_weight, None);
assert_eq!(s[0].italic, None);
}
// --- Nesting ---
#[test]
fn nested_bold_italic_single_span() {
let s = parse_inline_markdown("**_bold italic_**");
assert_eq!(s.len(), 1);
assert_eq!(s[0].text, "bold italic");
assert_eq!(s[0].font_weight, bold());
assert_eq!(s[0].italic, Some(true));
}
#[test]
fn nested_highlight_bold() {
let s = parse_inline_markdown("==**important**==");
assert_eq!(s.len(), 1);
assert_eq!(s[0].text, "important");
assert_eq!(s[0].highlight, hl());
assert_eq!(s[0].font_weight, bold());
}
// A mark that covers only part of another mark's range splits the output
// into one span per distinct combination of marks.
#[test]
fn partial_nesting_splits_spans() {
let s = parse_inline_markdown("a **b _c_ d** e");
assert_eq!(texts(&s), "a b c d e");
let joined: Vec<(&str, bool, bool)> = s
.iter()
.map(|x| {
(
x.text.as_str(),
x.font_weight.is_some(),
x.italic == Some(true),
)
})
.collect();
assert_eq!(
joined,
vec![
("a ", false, false),
("b ", true, false),
("c", true, true),
(" d", true, false),
(" e", false, false),
]
);
}
// --- Backslash escapes ---
#[test]
fn escapes_emit_literals() {
let s = parse_inline_markdown(r##"\*not italic\* \_ \~ \= \+ \` \[ \] \\"##);
assert_eq!(s.len(), 1);
assert_eq!(s[0].text, r##"*not italic* _ ~ = + ` [ ] \"##);
assert_eq!(s[0].italic, None);
assert_eq!(s[0].font_weight, None);
}
#[test]
fn backslash_before_normal_char_is_literal() {
let s = parse_inline_markdown(r##"a\b"##);
assert_eq!(texts(&s), r##"a\b"##);
}
// --- Links ---
#[test]
fn link_plain_label() {
let s = parse_inline_markdown("[Zenith](https://example.com)");
assert_eq!(s.len(), 1);
assert_eq!(s[0].text, "Zenith");
assert_eq!(s[0].link.as_deref(), Some("https://example.com"));
}
#[test]
fn link_label_with_inner_marks() {
let s = parse_inline_markdown("[**bold** link](u)");
assert_eq!(texts(&s), "bold link");
for sp in &s {
assert_eq!(sp.link.as_deref(), Some("u"));
}
assert_eq!(s[0].text, "bold");
assert_eq!(s[0].font_weight, bold());
assert_eq!(s[1].text, " link");
assert_eq!(s[1].font_weight, None);
}
#[test]
fn link_url_is_verbatim() {
let s = parse_inline_markdown("[x](http://a/**b**)");
assert_eq!(s.len(), 1);
assert_eq!(s[0].link.as_deref(), Some("http://a/**b**"));
}
#[test]
fn bracket_without_link_is_literal() {
let s = parse_inline_markdown("[just text]");
assert_eq!(texts(&s), "[just text]");
assert!(s.iter().all(|sp| sp.link.is_none()));
}
// The `(` must directly follow the `]`; a space in between means no link.
#[test]
fn bracket_with_label_but_no_paren_is_literal() {
let s = parse_inline_markdown("[label] (noturl)");
assert_eq!(texts(&s), "[label] (noturl)");
assert!(s.iter().all(|sp| sp.link.is_none()));
}
// --- Unmatched or whitespace-flanked delimiters stay literal ---
#[test]
fn dangling_bold_is_literal() {
let s = parse_inline_markdown("**oops");
assert_eq!(texts(&s), "**oops");
assert!(s.iter().all(|sp| sp.font_weight.is_none()));
}
#[test]
fn lone_star_is_literal() {
let s = parse_inline_markdown("a * b");
assert_eq!(texts(&s), "a * b");
assert!(s.iter().all(|sp| sp.italic.is_none()));
}
#[test]
fn unmatched_closing_underscore_is_literal() {
let s = parse_inline_markdown("end_");
assert_eq!(texts(&s), "end_");
assert!(s.iter().all(|sp| sp.italic.is_none()));
}
#[test]
fn whitespace_flanked_double_delim_is_literal_in_place() {
let s = parse_inline_markdown("a ** b");
assert_eq!(texts(&s), "a ** b");
assert!(s.iter().all(|sp| sp.font_weight.is_none()));
}
#[test]
fn dangling_opener_emits_literal_in_original_position() {
let s = parse_inline_markdown("x *unclosed");
assert_eq!(texts(&s), "x *unclosed");
assert!(s.iter().all(|sp| sp.italic.is_none()));
let joined = texts(&s);
let star = joined.find('*').expect("literal star present");
assert!(joined[star + 1..].starts_with("unclosed"));
}
#[test]
fn opener_needs_following_nonspace() {
let s = parse_inline_markdown("* a*");
assert_eq!(texts(&s), "* a*");
assert!(s.iter().all(|sp| sp.italic.is_none()));
}
#[test]
fn closer_needs_preceding_nonspace() {
let s = parse_inline_markdown("*a *");
assert_eq!(texts(&s), "*a *");
assert!(s.iter().all(|sp| sp.italic.is_none()));
}
#[test]
fn same_delim_nested_pairs_keep_marks() {
let s = parse_inline_markdown("**a **b** c**");
assert_eq!(texts(&s), "a b c");
assert!(s.iter().all(|sp| sp.font_weight == bold()));
}
// --- Only delimiters are consumed; all other characters survive ---
#[test]
fn no_character_loss_consumes_only_delimiters() {
let s = parse_inline_markdown("**a** _b_ ~~c~~ ++d++ ==e==");
assert_eq!(texts(&s), "a b c d e");
}
#[test]
fn no_character_loss_with_escapes() {
let s = parse_inline_markdown(r##"x \* y"##);
assert_eq!(texts(&s), "x * y");
}
#[test]
fn determinism_parse_twice_equal() {
let src = "a **b _c_** ~~d~~ `e` [f](g) ==h== \\* ++i++";
let a = parse_inline_markdown(src);
let b = parse_inline_markdown(src);
assert_eq!(a, b);
}
// --- Combinations and span coalescing ---
#[test]
fn combined_all_marks() {
let s = parse_inline_markdown("==++~~**_x_**~~++==");
assert_eq!(s.len(), 1);
assert_eq!(s[0].text, "x");
assert_eq!(s[0].highlight, hl());
assert_eq!(s[0].underline, Some(true));
assert_eq!(s[0].strikethrough, Some(true));
assert_eq!(s[0].font_weight, bold());
assert_eq!(s[0].italic, Some(true));
}
#[test]
fn code_inside_text_run() {
let s = parse_inline_markdown("use `cargo build` now");
assert_eq!(texts(&s), "use cargo build now");
assert_eq!(s[0].text, "use ");
assert_eq!(s[0].code, None);
assert_eq!(s[1].text, "cargo build");
assert_eq!(s[1].code, Some(true));
assert_eq!(s[2].text, " now");
}
#[test]
fn unclosed_code_is_literal_backtick() {
let s = parse_inline_markdown("a `b c");
assert_eq!(texts(&s), "a `b c");
assert!(s.iter().all(|sp| sp.code.is_none()));
}
// Two adjacent bold runs carry identical marks and are merged into one span.
#[test]
fn adjacent_same_marks_coalesce() {
let s = parse_inline_markdown("**a****b**");
assert_eq!(s.len(), 1);
assert_eq!(s[0].text, "ab");
assert_eq!(s[0].font_weight, bold());
}
}