use std::sync::Arc;
use std::{borrow::Cow, ops::Deref};
use bitflags::bitflags;
use regex::Regex;
use std::sync::LazyLock;
use tree_sitter::{Node, Parser, Tree, TreeCursor};
use unicode_width::UnicodeWidthStr;
/// Iterator over the block-level syntax tree of a markdown document that
/// yields one [`MdSection`] for every node `parse_node` can turn into content.
pub(crate) struct MdIterator<'a> {
/// Markdown source text; node byte ranges are sliced out of it.
source: &'a str,
/// Cursor over `tree`. It is created in `new` with its lifetime transmuted,
/// so it must stay declared before `tree`: Rust drops fields in declaration
/// order, which makes the cursor go away before the tree it was created from.
cursor: TreeCursor<'a>,
/// Keeps the parsed tree alive while `cursor` exists; never read directly.
#[expect(dead_code)]
tree: Box<Tree>,
/// Set once the traversal has climbed past the root; `next` then returns `None`.
done: bool,
/// Parser used for the inline content of paragraphs and table cells.
inline_parser: &'a mut Parser,
/// Stack of currently open containers, each paired with the tree depth at
/// which it was pushed.
context: Vec<(usize, MdContainer)>,
/// Depth of the node the cursor points at; the root is depth 0.
depth: usize,
/// Depth of the list item that has already produced a section, if any.
/// Used to compute `MdSection::is_list_continuation`.
list_item_content_depth: Option<usize>,
}
impl<'a> MdIterator<'a> {
/// Creates an iterator positioned at the root of `tree`.
///
/// Node byte ranges from `tree` are used to slice `source`, so `tree` is
/// expected to be the parse result of exactly that text. `inline_parser` is
/// borrowed for the lifetime of the iterator and used for inline content.
pub fn new(tree: Tree, inline_parser: &'a mut Parser, source: &'a str) -> Self {
// The tree is boxed first and then stored in the struct next to the cursor.
let tree = Box::new(tree);
// SAFETY: NOTE(review) — `tree.walk()` borrows the local `tree`; the
// transmute erases that borrow so the cursor can be stored alongside the
// tree it came from. This relies on (1) the boxed tree never being replaced
// or dropped while the cursor exists, and (2) `cursor` being declared before
// `tree` in the struct so it is dropped first. Confirm against the
// tree-sitter binding's guarantees for `TreeCursor`.
let cursor =
unsafe { std::mem::transmute::<TreeCursor<'_>, TreeCursor<'static>>(tree.walk()) };
MdIterator {
source,
cursor,
tree,
done: false,
inline_parser,
context: Vec::new(),
depth: 0,
list_item_content_depth: None,
}
}
}
/// Depth-first, pre-order traversal of the block tree. Every loop iteration
/// looks at the node under the cursor, advances the cursor to the next node in
/// document order, and then tries to convert the inspected node into content.
impl Iterator for MdIterator<'_> {
type Item = MdSection;
/// Returns the next section, or `None` once the whole tree has been visited.
fn next(&mut self) -> Option<Self::Item> {
loop {
if self.done {
return None;
}
// Remember the node before the cursor moves; it is parsed further below.
let node = self.cursor.node();
// Lists, list items and block quotes open a container at the current depth.
if let Some(container) = self.node_to_container(node) {
self.context.push((self.depth, container));
}
// Advance: first child if there is one, otherwise the next sibling,
// climbing towards the root until a sibling exists.
if self.cursor.goto_first_child() {
self.depth += 1;
} else {
while !self.cursor.goto_next_sibling() {
if self.cursor.goto_parent() {
self.depth -= 1;
// Every container pushed at this depth or deeper has been
// fully traversed once we climbed back up to this depth.
while self.context.last().is_some_and(|(d, _)| *d >= self.depth) {
let popped = self.context.pop();
// Leaving the list item that was tracked as "already has
// content" clears the tracking.
if let Some((d, MdContainer::ListItem(_))) = popped {
if self.list_item_content_depth == Some(d) {
self.list_item_content_depth = None;
}
}
}
} else {
// No parent left: the root has been passed, traversal is over.
self.done = true;
break;
}
}
}
// NOTE(review): the cursor has already advanced here, so `context`
// reflects the state after any pops above, not the state at the moment
// `node` was read. Confirm this is intended for childless content nodes
// that are the last child of a container.
if let Some(content) = self.parse_node(node) {
let nesting: Vec<MdContainer> =
self.context.iter().map(|(_, c)| c.clone()).collect();
// Depth of the innermost open list item, if any.
let list_item_depth = self
.context
.iter()
.filter(|(_, c)| matches!(c, MdContainer::ListItem(_)))
.map(|(d, _)| *d)
.next_back();
// The first section produced inside a list item records that item's
// depth and is not a continuation; later sections at the same
// recorded depth are.
let is_list_continuation = if let Some(depth) = list_item_depth {
if self.list_item_content_depth == Some(depth) {
true
} else {
self.list_item_content_depth = Some(depth);
false
}
} else {
false
};
return Some(MdSection {
content,
nesting,
is_list_continuation,
});
}
}
}
}
impl<'a> MdIterator<'a> {
/// Converts one block-level node into renderable content.
///
/// Returns `None` for node kinds that do not produce a section of their own.
/// A `block_continuation` directly inside a `block_quote` yields an empty
/// paragraph, which marks a blank line within the quote.
#[expect(clippy::string_slice)]
fn parse_node(&mut self, node: Node<'a>) -> Option<MdContent> {
    match node.kind() {
        "paragraph" => self.parse_paragraph(&node),
        "thematic_break" => Some(MdContent::HorizontalRule),
        "pipe_table" => Some(self.parse_table(node)),
        "block_continuation" => node
            .parent()
            .filter(|parent| parent.kind() == "block_quote")
            .map(|_| MdContent::Paragraph(MdParagraph::empty())),
        "atx_heading" => {
            let mut level: u8 = 0;
            let mut title = "";
            let mut walker = node.walk();
            for part in node.children(&mut walker) {
                let part_kind = part.kind();
                if part_kind == "inline" {
                    title = &self.source[part.byte_range()];
                    continue;
                }
                level = match part_kind {
                    "atx_h1_marker" => 1,
                    "atx_h2_marker" => 2,
                    "atx_h3_marker" => 3,
                    "atx_h4_marker" => 4,
                    "atx_h5_marker" => 5,
                    "atx_h6_marker" => 6,
                    _ => {
                        debug_assert!(false, "heading greater than 6");
                        level
                    }
                };
            }
            Some(MdContent::Header {
                tier: level,
                text: title.to_owned(),
            })
        }
        "fenced_code_block" => {
            let mut language = String::new();
            let mut code = String::new();
            let mut walker = node.walk();
            for part in node.children(&mut walker) {
                let part_kind = part.kind();
                if part_kind == "info_string" {
                    language = self.source[part.byte_range()].trim().to_owned();
                } else if part_kind == "code_fence_content" {
                    code = self.source[part.byte_range()].to_owned();
                }
            }
            // Drop a single trailing line feed, if there is one.
            let kept_len = code.strip_suffix('\n').map_or(code.len(), str::len);
            code.truncate(kept_len);
            Some(MdContent::CodeBlock { language, code })
        }
        "indented_code_block" => {
            let mut code = String::new();
            for (index, line) in self.source[node.byte_range()].lines().enumerate() {
                if index > 0 {
                    code.push('\n');
                }
                code.push_str(line.strip_prefix(" ").unwrap_or(line));
            }
            Some(MdContent::CodeBlock {
                language: String::new(),
                code,
            })
        }
        _ => None,
    }
}
/// Builds a [`MdContent::Table`] from a `pipe_table` node.
///
/// The alignment list is padded with the default alignment until it has at
/// least one entry per header cell.
fn parse_table(&mut self, node: Node<'a>) -> MdContent {
    let mut header: Vec<Vec<Span>> = Vec::new();
    let mut alignments: Vec<TableAlignment> = Vec::new();
    let mut rows: Vec<Vec<Vec<Span>>> = Vec::new();

    let mut walker = node.walk();
    for part in node.children(&mut walker) {
        let part_kind = part.kind();
        if part_kind == "pipe_table_header" {
            header = self.parse_table_row(part);
        } else if part_kind == "pipe_table_delimiter_row" {
            alignments = self.parse_table_alignments(part);
        } else if part_kind == "pipe_table_row" {
            rows.push(self.parse_table_row(part));
        }
    }

    if alignments.len() < header.len() {
        alignments.resize(header.len(), TableAlignment::default());
    }

    MdContent::Table {
        header,
        rows,
        alignments,
    }
}
/// Parses every `pipe_table_cell` child of `row_node` into a list of spans.
///
/// Cell text is trimmed. Empty cells become an empty span list. If the inline
/// parser produces no tree, the raw text is kept as one unstyled span.
#[expect(clippy::string_slice)]
fn parse_table_row(&mut self, row_node: Node<'a>) -> Vec<Vec<Span>> {
    let mut cells: Vec<Vec<Span>> = Vec::new();
    let mut walker = row_node.walk();
    for cell in row_node.children(&mut walker) {
        if cell.kind() != "pipe_table_cell" {
            continue;
        }
        let text = self.source[cell.byte_range()].trim();
        let spans = if text.is_empty() {
            Vec::new()
        } else {
            match self.inline_parser.parse(text, None) {
                Some(tree) => {
                    let mut paragraph = MdParagraph::empty();
                    paragraph.recurse(tree.root_node(), text, Modifier::default(), 0);
                    detect_bare_urls(paragraph.spans)
                }
                None => vec![Span::new(text.to_owned(), Modifier::default())],
            }
        };
        cells.push(spans);
    }
    cells
}
/// Reads the column alignments from a table delimiter row.
///
/// A cell ending with `:` is right-aligned, or centered when it also starts
/// with `:`; every other cell is left-aligned.
#[expect(clippy::string_slice)]
fn parse_table_alignments(&self, delimiter_node: Node<'a>) -> Vec<TableAlignment> {
    let mut walker = delimiter_node.walk();
    let alignments = delimiter_node
        .children(&mut walker)
        .filter(|cell| cell.kind() == "pipe_table_delimiter_cell")
        .map(|cell| {
            let text = &self.source[cell.byte_range()];
            if !text.ends_with(':') {
                TableAlignment::Left
            } else if text.starts_with(':') {
                TableAlignment::Center
            } else {
                TableAlignment::Right
            }
        })
        .collect();
    alignments
}
/// Maps a node to the container it opens, if it is a list, list item or
/// block quote.
///
/// A list takes the marker of its first `list_item` child and falls back to
/// an unordered dash marker when it has none.
fn node_to_container(&self, node: Node<'a>) -> Option<MdContainer> {
    let container = match node.kind() {
        "block_quote" => MdContainer::Blockquote(BlockquoteMarker),
        "list_item" => MdContainer::ListItem(self.extract_list_marker(node)),
        "list" => {
            let mut walker = node.walk();
            let first_item = node
                .children(&mut walker)
                .find(|child| child.kind() == "list_item");
            let marker = match first_item {
                Some(item) => self.extract_list_marker(item),
                None => ListMarker::Unordered(BulletStyle::Dash),
            };
            MdContainer::List(marker)
        }
        _ => return None,
    };
    Some(container)
}
/// Determines the marker of a `list_item` node.
///
/// A task checkbox takes precedence over everything else. Otherwise a marker
/// starting with an ASCII digit makes the item ordered, numbered by the
/// leading digits of the item's source text (a value of 0 is reported as 1).
/// Any other marker is an unordered bullet, defaulting to a dash.
#[expect(clippy::string_slice)]
fn extract_list_marker(&self, list_item: Node<'a>) -> ListMarker {
    let mut lead = '-';
    let mut checkbox: Option<bool> = None;

    let mut walker = list_item.walk();
    for part in list_item.children(&mut walker) {
        match part.kind() {
            "task_list_marker_checked" => checkbox = Some(true),
            "task_list_marker_unchecked" => checkbox = Some(false),
            "list_marker_minus"
            | "list_marker_plus"
            | "list_marker_star"
            | "list_marker_dot"
            | "list_marker_parenthesis" => {
                lead = self.source[part.byte_range()]
                    .trim()
                    .chars()
                    .next()
                    .unwrap_or('-');
            }
            _ => {}
        }
    }

    let bullet = BulletStyle::from_char(lead).unwrap_or(BulletStyle::Dash);

    if let Some(checked) = checkbox {
        return if checked {
            ListMarker::TaskChecked(bullet)
        } else {
            ListMarker::TaskUnchecked(bullet)
        };
    }
    if !lead.is_ascii_digit() {
        return ListMarker::Unordered(bullet);
    }

    // Accumulate the leading decimal digits, saturating instead of overflowing.
    let mut number: u32 = 0;
    for digit in self.source[list_item.byte_range()]
        .chars()
        .map_while(|c| c.to_digit(10))
    {
        number = number.saturating_mul(10).saturating_add(digit);
    }
    ListMarker::Ordered(number.max(1))
}
/// Parses the inline content of a paragraph node.
///
/// Returns `None` for whitespace-only paragraphs. If the inline parser
/// produces no tree, the raw text is returned as one unstyled paragraph.
/// Otherwise the inline tree is converted, using the number of enclosing
/// block quotes as the blockquote depth.
fn parse_paragraph(&mut self, node: &Node<'_>) -> Option<MdContent> {
    #[expect(clippy::string_slice)]
    let text = &self.source[node.byte_range()];
    if text.trim().is_empty() {
        return None;
    }
    match self.inline_parser.parse(text, None) {
        None => Some(MdContent::Paragraph(MdParagraph::from(text))),
        Some(tree) => {
            let mut quote_levels: usize = 0;
            for (_, container) in &self.context {
                if matches!(container, MdContainer::Blockquote(_)) {
                    quote_levels += 1;
                }
            }
            MdParagraph::from_inline(tree.root_node(), text, quote_levels)
        }
    }
}
}
// Style and role flags attached to a `Span`. Several flags can be combined on
// one span. Flags with no note below are not set by the code visible in this
// file; presumably they are used by the rendering side — confirm there.
bitflags! {
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct Modifier: u32 {
// Set for content of `emphasis` nodes.
const Emphasis = 1 << 0;
// Set for content of `strong_emphasis` nodes.
const StrongEmphasis = 1 << 1;
// Set for the text of `code_span` nodes.
const Code = 1 << 2;
// Set for everything inside an `inline_link` node.
const Link = 1 << 3;
// Set on URLs that `detect_bare_urls` found in plain text.
const BareLink = 1 << 4;
// Set for `link_text` and `image_description` nodes.
const LinkDescription = 1 << 5;
// Set for the `[` and `]` tokens.
const LinkDescriptionWrapper = 1 << 6;
// Set for `link_destination` nodes and for detected bare URLs.
const LinkURL = 1 << 7;
// Set for the `(` and `)` tokens, and for the parentheses wrapped around bare URLs.
const LinkURLWrapper = 1 << 8;
// Set for everything inside an `image` node.
const Image = 1 << 9;
// Marks a span that starts a new line (see `split_newlines`).
const NewLine = 1 << 10;
const BlockquoteBar = 1 << 11;
const ListMarker = 1 << 12;
const TableBorder = 1 << 13;
const HorizontalRule = 1 << 14;
const EmphasisWrapper = 1 << 15;
const StrongEmphasisWrapper = 1 << 16;
const CodeWrapper = 1 << 17;
// Set for content of `strikethrough` nodes.
const Strikethrough = 1 << 18;
const StrikethroughWrapper = 1 << 19;
}
}
/// A run of text that shares one set of [`Modifier`] flags.
#[derive(Debug, Clone, PartialEq)]
pub struct Span {
/// Text of the span.
pub content: String,
/// Style and role flags that apply to `content`.
pub modifiers: Modifier,
/// For link-related spans, the URL the span refers to; `None` otherwise.
pub source_content: Option<SourceContent>,
}
impl Span {
    /// Creates a span carrying `content` with the `extra` modifiers and no
    /// source content.
    pub fn new(content: String, extra: Modifier) -> Self {
        Self {
            content,
            modifiers: extra,
            source_content: None,
        }
    }

    /// Creates a URL span. `modifiers` must contain [`Modifier::LinkURL`]
    /// (checked in debug builds). When `url` is `None`, the span's own
    /// content is used as its source content.
    pub fn link(content: String, modifiers: Modifier, url: Option<SourceContent>) -> Self {
        debug_assert!(
            modifiers.contains(Modifier::LinkURL),
            "link requires LinkURL"
        );
        let source_content = match url {
            Some(explicit) => explicit,
            None => SourceContent::from(content.as_str()),
        };
        Self {
            content,
            modifiers,
            source_content: Some(source_content),
        }
    }

    /// Test helper: creates a span with an explicit source content, without
    /// checking the modifiers.
    #[cfg(test)]
    pub fn source_link(
        content: String,
        modifiers: Modifier,
        source_content: SourceContent,
    ) -> Self {
        Self {
            content,
            modifiers,
            source_content: Some(source_content),
        }
    }

    /// Test helper: builds the six spans `[`, description, `]`, `(`, url, `)`,
    /// all flagged as `Link`, with the url span also flagged `LinkURL`.
    #[cfg(test)]
    pub fn test_link(description: &str, url: &str) -> Vec<Self> {
        let plain = |text: &str| Self::new(text.to_owned(), Modifier::Link);
        let target = Self::source_link(
            url.to_owned(),
            Modifier::Link | Modifier::LinkURL,
            SourceContent::from(url),
        );
        vec![
            plain("["),
            plain(description),
            plain("]"),
            plain("("),
            target,
            plain(")"),
        ]
    }
}
impl From<String> for Span {
fn from(value: String) -> Self {
Span {
content: value,
modifiers: Modifier::default(),
source_content: None,
}
}
}
/// Test-only convenience: builds an unstyled span from a string slice.
#[cfg(test)]
impl From<&str> for Span {
    fn from(value: &str) -> Self {
        let owned = String::from(value);
        Span::from(owned)
    }
}
/// Test-only: a span displays as its bare text content.
#[cfg(test)]
impl std::fmt::Display for Span {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.content)
    }
}
impl UnicodeWidthStr for Span {
fn width(&self) -> usize {
self.content.width()
}
fn width_cjk(&self) -> usize {
self.content.width_cjk()
}
}
/// Reference-counted string (used in this file for link URLs). Cloning only
/// bumps the reference count and does not copy the text.
#[derive(Debug, Clone, PartialEq)]
pub struct SourceContent(Arc<str>);

/// Copies the text into a new shared allocation.
impl From<&str> for SourceContent {
    fn from(value: &str) -> Self {
        SourceContent(value.into())
    }
}

/// Gives access to the underlying `Arc<str>`.
impl Deref for SourceContent {
    type Target = Arc<str>;

    fn deref(&self) -> &Self::Target {
        let SourceContent(inner) = self;
        inner
    }
}

/// Test-only: prints the data pointer next to the text, so that two values
/// with equal text but different allocations can be told apart.
#[cfg(test)]
impl std::fmt::Display for SourceContent {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let SourceContent(inner) = self;
        write!(f, "SourceContent({:?},{})", inner.as_ptr(), inner)
    }
}
/// A block-level container that can enclose content; produced by
/// `node_to_container` and reported through `MdSection::nesting`.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum MdContainer {
/// A `list` node, carrying the marker of its first item.
List(ListMarker),
/// A `list_item` node, carrying its own marker.
ListItem(ListMarker),
/// A `block_quote` node.
Blockquote(BlockquoteMarker),
}
/// The kind of marker in front of a list item.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum ListMarker {
/// Plain bullet.
Unordered(BulletStyle),
/// Numbered item; `extract_list_marker` never produces 0 here.
Ordered(u32),
/// Task item whose checkbox is unchecked.
TaskUnchecked(BulletStyle),
/// Task item whose checkbox is checked.
TaskChecked(BulletStyle),
}
/// The character used as the bullet of an unordered list item.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum BulletStyle {
    /// `-`
    Dash,
    /// `*`
    Star,
    /// `+`
    Plus,
}

impl BulletStyle {
    /// Returns the bullet style written as `c`, or `None` if `c` is not one
    /// of `-`, `*` or `+`.
    pub fn from_char(c: char) -> Option<Self> {
        const SYMBOLS: [(char, BulletStyle); 3] = [
            ('-', BulletStyle::Dash),
            ('*', BulletStyle::Star),
            ('+', BulletStyle::Plus),
        ];
        SYMBOLS
            .iter()
            .find(|(symbol, _)| *symbol == c)
            .map(|&(_, style)| style)
    }
}
/// Horizontal alignment of a table column, as read from the delimiter row.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub(crate) enum TableAlignment {
/// Default; used when the delimiter cell does not end with `:`.
#[default]
Left,
/// Delimiter cell starts and ends with `:`.
Center,
/// Delimiter cell ends with `:` but does not start with one.
Right,
}
/// The renderable content of one section, as produced by `parse_node`.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum MdContent {
/// Inline text; an empty paragraph stands for a blank line (see `is_blank`).
Paragraph(MdParagraph),
/// ATX heading.
Header {
/// Heading level taken from the `atx_hN_marker` child (1 to 6).
tier: u8,
/// Raw source text of the heading's inline content.
text: String,
},
/// Fenced or indented code block.
CodeBlock {
/// Trimmed info string of a fenced block; empty for indented blocks.
language: String,
/// Code text.
code: String,
},
/// Thematic break.
HorizontalRule,
/// Pipe table.
Table {
/// Cells of the header row, each a list of spans.
header: Vec<Vec<Span>>,
/// Body rows, each a list of cells.
rows: Vec<Vec<Vec<Span>>>,
/// Column alignments; padded to at least one entry per header cell.
alignments: Vec<TableAlignment>,
},
}
impl MdContent {
    /// Returns `true` only for a paragraph that holds no spans.
    pub fn is_blank(&self) -> bool {
        match self {
            MdContent::Paragraph(paragraph) => paragraph.is_empty(),
            MdContent::Header { .. }
            | MdContent::CodeBlock { .. }
            | MdContent::HorizontalRule
            | MdContent::Table { .. } => false,
        }
    }
}
/// Inline content of a paragraph, split into styled spans.
#[derive(Debug, Clone, PartialEq)]
pub struct MdParagraph {
/// Text accumulated by `recurse` while spans are emitted.
/// NOTE(review): not every emitted span is mirrored here (the gap text
/// between children is not) — confirm what consumers expect.
pub backing: String,
/// The spans of the paragraph, in document order.
pub spans: Vec<Span>,
}
impl MdParagraph {
pub fn is_empty(&self) -> bool {
self.spans.is_empty()
}
fn from_inline(node: Node<'_>, text: &str, blockquote_depth: usize) -> Option<MdContent> {
let mut p = MdParagraph {
backing: String::new(),
spans: Vec::new(),
};
p.recurse(node, text, Modifier::default(), 0);
p.spans = split_newlines(p.spans);
p.spans = detect_bare_urls(p.spans);
p.spans = p
.spans
.into_iter()
.map(|mut s| {
if s.modifiers.contains(Modifier::NewLine) {
s.content = strip_blockquote_prefix(&s.content, blockquote_depth).into_owned();
}
s
})
.filter(|s| {
if s.content.is_empty() {
return s.modifiers.contains(Modifier::NewLine);
}
if s.modifiers.contains(Modifier::NewLine) {
return !is_blockquote_marker_only(s.content.trim());
}
true
})
.collect();
if p.spans.is_empty() {
return None;
}
Some(MdContent::Paragraph(p))
}
/// Walks an inline syntax tree and appends one span per leaf or text gap.
///
/// * `node` - inline node to convert; its byte ranges index into `source`.
/// * `source` - the text the inline tree was parsed from.
/// * `extra` - modifiers inherited from enclosing nodes.
/// * `_depth` - recursion depth; only passed along, never read.
///
/// Returns the URL when `node` is a `link_destination`, so the caller can
/// attach it to the matching link description span; returns `None` otherwise.
#[expect(clippy::string_slice)]
pub(crate) fn recurse(
    &mut self,
    node: Node<'_>,
    source: &str,
    extra: Modifier,
    _depth: i32,
) -> Option<SourceContent> {
    let kind = node.kind();
    // Delimiter tokens (emphasis markers, backticks, ...) are not emitted.
    if kind.contains("delimiter") {
        return None;
    }
    let current_extra = match kind {
        "emphasis" => Modifier::Emphasis,
        "strong_emphasis" => Modifier::StrongEmphasis,
        "strikethrough" => Modifier::Strikethrough,
        "code_span" => {
            let content = &source[node.byte_range()];
            let stripped = content.trim_start_matches('`').trim_end_matches('`').trim();
            self.backing.push_str(stripped);
            self.spans
                .push(Span::new(stripped.to_owned(), extra.union(Modifier::Code)));
            return None;
        }
        "hard_line_break" | "soft_break" => {
            // A line break is represented by an empty span flagged `NewLine`.
            // `split_newlines` and the filter in `from_inline` recognise only
            // that flag; with any other flag the empty span gets discarded.
            self.spans
                .push(Span::new(String::new(), extra.union(Modifier::NewLine)));
            return None;
        }
        "[" | "]" => Modifier::LinkDescriptionWrapper,
        "(" | ")" => Modifier::LinkURLWrapper,
        "link_text" => Modifier::LinkDescription,
        "inline_link" => Modifier::Link,
        "image" => Modifier::Image,
        "image_description" => Modifier::LinkDescription,
        "link_destination" => {
            let url = source[node.byte_range()].to_owned();
            let source_content = SourceContent::from(url.as_ref());
            self.backing.push_str(&url);
            self.spans.push(Span::link(
                url,
                extra.union(Modifier::LinkURL),
                Some(source_content.clone()),
            ));
            return Some(source_content);
        }
        _ => Modifier::default(),
    };
    let extra = extra.union(current_extra);

    // Leaf node: its whole text becomes one span.
    if node.child_count() == 0 {
        let leaf_text = &source[node.start_byte()..node.end_byte()];
        self.backing.push_str(leaf_text);
        self.spans.push(Span::new(leaf_text.to_owned(), extra));
        return None;
    }

    // Inner node: emit the text between children as spans and recurse into
    // every child that is not a bare punctuation token.
    let mut pos = node.start_byte();
    for child in node.children(&mut node.walk()) {
        if is_punctuation(child.kind(), current_extra) {
            // `pos` is deliberately left alone, so the punctuation character
            // ends up in the next gap span.
            continue;
        }
        if child.start_byte() > pos {
            self.spans
                .push(Span::new(source[pos..child.start_byte()].to_owned(), extra));
        }
        let source_content = self.recurse(child, source, extra, _depth + 1);
        if let Some(source_content) = source_content {
            // Attach the URL to the most recent non-image link description.
            if let Some(desc) = self.spans.iter_mut().rev().find(|span| {
                span.modifiers.contains(Modifier::LinkDescription)
                    && !span.modifiers.contains(Modifier::Image)
            }) {
                desc.source_content = Some(source_content);
            }
        }
        pos = child.end_byte();
    }
    // Trailing text after the last child.
    if pos < node.end_byte() {
        self.backing.push_str(&source[pos..node.end_byte()]);
        self.spans
            .push(Span::new(source[pos..node.end_byte()].to_owned(), extra));
    }
    None
}
fn empty() -> MdParagraph {
Self {
backing: String::new(),
spans: Vec::new(),
}
}
}
impl From<&str> for MdParagraph {
fn from(value: &str) -> Self {
let owned = value.to_owned();
Self {
backing: owned.clone(),
spans: vec![Span::new(owned, Modifier::default())],
}
}
}
/// Payload of `MdContainer::Blockquote`; carries no data.
#[derive(Debug, Default, Clone, PartialEq)]
pub(crate) struct BlockquoteMarker;
/// One item yielded by `MdIterator`: a piece of content and where it sits.
#[derive(Debug)]
pub(crate) struct MdSection {
/// The parsed content.
pub content: MdContent,
/// The containers open when the section was emitted, outermost first.
pub nesting: Vec<MdContainer>,
/// `true` when the innermost enclosing list item has already produced an
/// earlier section.
pub is_list_continuation: bool,
}
/// Removes up to `depth` leading blockquote markers from `s`.
///
/// A marker is a `>` optionally followed by one space. Stripping stops at the
/// first position that does not start with `>`. The input is returned
/// borrowed when nothing was removed.
fn strip_blockquote_prefix(s: &str, depth: usize) -> Cow<'_, str> {
    let mut rest = s;
    for _ in 0..depth {
        let Some(after_marker) = rest.strip_prefix('>') else {
            break;
        };
        rest = after_marker.strip_prefix(' ').unwrap_or(after_marker);
    }
    if rest.len() < s.len() {
        Cow::Owned(rest.to_owned())
    } else {
        Cow::Borrowed(s)
    }
}
/// Returns `true` when `s` consists only of `>` markers, each optionally
/// followed by a single space, and at least one marker is followed by a space.
///
/// Any other character, including a space that does not directly follow a
/// `>`, makes the result `false`, as does the empty string.
fn is_blockquote_marker_only(s: &str) -> bool {
    let mut rest = s;
    let mut saw_spaced_marker = false;
    while let Some(after_marker) = rest.strip_prefix('>') {
        match after_marker.strip_prefix(' ') {
            Some(after_space) => {
                saw_spaced_marker = true;
                rest = after_space;
            }
            None => rest = after_marker,
        }
    }
    rest.is_empty() && saw_spaced_marker
}
/// Tells whether a child node of kind `kind` is a bare punctuation token that
/// `recurse` should skip.
///
/// A punctuation token is a kind consisting of exactly one ASCII punctuation
/// character. Brackets and parentheses are not treated as punctuation when
/// the parent contributed exactly `Modifier::Link`.
#[inline]
fn is_punctuation(kind: &str, parent_modifier: Modifier) -> bool {
    let mut chars = kind.chars();
    let (Some(symbol), None) = (chars.next(), chars.next()) else {
        return false;
    };
    if parent_modifier == Modifier::Link && matches!(symbol, '(' | ')' | '[' | ']') {
        return false;
    }
    // The ASCII punctuation set is: ! " # $ % & ' ( ) * + , - . / : ; < = > ?
    // @ [ \ ] ^ _ ` { | } ~
    symbol.is_ascii_punctuation()
}
// Matches `http://` or `https://` followed by one or more characters that are
// not whitespace, angle brackets, square brackets or parentheses. Compiled on
// first use; the pattern is a constant, so the `unwrap` only guards against a
// typo in the literal.
#[expect(clippy::unwrap_used)]
static URL_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"https?://[^\s<>\[\]()]+").unwrap());
/// Splits plain-text spans around bare `http(s)://` URLs.
///
/// Spans flagged `Link`, `LinkURL` or `Code` pass through untouched, as do
/// spans without any URL. Every detected URL is emitted as three spans: `(`,
/// the URL (flagged `LinkURL | BareLink`) and `)`. Only the first span emitted
/// for an input span keeps its `NewLine` flag; all later pieces drop it.
fn detect_bare_urls(mdspans: Vec<Span>) -> Vec<Span> {
    let untouchable = Modifier::Link | Modifier::LinkURL | Modifier::Code;
    let mut out = Vec::with_capacity(mdspans.len());

    for span in mdspans {
        if span.modifiers.intersects(untouchable) {
            out.push(span);
            continue;
        }

        let original_mods = span.modifiers;
        let later_mods = original_mods.difference(Modifier::NewLine);
        let text = span.content.as_str();
        let mut consumed = 0;
        let mut emitted_any = false;

        for url_match in URL_REGEX.find_iter(text) {
            // Plain text between the previous URL (or the start) and this one.
            if url_match.start() > consumed {
                let gap_mods = if emitted_any {
                    later_mods
                } else {
                    original_mods
                };
                emitted_any = true;
                #[expect(clippy::string_slice)]
                out.push(Span::new(
                    text[consumed..url_match.start()].to_owned(),
                    gap_mods,
                ));
            }

            let opening_base = if emitted_any {
                later_mods
            } else {
                original_mods
            };
            emitted_any = true;
            out.push(Span::new(
                "(".to_owned(),
                opening_base | Modifier::LinkURLWrapper,
            ));
            out.push(Span::link(
                url_match.as_str().to_owned(),
                later_mods | Modifier::LinkURL | Modifier::BareLink,
                None,
            ));
            out.push(Span::new(
                ")".to_owned(),
                later_mods | Modifier::LinkURLWrapper,
            ));
            consumed = url_match.end();
        }

        if !emitted_any {
            // No URL found: hand the span back unchanged.
            out.push(span);
        } else if consumed < text.len() {
            #[expect(clippy::string_slice)]
            out.push(Span::new(text[consumed..].to_owned(), later_mods));
        }
    }
    out
}
/// Splits spans at line feeds so that no emitted span contains `\n`.
///
/// Each piece after a line feed is flagged `NewLine`. Empty pieces are not
/// emitted; when a span ends with a line feed, the flag is carried over to
/// the next input span instead. Spans without a line feed (including empty
/// line-break spans) pass through, apart from a carried-over flag.
fn split_newlines(mdspans: Vec<Span>) -> Vec<Span> {
    let mut out = Vec::with_capacity(mdspans.len());
    let mut pending_break = false;

    for mut span in mdspans {
        if pending_break {
            span.modifiers.insert(Modifier::NewLine);
            pending_break = false;
        }
        // An empty span never contains a line feed, so this also covers the
        // empty `NewLine` marker spans.
        if !span.content.contains('\n') {
            out.push(span);
            continue;
        }

        let Span {
            content,
            modifiers,
            source_content,
        } = span;
        for (index, line) in content.split('\n').enumerate() {
            if line.is_empty() {
                pending_break = true;
                continue;
            }
            pending_break = false;
            // Only the piece before the first line feed keeps the flags as-is.
            let line_mods = if index == 0 {
                modifiers
            } else {
                modifiers.union(Modifier::NewLine)
            };
            out.push(Span {
                content: line.to_owned(),
                modifiers: line_mods,
                source_content: source_content.clone(),
            });
        }
    }
    out
}
// Unit tests for span post-processing (`split_newlines`, `detect_bare_urls`)
// and for `MdIterator` driven by the tree-sitter markdown grammars.
#[cfg(test)]
#[expect(clippy::unwrap_used)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
// A leading line feed is removed from the span and no empty span is produced.
#[test]
fn split_no_empty_spans() {
let mdspans = split_newlines(vec![
Span::new("one line".to_owned(), Modifier::default()),
Span::new(".".to_owned(), Modifier::default()),
Span::new("\nanother line".to_owned(), Modifier::NewLine),
Span::new(".".to_owned(), Modifier::default()),
]);
assert_eq!(
mdspans,
vec![
Span::new("one line".to_owned(), Modifier::default()),
Span::new(".".to_owned(), Modifier::default()),
Span::new("another line".to_owned(), Modifier::NewLine),
Span::new(".".to_owned(), Modifier::default()),
]
);
}
// Parser configured with the block-level markdown grammar.
fn make_parser() -> Parser {
let mut parser = Parser::new();
parser
.set_language(&tree_sitter_md::LANGUAGE.into())
.unwrap();
parser
}
// Parser configured with the inline markdown grammar.
fn make_inline_parser() -> Parser {
let mut inline_parser = Parser::new();
inline_parser
.set_language(&tree_sitter_md::INLINE_LANGUAGE.into())
.unwrap();
inline_parser
}
// A `>` line without text inside a block quote yields a blank section.
#[test]
fn blockquote_blank_lines() {
let mut parser = make_parser();
let mut inline_parser = make_inline_parser();
let source = r#"> First paragraph
>
> Second paragraph"#;
let tree = parser.parse(source, None).unwrap();
let sections: Vec<_> = MdIterator::new(tree, &mut inline_parser, source).collect();
assert_eq!(sections.len(), 3);
assert!(!sections[0].content.is_blank());
assert!(sections[1].content.is_blank());
assert!(!sections[2].content.is_blank());
}
// A single `#` heading is reported with tier 1.
#[test]
fn parse_header() {
let mut parser = make_parser();
let mut inline_parser = make_inline_parser();
let source = "# Hello\n";
let tree = parser.parse(source, None).unwrap();
let sections: Vec<_> = MdIterator::new(tree, &mut inline_parser, source).collect();
assert_eq!(sections.len(), 1);
assert!(matches!(
sections[0].content,
MdContent::Header { tier: 1, .. }
));
}
// A URL inside plain text is split out and wrapped in parentheses.
#[test]
fn detect_bare_url() {
let spans = vec![Span::new(
"Check https://example.com for more.".to_owned(),
Modifier::default(),
)];
let result = detect_bare_urls(spans);
assert_eq!(result.len(), 5);
assert_eq!(result[0].content, "Check ");
assert!(!result[0].modifiers.contains(Modifier::LinkURL));
assert_eq!(result[1].content, "(");
assert!(result[1].modifiers.contains(Modifier::LinkURLWrapper));
assert_eq!(result[2].content, "https://example.com");
assert!(result[2].modifiers.contains(Modifier::LinkURL));
assert_eq!(result[3].content, ")");
assert!(result[3].modifiers.contains(Modifier::LinkURLWrapper));
assert_eq!(result[4].content, " for more.");
assert!(!result[4].modifiers.contains(Modifier::LinkURL));
}
// Modifiers of the input span are kept on every piece it is split into.
#[test]
fn detect_bare_url_preserves_existing_modifiers() {
let spans = vec![Span::new(
"See https://example.com now".to_owned(),
Modifier::Emphasis,
)];
let result = detect_bare_urls(spans);
assert_eq!(result.len(), 5);
assert!(result[0].modifiers.contains(Modifier::Emphasis));
assert!(result[1].modifiers.contains(Modifier::Emphasis));
assert!(result[1].modifiers.contains(Modifier::LinkURLWrapper));
assert!(result[2].modifiers.contains(Modifier::Emphasis));
assert!(result[2].modifiers.contains(Modifier::LinkURL));
assert!(result[3].modifiers.contains(Modifier::Emphasis));
assert!(result[3].modifiers.contains(Modifier::LinkURLWrapper));
assert!(result[4].modifiers.contains(Modifier::Emphasis));
}
// Spans that already belong to a link are returned unchanged.
#[test]
fn detect_bare_url_skips_existing_links() {
let spans = vec![Span::new(
"https://example.com".to_owned(),
Modifier::Link | Modifier::LinkURL,
)];
let result = detect_bare_urls(spans.clone());
assert_eq!(result, spans);
}
// URLs inside code spans are returned unchanged.
#[test]
fn detect_bare_url_skips_code() {
let spans = vec![Span::new("https://example.com".to_owned(), Modifier::Code)];
let result = detect_bare_urls(spans.clone());
assert_eq!(result, spans);
}
// Angle brackets around a URL are kept as separate plain spans.
#[test]
fn angle_bracket_url_preserved() {
let spans = vec![Span::new(
"<http://www.example.com>".to_owned(),
Modifier::default(),
)];
let result = detect_bare_urls(spans);
assert_eq!(result.len(), 5);
assert_eq!(result[0].content, "<");
assert_eq!(result[1].content, "(");
assert!(result[1].modifiers.contains(Modifier::LinkURLWrapper));
assert_eq!(result[2].content, "http://www.example.com");
assert!(result[2].modifiers.contains(Modifier::LinkURL));
assert_eq!(result[3].content, ")");
assert!(result[3].modifiers.contains(Modifier::LinkURLWrapper));
assert_eq!(result[4].content, ">");
}
// All spans of an image carry `Image`; its description gets no source content.
#[test]
fn image_contained() {
let mut parser = make_parser();
let mut inline_parser = make_inline_parser();
let source = "\n";
let tree = parser.parse(source, None).unwrap();
let first = MdIterator::new(tree, &mut inline_parser, source)
.next()
.unwrap();
let MdContent::Paragraph(MdParagraph { spans, .. }) = first.content else {
panic!("expected paragraph");
};
assert_eq!(spans[0], Span::new("![".to_owned(), Modifier::Image));
assert_eq!(
spans[1],
Span {
content: "text".to_owned(),
modifiers: Modifier::Image | Modifier::LinkDescription,
source_content: None,
}
);
assert_eq!(spans[2], Span::new("](".to_owned(), Modifier::Image));
assert_eq!(
spans[3],
Span {
content: "url".to_owned(),
modifiers: Modifier::Image | Modifier::LinkURL,
source_content: Some(SourceContent::from("url")),
}
);
assert_eq!(spans[4], Span::new(")".to_owned(), Modifier::Image));
}
}