use crate::attrs::ParsedAttrs;
use crate::errors::MarkdownError;
use crate::link_index::{OutboundLink, is_internal_link, split_url_anchor};
use crate::link_transform::{LinkTransformConfig, transform_link};
use crate::media::MediaEmbed;
use crate::oembed::PageInfo;
use crate::oembed_cache::OembedCache;
use crate::vid::Vid;
use crate::wikilink::{parse_tag_link, transform_wikilinks};
use pulldown_cmark::{
BlockQuoteKind, CowStr, Event, HeadingLevel, MetadataBlockKind, Options, Parser as MDParser,
Tag, TagEnd, TextMergeStream,
};
use regex::Regex;
use std::{
collections::{HashMap, HashSet},
fs::{self, File},
io::Read,
path::{Path, PathBuf},
sync::Arc,
};
use yaml_rust2::{Yaml, YamlLoader};
/// Parser options shared by every full parse in this module.
///
/// Returns `Options::all()`, i.e. every pulldown-cmark extension flag is
/// switched on.
pub(crate) fn markdown_options() -> Options {
Options::all()
}
/// Result of [`parse`]: the raw source plus the structural data extracted
/// from it, without any rendered HTML.
#[derive(Debug, Clone)]
pub struct ParsedDocument {
/// Full file contents exactly as read from disk.
pub source: String,
/// Simplified YAML frontmatter; `title` falls back to the leading H1 text
/// when the frontmatter does not define one.
pub frontmatter: SimpleMetadata,
/// Every heading in document order, with its generated anchor id.
pub headings: Vec<HeadingInfo>,
/// `true` when the first heading of the document is a level-1 heading.
pub has_h1: bool,
/// Whitespace-separated word count of text outside code and metadata blocks.
pub word_count: usize,
}
impl ParsedDocument {
    /// Re-parses the stored source and returns a fresh event stream.
    ///
    /// The stream uses [`markdown_options`] and merges adjacent text events.
    /// Note that this is the raw parser output: none of the rewrites applied
    /// during parsing/rendering (heading ids, hints, rule attributes) are
    /// present in it.
    pub fn events(&self) -> TextMergeStream<'_, MDParser<'_>> {
        TextMergeStream::new(MDParser::new_ext(
            self.source.as_str(),
            markdown_options(),
        ))
    }
}
/// Reads a Markdown file and extracts its structure without rendering HTML.
///
/// Collects the YAML frontmatter, all headings (with generated anchor ids),
/// whether the document opens with an H1, and a word count that ignores code
/// blocks and metadata blocks. When the frontmatter has no `title`, the text
/// of the leading H1 is used instead.
///
/// # Errors
///
/// Returns [`MarkdownError::ReadFailed`] when the file cannot be read.
pub fn parse<P: AsRef<Path>>(file: P) -> Result<ParsedDocument, MarkdownError> {
    let file = file.as_ref();
    let markdown_input = fs::read_to_string(file).map_err(|e| MarkdownError::ReadFailed {
        path: file.to_path_buf(),
        source: e,
    })?;
    let (events, headings, _section_attrs) = collect_events_and_headings(&markdown_input);
    let has_h1 = headings.first().is_some_and(|h| h.level == 1);
    let mut frontmatter = SimpleMetadata::new();
    let mut word_count: usize = 0;
    let mut in_yaml = false;
    let mut in_code_block = false;
    let mut in_metadata_block = false;
    for event in &events {
        match event {
            Event::Start(Tag::MetadataBlock(MetadataBlockKind::YamlStyle)) => {
                in_yaml = true;
                in_metadata_block = true;
            }
            Event::End(TagEnd::MetadataBlock(MetadataBlockKind::YamlStyle)) => {
                in_yaml = false;
                in_metadata_block = false;
            }
            Event::Text(text) if in_yaml => {
                // The loader yields one entry per YAML document and may yield
                // none at all (e.g. a block holding only comments), so take
                // the first document if there is one instead of indexing.
                let metadata_parsed = YamlLoader::load_from_str(text)
                    .ok()
                    .and_then(|docs| docs.into_iter().next());
                frontmatter = yaml_frontmatter_simplified(&metadata_parsed);
                // Only the first text event of the block is treated as YAML.
                in_yaml = false;
            }
            // Non-YAML metadata blocks are skipped for word counting only.
            Event::Start(Tag::MetadataBlock(_)) => in_metadata_block = true,
            Event::End(TagEnd::MetadataBlock(_)) => in_metadata_block = false,
            Event::Start(Tag::CodeBlock(_)) => in_code_block = true,
            Event::End(TagEnd::CodeBlock) => in_code_block = false,
            Event::Text(text) if !in_code_block && !in_metadata_block => {
                word_count += text.split_whitespace().count();
            }
            _ => {}
        }
    }
    // `has_h1` guarantees `headings` is non-empty, so indexing is safe here.
    if !frontmatter.contains_key("title") && has_h1 {
        frontmatter.insert(
            "title".to_string(),
            serde_json::Value::String(headings[0].text.clone()),
        );
    }
    Ok(ParsedDocument {
        source: markdown_input,
        frontmatter,
        headings,
        has_h1,
        word_count,
    })
}
/// One heading found in a document.
#[derive(Debug, Clone, serde::Serialize)]
pub struct HeadingInfo {
/// Heading level, 1 (H1) through 6 (H6).
pub level: u8,
/// Concatenated text events found inside the heading.
pub text: String,
/// Anchor id generated from `text`, also written into the heading's start event.
pub id: String,
}
/// Everything produced by rendering one Markdown file.
#[derive(Debug, Clone)]
pub struct MarkdownRenderResult {
/// Simplified YAML frontmatter; `title` falls back to the leading H1 text.
pub frontmatter: SimpleMetadata,
/// Every heading in document order, with its generated anchor id.
pub headings: Vec<HeadingInfo>,
/// Rendered HTML body.
pub html: String,
/// Links found in the document, keeping only the first link per target path.
pub outbound_links: Vec<OutboundLink>,
/// `true` when the first heading of the document is a level-1 heading.
pub has_h1: bool,
/// Whitespace-separated words in text outside metadata and code blocks.
pub word_count: usize,
/// Sentence count accumulated over the same text as `word_count`.
pub sentence_count: usize,
/// Syllable count accumulated over the same words as `word_count`.
pub syllable_count: usize,
}
/// Mutable state threaded through `process_event` while a document's events
/// are rewritten one by one.
struct EventState {
// Copy of the render root; stored but not read by the code in this file.
#[allow(dead_code)] root_path: PathBuf,
// Media embed opened by an image start event, consumed at the image end to
// emit the embed's closing HTML.
current_media: Option<MediaEmbed>,
// True between the start and end events of a metadata block.
in_metadata: bool,
// `in_link`: true between a link's start and end events.
// `metadata_source`: kind of the most recently opened metadata block.
in_link: bool, metadata_source: Option<MetadataBlockKind>,
// First YAML document parsed from the metadata block's text, if any.
metadata_parsed: Option<Yaml>,
// Settings forwarded to `transform_link` for link, image and vid URLs.
link_transform_config: LinkTransformConfig,
// Page info looked up by bare-URL text before processing started.
prefetched_oembed: HashMap<String, PageInfo>,
// Forwarded to the media/vid HTML generators.
server_mode: bool,
// Forwarded to the media/vid HTML generators.
transcode_enabled: bool,
// One entry per closed link, in document order (not yet deduplicated).
collected_links: Vec<OutboundLink>,
// Original (untransformed) destination of the link currently open.
current_link_dest: Option<String>,
// Text accumulated inside the link currently open.
current_link_text: String,
// Tag sources handed to `parse_tag_link` when resolving link destinations.
valid_tag_sources: HashSet<String>,
// Whitespace-separated words seen in text outside metadata/code blocks.
word_count: usize,
// True between the start and end events of a code block.
in_code_block: bool,
// Sentences counted so far.
sentence_count: usize,
// Syllables counted so far.
syllable_count: usize,
// Set when the latest non-blank text did not end with a sentence
// terminator; the end of a paragraph/heading/list item then counts as one
// more sentence.
block_needs_sentence_bump: bool,
}
/// Flat frontmatter map: string keys to JSON values. Nested YAML hashes are
/// flattened into dotted keys (`parent.child`) by `yaml_hash_to_metadata`.
pub type SimpleMetadata = HashMap<String, serde_json::Value>;
/// Counts sentence endings in `text`.
///
/// A run of `.`, `!` or `?` counts as one sentence end when it is followed by
/// ASCII whitespace or by the end of the text, so `"Wait..."` counts once and
/// the dots in `"v1.2.3"` do not count at all.
///
/// Returns `(count, ends_with_terminator)`, where the flag tells whether the
/// text, ignoring trailing whitespace, ends in one of those characters.
fn count_sentence_terminators(text: &str) -> (usize, bool) {
    fn is_terminator(byte: u8) -> bool {
        matches!(byte, b'.' | b'!' | b'?')
    }

    let bytes = text.as_bytes();
    let mut sentences = 0usize;
    let mut pos = 0usize;
    while pos < bytes.len() {
        if !is_terminator(bytes[pos]) {
            pos += 1;
            continue;
        }
        // Skip over the whole run of terminators in one step.
        let run_end = bytes[pos..]
            .iter()
            .position(|&b| !is_terminator(b))
            .map_or(bytes.len(), |offset| pos + offset);
        // The run ends a sentence only at end-of-text or before whitespace.
        if bytes
            .get(run_end)
            .is_none_or(|next| next.is_ascii_whitespace())
        {
            sentences += 1;
        }
        pos = run_end;
    }

    let ends_with_terminator = text.trim_end().ends_with(['.', '!', '?']);
    (sentences, ends_with_terminator)
}
/// Returns the text of the first level-1 heading that contains any text.
///
/// Only text events inside the heading are collected, so inline formatting
/// markers are dropped (`# Hello **World**` yields `"Hello World"`). An H1
/// without text is skipped and the search continues. Returns `None` when no
/// such heading exists.
pub fn extract_first_h1(markdown_input: &str) -> Option<String> {
    let events = TextMergeStream::new(MDParser::new_ext(
        markdown_input,
        Options::ENABLE_YAML_STYLE_METADATA_BLOCKS,
    ));
    let mut inside_h1 = false;
    let mut title = String::new();
    for event in events {
        match event {
            Event::Start(Tag::Heading {
                level: HeadingLevel::H1,
                ..
            }) => inside_h1 = true,
            Event::Text(fragment) if inside_h1 => title.push_str(&fragment),
            Event::End(TagEnd::Heading(HeadingLevel::H1)) if !title.is_empty() => {
                return Some(title);
            }
            // Empty H1: keep looking for a later one.
            Event::End(TagEnd::Heading(HeadingLevel::H1)) => inside_h1 = false,
            _ => {}
        }
    }
    None
}
/// U+2014 EM DASH. A paragraph consisting of this character followed by
/// ` {…}` is treated as a horizontal rule carrying attributes.
const EM_DASH: &str = "\u{2014}";
/// Recognises the hint shorthand at the start of a paragraph.
///
/// `"!> "` maps to a tip, `"?> "` to a warning and `"x> "` to a caution.
/// On a match, returns the block-quote kind together with the text that
/// follows the prefix; otherwise returns `None`.
fn detect_hint_prefix(text: &str) -> Option<(BlockQuoteKind, &str)> {
    let known_prefixes = [
        ("!> ", BlockQuoteKind::Tip),
        ("?> ", BlockQuoteKind::Warning),
        ("x> ", BlockQuoteKind::Caution),
    ];
    known_prefixes
        .into_iter()
        .find_map(|(prefix, kind)| text.strip_prefix(prefix).map(|rest| (kind, rest)))
}
/// Replaces "em dash + attributes" paragraphs with horizontal rules.
///
/// A paragraph made of exactly one text event of the form `— {…}` whose
/// attribute part parses successfully becomes a single `Event::Rule`. The
/// parsed attributes are stored under the 1-based index of that rule, counting
/// plain rules as well. All other events are copied through unchanged.
#[allow(dead_code)]
fn transform_rule_attrs(events: Vec<Event<'_>>) -> (Vec<Event<'_>>, HashMap<usize, ParsedAttrs>) {
    let mut output = Vec::with_capacity(events.len());
    let mut attrs_by_rule = HashMap::new();
    let mut rules_seen = 0;
    let mut cursor = 0;
    while cursor < events.len() {
        // Look at the next three events as a unit: Start(P), Text, End(P).
        let styled_rule = match events.get(cursor..cursor + 3) {
            Some(
                [
                    Event::Start(Tag::Paragraph),
                    Event::Text(text),
                    Event::End(TagEnd::Paragraph),
                ],
            ) => text
                .strip_prefix(EM_DASH)
                .filter(|rest| rest.starts_with(" {") && rest.ends_with('}'))
                .and_then(|rest| ParsedAttrs::parse(rest.trim())),
            _ => None,
        };

        match styled_rule {
            Some(attrs) => {
                output.push(Event::Rule);
                rules_seen += 1;
                attrs_by_rule.insert(rules_seen, attrs);
                cursor += 3;
            }
            None => {
                let current = &events[cursor];
                if matches!(current, Event::Rule) {
                    rules_seen += 1;
                }
                output.push(current.clone());
                cursor += 1;
            }
        }
    }
    (output, attrs_by_rule)
}
/// Parses `markdown_input` and returns the (rewritten) event list, the
/// headings found, and the attributes attached to styled horizontal rules.
///
/// While collecting events this function:
/// - accumulates the text events of each heading, generates an anchor id for
///   it and writes that id into the heading's start event;
/// - turns a paragraph whose first text starts with `!> `, `?> ` or `x> `
///   into a block quote of the matching kind wrapping that paragraph;
/// - replaces a paragraph consisting of a single `— {…}` text event with an
///   `Event::Rule`, storing the parsed attributes under the 1-based index of
///   that rule (plain rules are counted too).
fn collect_events_and_headings(
markdown_input: &str,
) -> (
Vec<Event<'_>>,
Vec<HeadingInfo>,
HashMap<usize, ParsedAttrs>,
) {
let parser = MDParser::new_ext(markdown_input, markdown_options());
let parser = TextMergeStream::new(parser);
let mut events = Vec::new();
let mut headings = Vec::new();
// Usage counters per base id, used to de-duplicate generated anchor ids.
let mut anchor_ids: HashMap<String, usize> = HashMap::new();
// `Some(buffer)` while inside a heading; the buffer collects its text.
let mut in_heading_text: Option<String> = None;
let mut section_attrs = HashMap::new();
// Number of rules (plain or styled) emitted so far.
let mut section_index = 0;
// True while inside a paragraph that was rewritten into a hint block quote.
let mut hint_open = false;
for event in parser {
match &event {
Event::Start(Tag::Heading { .. }) => {
in_heading_text = Some(String::new());
events.push(event);
}
// Heading text takes precedence over hint detection below.
Event::Text(text) if in_heading_text.is_some() => {
if let Some(ref mut heading_text) = in_heading_text {
heading_text.push_str(text);
}
events.push(event);
}
// First text event of a paragraph: check for a hint prefix.
Event::Text(text) if matches!(events.last(), Some(Event::Start(Tag::Paragraph))) => {
if let Some((kind, rest)) = detect_hint_prefix(text) {
// Replace the already-pushed paragraph start with
// BlockQuote(kind) > Paragraph > Text(rest).
events.pop(); events.push(Event::Start(Tag::BlockQuote(Some(kind))));
events.push(Event::Start(Tag::Paragraph));
events.push(Event::Text(CowStr::from(rest.to_owned())));
hint_open = true;
continue;
}
events.push(event);
}
Event::End(TagEnd::Heading(heading_level)) => {
if let Some(text) = in_heading_text.take() {
let id = generate_anchor_id(&text, &mut anchor_ids);
let level_num = match heading_level {
HeadingLevel::H1 => 1,
HeadingLevel::H2 => 2,
HeadingLevel::H3 => 3,
HeadingLevel::H4 => 4,
HeadingLevel::H5 => 5,
HeadingLevel::H6 => 6,
};
headings.push(HeadingInfo {
level: level_num,
text: text.clone(),
id: id.clone(),
});
// Walk backwards to the most recent heading start and overwrite its id
// with the generated one (any id from the source is discarded).
for i in (0..events.len()).rev() {
if let Event::Start(Tag::Heading {
level,
id: _,
classes,
attrs,
}) = &events[i]
{
events[i] = Event::Start(Tag::Heading {
level: *level,
id: Some(CowStr::from(id)),
classes: classes.clone(),
attrs: attrs.clone(),
});
break;
}
}
}
events.push(event);
}
Event::End(TagEnd::Paragraph) => {
if hint_open {
// Close the paragraph, then the synthetic block quote opened for the hint.
// NOTE(review): the end tag carries `None` while the start carried
// `Some(kind)` — confirm the HTML writer does not need the kind here.
events.push(event);
events.push(Event::End(TagEnd::BlockQuote(None)));
hint_open = false;
continue;
}
let len = events.len();
if len >= 2 {
// A styled rule is a paragraph holding exactly one text event
// of the form "— {…}".
let is_rule_attrs = matches!(
(&events[len - 2], &events[len - 1]),
(Event::Start(Tag::Paragraph), Event::Text(_))
) && {
if let Event::Text(text) = &events[len - 1] {
text.starts_with(EM_DASH)
&& text.strip_prefix(EM_DASH).is_some_and(|rest| {
rest.starts_with(" {") && rest.ends_with('}')
})
} else {
false
}
};
if is_rule_attrs {
let parsed = if let Event::Text(text) = &events[len - 1] {
text.strip_prefix(EM_DASH)
.and_then(|rest| ParsedAttrs::parse(rest.trim()))
} else {
None
};
// Drop the paragraph start and its text; the paragraph end is never pushed.
events.pop(); events.pop();
events.push(Event::Rule);
section_index += 1;
// The rule is emitted even when the attributes fail to parse;
// in that case no attributes are recorded for it.
if let Some(attrs) = parsed {
section_attrs.insert(section_index, attrs);
}
continue;
}
}
events.push(event);
}
Event::Rule => {
section_index += 1;
events.push(event);
}
_ => {
events.push(event);
}
}
}
(events, headings, section_attrs)
}
#[allow(clippy::too_many_arguments)]
pub async fn render(
file: PathBuf,
root_path: &Path,
oembed_timeout_ms: u64,
link_transform_config: LinkTransformConfig,
server_mode: bool,
transcode_enabled: bool,
valid_tag_sources: HashSet<String>,
mark_incomplete: bool,
incomplete_markers: &[String],
) -> Result<MarkdownRenderResult, MarkdownError> {
render_with_cache(
file,
root_path,
oembed_timeout_ms,
link_transform_config,
None,
server_mode,
transcode_enabled,
valid_tag_sources,
mark_incomplete,
incomplete_markers,
)
.await
}
/// Renders a Markdown file to HTML, fetching oEmbed data for bare URLs.
///
/// Steps: read the file, rewrite wikilinks (only when `valid_tag_sources` is
/// non-empty), collect events and headings, prefetch page info for bare URLs
/// (only when `oembed_timeout_ms > 0`, consulting `oembed_cache` if given),
/// process every event, optionally wrap blocks that start with one of
/// `incomplete_markers` (only when `mark_incomplete` is set), and finally
/// produce the HTML and metadata.
///
/// # Errors
///
/// Returns [`MarkdownError::ReadFailed`] when the file cannot be read, or any
/// error produced while finalizing the render.
#[allow(clippy::too_many_arguments)]
pub async fn render_with_cache(
    file: PathBuf,
    root_path: &Path,
    oembed_timeout_ms: u64,
    link_transform_config: LinkTransformConfig,
    oembed_cache: Option<Arc<OembedCache>>,
    server_mode: bool,
    transcode_enabled: bool,
    valid_tag_sources: HashSet<String>,
    mark_incomplete: bool,
    incomplete_markers: &[String],
) -> Result<MarkdownRenderResult, MarkdownError> {
    let source_text = match fs::read_to_string(&file) {
        Ok(text) => text,
        Err(source) => return Err(MarkdownError::ReadFailed { path: file, source }),
    };

    // Wikilink rewriting is skipped entirely when no tag sources are known.
    let markdown_input = if valid_tag_sources.is_empty() {
        source_text
    } else {
        transform_wikilinks(&source_text, &valid_tag_sources)
    };

    let (events, headings, section_attrs) = collect_events_and_headings(&markdown_input);
    let has_h1 = matches!(headings.first(), Some(first) if first.level == 1);

    // A zero timeout disables oEmbed prefetching altogether.
    let prefetched_oembed = match oembed_timeout_ms {
        0 => HashMap::new(),
        timeout_ms => prefetch_oembed_urls(&events, timeout_ms, &oembed_cache).await,
    };

    let (events, state) = process_all_events(
        events,
        root_path,
        link_transform_config,
        prefetched_oembed,
        server_mode,
        transcode_enabled,
        valid_tag_sources,
    );

    // The marker regex is only built when marking was requested.
    let marker_re = if mark_incomplete {
        build_incomplete_marker_regex(incomplete_markers)
    } else {
        None
    };
    let events = match marker_re {
        Some(re) => mark_incomplete_blocks(events, &re),
        None => events,
    };

    finalize_render(
        events,
        state,
        section_attrs,
        &markdown_input,
        headings,
        has_h1,
    )
}
/// Runs every event through `process_event`, threading one `EventState`
/// through the whole sequence.
///
/// Returns the rewritten events (one per input event, in order) together
/// with the final state, which holds the collected links, parsed metadata
/// and readability counters.
#[allow(clippy::too_many_arguments)]
fn process_all_events<'a>(
    events: Vec<Event<'a>>,
    root_path: &Path,
    link_transform_config: LinkTransformConfig,
    prefetched_oembed: HashMap<String, PageInfo>,
    server_mode: bool,
    transcode_enabled: bool,
    valid_tag_sources: HashSet<String>,
) -> (Vec<Event<'a>>, EventState) {
    let initial_state = EventState {
        // Inputs supplied by the caller.
        root_path: root_path.to_path_buf(),
        link_transform_config,
        prefetched_oembed,
        server_mode,
        transcode_enabled,
        valid_tag_sources,
        // Position tracking, all "outside everything" at the start.
        current_media: None,
        in_metadata: false,
        in_link: false,
        in_code_block: false,
        metadata_source: None,
        metadata_parsed: None,
        // Link collection.
        collected_links: Vec::new(),
        current_link_dest: None,
        current_link_text: String::new(),
        // Readability counters.
        word_count: 0,
        sentence_count: 0,
        syllable_count: 0,
        block_needs_sentence_bump: false,
    };

    let rewritten = Vec::with_capacity(events.len());
    events
        .into_iter()
        .fold((rewritten, initial_state), |(mut done, state), event| {
            let (processed, next_state) = process_event(event, state);
            done.push(processed);
            (done, next_state)
        })
}
/// Raw HTML inserted right after the start of a block flagged as incomplete.
const INCOMPLETE_SPAN_OPEN: &str = "<span class=\"mbr-incomplete\">";
/// Raw HTML inserted right before the end of a block flagged as incomplete.
const INCOMPLETE_SPAN_CLOSE: &str = "</span>";
/// Builds a regex matching any of `markers` at the very start of a string.
///
/// Markers are matched literally (they are regex-escaped) and empty markers
/// are ignored. A marker that ends in a word character must be followed by a
/// word boundary, so `TODO` matches `"TODO fix"` but not `"TODOS"`. A marker
/// that ends in punctuation (e.g. `TODO:` or `???`) gets no boundary
/// assertion: `\b` after a non-word character would demand that a word
/// character follows immediately, which would reject `"TODO: fix"`.
///
/// Returns `None` when no non-empty marker is given or the pattern fails to
/// compile.
pub(crate) fn build_incomplete_marker_regex(markers: &[String]) -> Option<Regex> {
    let alternatives: Vec<String> = markers
        .iter()
        .filter(|marker| !marker.is_empty())
        .map(|marker| {
            let escaped = regex::escape(marker);
            let ends_in_word_char = marker
                .chars()
                .next_back()
                .is_some_and(|c| c.is_alphanumeric() || c == '_');
            if ends_in_word_char {
                format!("{escaped}\\b")
            } else {
                escaped
            }
        })
        .collect();
    if alternatives.is_empty() {
        return None;
    }
    let pattern = format!("^(?:{})", alternatives.join("|"));
    Regex::new(&pattern).ok()
}
/// Wraps the content of blocks that start with an "incomplete" marker in a
/// `<span class="mbr-incomplete">`.
///
/// Blocks considered are paragraphs, headings, list items and table cells.
/// For the innermost open block, only its first text event is tested (after
/// trimming leading whitespace) against `marker_re`. On a match the opening
/// span is inserted right after the block's start event and the closing span
/// right before its end event. All input events are kept in order.
fn mark_incomplete_blocks<'a>(events: Vec<Event<'a>>, marker_re: &Regex) -> Vec<Event<'a>> {
    /// Bookkeeping for one block that is currently open.
    struct OpenBlock {
        /// Index of the block's start event in the output.
        opened_at: usize,
        /// Whether the block's first text event has been inspected already.
        text_seen: bool,
        /// Whether an opening span was inserted and still needs closing.
        span_open: bool,
    }

    let mut rendered = Vec::with_capacity(events.len());
    let mut open_blocks: Vec<OpenBlock> = Vec::new();

    for event in events {
        let opens_block = matches!(
            &event,
            Event::Start(Tag::Paragraph | Tag::Heading { .. } | Tag::Item | Tag::TableCell)
        );
        let closes_block = matches!(
            &event,
            Event::End(
                TagEnd::Paragraph | TagEnd::Heading(_) | TagEnd::Item | TagEnd::TableCell
            )
        );

        if opens_block {
            // The start event itself is pushed below, at this index.
            open_blocks.push(OpenBlock {
                opened_at: rendered.len(),
                text_seen: false,
                span_open: false,
            });
        } else if closes_block {
            if open_blocks.pop().is_some_and(|block| block.span_open) {
                rendered.push(Event::Html(CowStr::from(INCOMPLETE_SPAN_CLOSE)));
            }
        } else if let Event::Text(text) = &event {
            if let Some(block) = open_blocks.last_mut().filter(|block| !block.text_seen) {
                block.text_seen = true;
                if marker_re.is_match(text.trim_start()) {
                    rendered.insert(
                        block.opened_at + 1,
                        Event::Html(CowStr::from(INCOMPLETE_SPAN_OPEN)),
                    );
                    block.span_open = true;
                }
            }
        }

        rendered.push(event);
    }
    rendered
}
fn finalize_render(
processed_events: Vec<Event<'_>>,
state: EventState,
section_attrs: HashMap<usize, ParsedAttrs>,
markdown_input: &str,
headings: Vec<HeadingInfo>,
has_h1: bool,
) -> Result<MarkdownRenderResult, MarkdownError> {
let mut html_output = String::with_capacity(markdown_input.len() * 2);
let mut seen_targets: HashSet<String> = HashSet::new();
let deduplicated_links: Vec<OutboundLink> = state
.collected_links
.into_iter()
.filter(|link| seen_targets.insert(link.to.clone()))
.collect();
crate::html::push_html_mbr_with_attrs(
&mut html_output,
processed_events.into_iter(),
section_attrs,
);
let mut frontmatter = yaml_frontmatter_simplified(&state.metadata_parsed);
if !frontmatter.contains_key("title")
&& let Some(h1_text) = headings
.first()
.filter(|h| h.level == 1)
.map(|h| h.text.clone())
{
frontmatter.insert("title".to_string(), serde_json::Value::String(h1_text));
}
Ok(MarkdownRenderResult {
frontmatter,
headings,
html: html_output,
outbound_links: deduplicated_links,
has_h1,
word_count: state.word_count,
sentence_count: state.sentence_count,
syllable_count: state.syllable_count,
})
}
/// Synchronous counterpart of [`render_with_cache`].
///
/// Follows the same pipeline but never performs network requests: page info
/// for bare URLs is taken from `oembed_cache` only, and only when a cache is
/// supplied and `oembed_timeout_ms > 0`. URLs missing from the cache get no
/// prefetched info.
///
/// # Errors
///
/// Returns [`MarkdownError::ReadFailed`] when the file cannot be read, or any
/// error produced while finalizing the render.
#[allow(clippy::too_many_arguments)]
pub fn render_sync(
    file: PathBuf,
    root_path: &Path,
    oembed_timeout_ms: u64,
    link_transform_config: LinkTransformConfig,
    oembed_cache: Option<Arc<OembedCache>>,
    server_mode: bool,
    transcode_enabled: bool,
    valid_tag_sources: HashSet<String>,
    mark_incomplete: bool,
    incomplete_markers: &[String],
) -> Result<MarkdownRenderResult, MarkdownError> {
    let source_text = match fs::read_to_string(&file) {
        Ok(text) => text,
        Err(source) => return Err(MarkdownError::ReadFailed { path: file, source }),
    };

    // Wikilink rewriting is skipped entirely when no tag sources are known.
    let markdown_input = if valid_tag_sources.is_empty() {
        source_text
    } else {
        transform_wikilinks(&source_text, &valid_tag_sources)
    };

    let (events, headings, section_attrs) = collect_events_and_headings(&markdown_input);
    let has_h1 = matches!(headings.first(), Some(first) if first.level == 1);

    // Cache-only lookup; both a cache and a non-zero timeout are required.
    let prefetched_oembed = match &oembed_cache {
        Some(cache) if oembed_timeout_ms > 0 => collect_cached_oembed(&events, cache),
        _ => HashMap::new(),
    };

    let (events, state) = process_all_events(
        events,
        root_path,
        link_transform_config,
        prefetched_oembed,
        server_mode,
        transcode_enabled,
        valid_tag_sources,
    );

    // The marker regex is only built when marking was requested.
    let marker_re = if mark_incomplete {
        build_incomplete_marker_regex(incomplete_markers)
    } else {
        None
    };
    let events = match marker_re {
        Some(re) => mark_incomplete_blocks(events, &re),
        None => events,
    };

    finalize_render(
        events,
        state,
        section_attrs,
        &markdown_input,
        headings,
        has_h1,
    )
}
/// Looks up every bare URL of `events` in `cache` and returns the hits.
///
/// URLs that are not in the cache are simply absent from the result.
fn collect_cached_oembed(events: &[Event<'_>], cache: &OembedCache) -> HashMap<String, PageInfo> {
    collect_bare_urls(events)
        .into_iter()
        .filter_map(|url| {
            let cached = cache.get(&url)?;
            Some((url, cached))
        })
        .collect()
}
/// Collects the distinct bare URLs of a document.
///
/// A bare URL is a text event that starts with `http` and contains no space,
/// found outside links, metadata blocks and code blocks. Code block contents
/// are literal text and must never be treated as embeddable URLs (their text
/// event would also carry the trailing newline of the block).
fn collect_bare_urls(events: &[Event<'_>]) -> HashSet<String> {
    let mut urls = HashSet::new();
    let mut in_link = false;
    let mut in_metadata = false;
    let mut in_code_block = false;
    for event in events {
        match event {
            Event::Start(Tag::Link { .. }) => in_link = true,
            Event::End(TagEnd::Link) => in_link = false,
            Event::Start(Tag::MetadataBlock(_)) => in_metadata = true,
            Event::End(TagEnd::MetadataBlock(_)) => in_metadata = false,
            Event::Start(Tag::CodeBlock(_)) => in_code_block = true,
            Event::End(TagEnd::CodeBlock) => in_code_block = false,
            // A text starting with "http" can never start with "{{", so no
            // separate shortcode check is needed here.
            Event::Text(text)
                if !in_link
                    && !in_metadata
                    && !in_code_block
                    && text.starts_with("http")
                    && !text.contains(' ') =>
            {
                urls.insert(text.to_string());
            }
            _ => {}
        }
    }
    urls
}
/// Resolves page info for every bare URL in `events`.
///
/// Each URL is looked up in `oembed_cache` exactly once; misses are fetched
/// concurrently with a per-request timeout of `oembed_timeout_ms`, and the
/// fetched results are written back to the cache. A failed fetch yields a
/// default `PageInfo` that only carries the URL, so every bare URL ends up
/// in the returned map.
async fn prefetch_oembed_urls(
    events: &[Event<'_>],
    oembed_timeout_ms: u64,
    oembed_cache: &Option<Arc<OembedCache>>,
) -> HashMap<String, PageInfo> {
    let urls = collect_bare_urls(events);
    if urls.is_empty() {
        return HashMap::new();
    }
    tracing::debug!("oembed prefetch: found {} bare URLs to fetch", urls.len());

    // Single cache lookup per URL: deciding "cached or not" and reading the
    // value in two separate calls could lose a URL whose entry disappears in
    // between (it would be neither returned nor fetched).
    let mut results = HashMap::new();
    let mut uncached = Vec::new();
    for url in urls {
        let cached = oembed_cache.as_ref().and_then(|cache| cache.get(&url));
        match cached {
            Some(info) => {
                results.insert(url, info);
            }
            None => uncached.push(url),
        }
    }

    if !uncached.is_empty() {
        tracing::debug!(
            "oembed prefetch: {} cached, {} to fetch",
            results.len(),
            uncached.len()
        );
        let fetch_futures: Vec<_> = uncached
            .into_iter()
            .map(|url| async move {
                tracing::debug!("oembed fetch start: {}", url);
                // Fetch errors degrade to a bare PageInfo for this URL.
                let result = PageInfo::new_from_url(&url, oembed_timeout_ms)
                    .await
                    .unwrap_or_else(|_| PageInfo {
                        url: url.clone(),
                        ..Default::default()
                    });
                tracing::debug!("oembed fetch complete: {}", url);
                (url, result)
            })
            .collect();
        let fetched: Vec<_> = futures::future::join_all(fetch_futures).await;
        for (url, info) in fetched {
            if let Some(cache) = oembed_cache {
                cache.insert(url.clone(), info.clone());
            }
            results.insert(url, info);
        }
    }
    results
}
/// Converts an optional parsed YAML document into flat frontmatter.
///
/// Returns an empty map when there is no document or when its top level is
/// not a hash.
fn yaml_frontmatter_simplified(y: &Option<Yaml>) -> SimpleMetadata {
    let Some(top_level) = y.as_ref().and_then(|doc| doc.as_hash()) else {
        return HashMap::new();
    };
    yaml_hash_to_metadata(top_level)
}
/// Flattens a YAML hash into a string-keyed map of JSON values.
///
/// Conversion rules, per value type (keys must be strings):
/// - string: JSON string;
/// - array: JSON array of its string elements (other elements are dropped);
/// - hash: flattened recursively, keys joined as `parent.child`;
/// - integer / boolean: JSON number / boolean;
/// - real: kept as a JSON string holding the YAML text;
/// - anything else: skipped.
///
/// Entries with a non-string key are skipped with a warning.
fn yaml_hash_to_metadata(hash: &yaml_rust2::yaml::Hash) -> SimpleMetadata {
    let mut metadata = HashMap::with_capacity(hash.len());
    for (raw_key, raw_value) in hash.iter() {
        let Yaml::String(key) = raw_key else {
            tracing::warn!(
                "Unexpected frontmatter key-value: {:?} = {:?}",
                raw_key,
                raw_value
            );
            continue;
        };

        match raw_value {
            Yaml::String(value) => {
                tracing::trace!("Frontmatter: {key} = {value}");
                metadata.insert(key.clone(), serde_json::Value::String(value.clone()));
            }
            Yaml::Array(vals) => {
                let mut arr: Vec<serde_json::Value> = Vec::new();
                for item in vals {
                    if let Some(s) = item.as_str() {
                        arr.push(serde_json::Value::String(s.to_string()));
                    }
                }
                tracing::trace!("Frontmatter: {key} = {:?}", &arr);
                metadata.insert(key.clone(), serde_json::Value::Array(arr));
            }
            Yaml::Hash(nested_hash) => {
                tracing::trace!("Frontmatter: {key} = (nested hash)");
                let flattened = yaml_hash_to_metadata(nested_hash)
                    .into_iter()
                    .map(|(child_key, child_value)| {
                        ([key.as_str(), child_key.as_str()].join("."), child_value)
                    });
                metadata.extend(flattened);
            }
            Yaml::Integer(val) => {
                tracing::trace!("Frontmatter: {key} = {val}");
                metadata.insert(key.clone(), serde_json::json!(val));
            }
            Yaml::Real(val) => {
                tracing::trace!("Frontmatter: {key} = {val}");
                metadata.insert(key.clone(), serde_json::Value::String(val.clone()));
            }
            Yaml::Boolean(val) => {
                tracing::trace!("Frontmatter: {key} = {val}");
                metadata.insert(key.clone(), serde_json::json!(val));
            }
            other_val => {
                tracing::trace!("Frontmatter: {key} = {:?}", &other_val);
                if let Some(str_val) = other_val.as_str() {
                    metadata.insert(key.clone(), serde_json::Value::String(str_val.to_string()));
                }
            }
        }
    }
    metadata
}
/// Upper bound (8 KiB) on how many bytes are read from the start of a file
/// when only its metadata is being extracted.
const FRONTMATTER_MAX_BYTES: usize = 8 * 1024;
/// Extracts frontmatter from the beginning of a Markdown file.
///
/// Reads at most `FRONTMATTER_MAX_BYTES` from the start of the file, parses a
/// leading YAML metadata block if present, and falls back to the first H1
/// found in that prefix for `title` when the frontmatter has none. Invalid
/// UTF-8 (including a multi-byte character cut by the size limit) is replaced
/// lossily rather than treated as an error.
///
/// # Errors
///
/// Returns [`MarkdownError::ReadFailed`] when the file cannot be opened or
/// read.
pub fn extract_metadata_from_file<P: AsRef<Path>>(
    path: P,
) -> Result<SimpleMetadata, MarkdownError> {
    let path = path.as_ref();
    let file = File::open(path).map_err(|e| MarkdownError::ReadFailed {
        path: path.to_path_buf(),
        source: e,
    })?;

    // Bounded read that does not depend on the reported file length: works
    // when metadata is unavailable and when the file changes size meanwhile.
    let limit = u64::try_from(FRONTMATTER_MAX_BYTES).unwrap_or(u64::MAX);
    let mut buffer = Vec::with_capacity(FRONTMATTER_MAX_BYTES);
    file.take(limit)
        .read_to_end(&mut buffer)
        .map_err(|e| MarkdownError::ReadFailed {
            path: path.to_path_buf(),
            source: e,
        })?;

    let markdown_input = String::from_utf8_lossy(&buffer);
    let parser = MDParser::new_ext(&markdown_input, Options::ENABLE_YAML_STYLE_METADATA_BLOCKS);
    let parser = TextMergeStream::new(parser);
    let mut in_metadata = false;
    let mut hm = HashMap::new();
    // A leading metadata block shows up within the first few events.
    for event in parser.take(4) {
        match &event {
            Event::Start(Tag::MetadataBlock(MetadataBlockKind::YamlStyle)) => {
                in_metadata = true;
            }
            Event::End(TagEnd::MetadataBlock(MetadataBlockKind::YamlStyle)) => {
                break;
            }
            Event::Text(text) if in_metadata => {
                // The loader may yield no document at all (e.g. a block that
                // only holds comments); never index into the result.
                let metadata_parsed = YamlLoader::load_from_str(text)
                    .ok()
                    .and_then(|docs| docs.into_iter().next());
                hm = yaml_frontmatter_simplified(&metadata_parsed);
                break;
            }
            _ => {}
        }
    }
    if !hm.contains_key("title")
        && let Some(h1_text) = extract_first_h1(&markdown_input)
    {
        hm.insert("title".to_string(), serde_json::Value::String(h1_text));
    }
    Ok(hm)
}
/// Generates a document-unique anchor id for a heading.
///
/// The base id is the lowercased text with alphanumerics and `-` kept,
/// whitespace turned into `-`, and every other character acting as a
/// separator; empty results become `"heading"`. The first use of a base id
/// returns it unchanged; later uses get a numeric suffix (`-2`, `-3`, …).
///
/// `anchor_ids` records every id handed out so far, including suffixed ones,
/// so a suffixed id can never collide with a heading whose own text produces
/// the same id (e.g. "Foo", "Foo", "Foo 2").
fn generate_anchor_id(text: &str, anchor_ids: &mut HashMap<String, usize>) -> String {
    let slug = text
        .to_lowercase()
        .chars()
        .map(|c| {
            if c.is_alphanumeric() || c == '-' {
                c
            } else if c.is_whitespace() {
                '-'
            } else {
                ' '
            }
        })
        .collect::<String>()
        .split_whitespace()
        .collect::<Vec<_>>()
        .join("-");
    let base_id = if slug.is_empty() {
        "heading".to_string()
    } else {
        slug
    };

    let uses = anchor_ids.get(&base_id).copied().unwrap_or(0);
    if uses == 0 {
        anchor_ids.insert(base_id.clone(), 1);
        return base_id;
    }

    // Find the next suffix whose id has not been handed out yet.
    let mut suffix = uses + 1;
    loop {
        let candidate = format!("{base_id}-{suffix}");
        if !anchor_ids.contains_key(&candidate) {
            // Reserve the suffixed id itself and remember where to resume.
            anchor_ids.insert(candidate.clone(), 1);
            anchor_ids.insert(base_id, suffix);
            return candidate;
        }
        suffix += 1;
    }
}
/// Rewrites a single parser event and updates the running render state.
///
/// - Images: the URL is link-transformed; recognised media URLs are replaced
///   by the embed's opening HTML (closed again at the image end event).
/// - Metadata blocks: tracked, and their text is parsed as YAML.
/// - Links: the destination is link-transformed (tag links are resolved
///   first); at the link end an `OutboundLink` with the original destination
///   and the accumulated link text is recorded.
/// - Text outside metadata and code blocks feeds the word, syllable and
///   sentence counters.
/// - Text outside metadata and code blocks may be replaced by HTML: a
///   leading `[-] ` becomes a canceled checkbox, a bare URL becomes its
///   oEmbed HTML, and a `{{…}}` shortcode becomes a video embed.
///
/// Code block contents are literal and are never rewritten.
fn process_event(
    event: pulldown_cmark::Event<'_>,
    mut state: EventState,
) -> (pulldown_cmark::Event<'_>, EventState) {
    match &event {
        Event::Start(Tag::Image {
            link_type,
            dest_url,
            title,
            id,
        }) => {
            let transformed_url = transform_link(dest_url, &state.link_transform_config);
            match MediaEmbed::from_url_and_title(&transformed_url, title) {
                Some(media) => {
                    let html = media.to_html(true, state.server_mode, state.transcode_enabled);
                    // Remembered so the image end event can emit the closing HTML.
                    state.current_media = Some(media);
                    (Event::Html(html.into()), state)
                }
                _ => {
                    let new_event = Event::Start(Tag::Image {
                        link_type: *link_type,
                        dest_url: CowStr::from(transformed_url),
                        title: title.clone(),
                        id: id.clone(),
                    });
                    (new_event, state)
                }
            }
        }
        Event::End(TagEnd::Image) => {
            if let Some(media) = state.current_media.take() {
                (Event::Html(media.html_close().into()), state)
            } else {
                (event, state)
            }
        }
        Event::Start(Tag::MetadataBlock(v)) => {
            state.metadata_source = Some(*v);
            state.in_metadata = true;
            (event.clone(), state)
        }
        Event::End(TagEnd::MetadataBlock(_)) => {
            state.in_metadata = false;
            (event.clone(), state)
        }
        Event::Start(Tag::Link {
            link_type,
            dest_url,
            title,
            id,
        }) => {
            state.in_link = true;
            // The link index records the destination as written in the source.
            state.current_link_dest = Some(dest_url.to_string());
            state.current_link_text.clear();
            let transformed_url =
                if let Some(wikilink) = parse_tag_link(dest_url, &state.valid_tag_sources) {
                    transform_link(&wikilink.url_path(), &state.link_transform_config)
                } else {
                    transform_link(dest_url, &state.link_transform_config)
                };
            let new_event = Event::Start(Tag::Link {
                link_type: *link_type,
                dest_url: CowStr::from(transformed_url),
                title: title.clone(),
                id: id.clone(),
            });
            (new_event, state)
        }
        Event::End(TagEnd::Link) => {
            state.in_link = false;
            if let Some(dest_url) = state.current_link_dest.take() {
                let (path, anchor) = split_url_anchor(&dest_url);
                let internal = is_internal_link(&dest_url);
                let link = OutboundLink {
                    to: path,
                    text: std::mem::take(&mut state.current_link_text),
                    anchor,
                    internal,
                };
                state.collected_links.push(link);
            }
            (event, state)
        }
        Event::Start(Tag::CodeBlock(_)) => {
            state.in_code_block = true;
            (event, state)
        }
        Event::End(TagEnd::CodeBlock) => {
            state.in_code_block = false;
            (event, state)
        }
        Event::End(TagEnd::Paragraph | TagEnd::Heading(_) | TagEnd::Item) => {
            // A block whose text did not end with a terminator still counts
            // as one sentence.
            if state.block_needs_sentence_bump {
                state.sentence_count += 1;
                state.block_needs_sentence_bump = false;
            }
            (event, state)
        }
        Event::Text(text) => {
            if state.in_link {
                state.current_link_text.push_str(text);
            }
            if !state.in_metadata && !state.in_code_block {
                for word in text.split_whitespace() {
                    state.word_count += 1;
                    state.syllable_count += crate::readability::count_syllables(word);
                }
                let (sentences_in_text, ends_with_terminator) = count_sentence_terminators(text);
                state.sentence_count += sentences_in_text;
                let trimmed = text.trim_end();
                if !trimmed.is_empty() {
                    state.block_needs_sentence_bump = !ends_with_terminator;
                }
            }
            if state.in_metadata {
                state.metadata_parsed = YamlLoader::load_from_str(text)
                    .ok()
                    .and_then(|ys| ys.into_iter().next());
                (event, state)
            } else if state.in_code_block {
                // Code is shown verbatim: no checkbox, oEmbed or shortcode
                // rewriting inside code blocks.
                (event, state)
            } else if let Some(remaining_text) = text.strip_prefix("[-] ") {
                let html = format!(
                    r#"<input disabled type="checkbox" class="canceled-checkbox"/><s>{}</s>"#,
                    html_escape::encode_text(remaining_text)
                );
                (Event::Html(html.into()), state)
            } else if !state.in_link && text.starts_with("http") && !text.contains(' ') {
                let url_str = text.to_string();
                // Fall back to a bare PageInfo when nothing was prefetched.
                let info = state
                    .prefetched_oembed
                    .get(&url_str)
                    .cloned()
                    .unwrap_or_else(|| PageInfo {
                        url: url_str,
                        ..Default::default()
                    });
                (Event::Html(info.html().into()), state)
            } else if text.trim_start().starts_with("{{") {
                if let Some(mut vid) = Vid::from_vid(text) {
                    vid.url = transform_link(&vid.url, &state.link_transform_config);
                    (
                        Event::Html(
                            vid.to_html(false, state.server_mode, state.transcode_enabled)
                                .into(),
                        ),
                        state,
                    )
                } else {
                    (event, state)
                }
            } else {
                (event, state)
            }
        }
        _ => (event, state),
    }
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
use tempfile::NamedTempFile;
/// Renders `content` as a non-index file without tag sources and returns the HTML.
async fn render_markdown(content: &str) -> String {
    let no_tag_sources = HashSet::new();
    render_markdown_with_config(content, false, no_tag_sources).await
}
/// Renders `content` as a non-index file using the given tag sources and
/// returns the HTML.
async fn render_markdown_with_tags(content: &str, tag_sources: HashSet<String>) -> String {
    let is_index_file = false;
    render_markdown_with_config(content, is_index_file, tag_sources).await
}
/// Writes `content` to a temporary file, renders it with a 100 ms oEmbed
/// timeout and incomplete-marking disabled, and returns the HTML.
async fn render_markdown_with_config(
    content: &str,
    is_index_file: bool,
    tag_sources: HashSet<String>,
) -> String {
    // The temp file must stay alive until rendering has finished.
    let mut fixture = NamedTempFile::new().unwrap();
    fixture.write_all(content.as_bytes()).unwrap();
    let markdown_path = fixture.path().to_path_buf();
    let root_dir = markdown_path.parent().unwrap().to_path_buf();
    let link_config = LinkTransformConfig {
        markdown_extensions: vec!["md".to_string()],
        index_file: "index.md".to_string(),
        is_index_file,
        url_depth: None,
    };
    render(
        markdown_path,
        &root_dir,
        100,
        link_config,
        false,
        false,
        tag_sources,
        false,
        &[],
    )
    .await
    .unwrap()
    .html
}
/// Renders `content` with incomplete-marking enabled for `markers` (oEmbed
/// disabled) and returns the HTML.
async fn render_markdown_marked(content: &str, markers: &[&str]) -> String {
    // The temp file must stay alive until rendering has finished.
    let mut fixture = NamedTempFile::new().unwrap();
    fixture.write_all(content.as_bytes()).unwrap();
    let markdown_path = fixture.path().to_path_buf();
    let root_dir = markdown_path.parent().unwrap().to_path_buf();
    let link_config = LinkTransformConfig {
        markdown_extensions: vec!["md".to_string()],
        index_file: "index.md".to_string(),
        is_index_file: false,
        url_depth: None,
    };
    let marker_list: Vec<String> = markers.iter().map(|marker| marker.to_string()).collect();
    render(
        markdown_path,
        &root_dir,
        0,
        link_config,
        false,
        false,
        HashSet::new(),
        true,
        &marker_list,
    )
    .await
    .unwrap()
    .html
}
/// Renders `content` with oEmbed and incomplete-marking disabled and returns
/// the complete render result (not just the HTML).
async fn render_result(content: &str) -> MarkdownRenderResult {
    // The temp file must stay alive until rendering has finished.
    let mut fixture = NamedTempFile::new().unwrap();
    fixture.write_all(content.as_bytes()).unwrap();
    let markdown_path = fixture.path().to_path_buf();
    let root_dir = markdown_path.parent().unwrap().to_path_buf();
    let link_config = LinkTransformConfig {
        markdown_extensions: vec!["md".to_string()],
        index_file: "index.md".to_string(),
        is_index_file: false,
        url_depth: None,
    };
    let outcome = render(
        markdown_path,
        &root_dir,
        0,
        link_config,
        false,
        false,
        HashSet::new(),
        false,
        &[],
    )
    .await;
    outcome.unwrap()
}
/// Table-driven check of `count_sentence_terminators` on basic inputs.
#[test]
fn sentence_terminator_basic_cases() {
    let cases: [(&str, (usize, bool)); 6] = [
        ("", (0, false)),
        ("Hello.", (1, true)),
        ("Hi! How are you?", (2, true)),
        ("Wait...", (1, true)),
        ("v1.2.3 is out.", (1, true)),
        ("No ending here", (0, false)),
    ];
    for (input, expected) in cases {
        assert_eq!(count_sentence_terminators(input), expected);
    }
}
/// Two plain sentences yield the expected word, sentence and syllable totals.
#[tokio::test]
async fn readability_counts_simple_paragraph() {
    let rendered = render_result("The cat sat on the mat. The dog ran away.").await;
    assert_eq!(rendered.word_count, 10);
    assert_eq!(rendered.sentence_count, 2);
    assert_eq!(rendered.syllable_count, 11);
}
/// A heading without closing punctuation still counts as one sentence.
#[tokio::test]
async fn readability_heading_without_terminator_bumps_sentence() {
    let rendered = render_result("# Introduction\n\nHello world.").await;
    assert_eq!(rendered.word_count, 3);
    assert_eq!(rendered.sentence_count, 2);
}
/// Fenced code does not contribute to word or sentence counts.
#[tokio::test]
async fn readability_excludes_code_blocks() {
    let source = "Some prose here.\n\n```rust\nfn main() { println!(\"hi\"); }\n```\n";
    let rendered = render_result(source).await;
    assert_eq!(rendered.word_count, 3);
    assert_eq!(rendered.sentence_count, 1);
}
/// An empty document produces zero for every readability counter.
#[tokio::test]
async fn readability_empty_document_has_zero_counts() {
    let rendered = render_result("").await;
    assert_eq!(rendered.word_count, 0);
    assert_eq!(rendered.sentence_count, 0);
    assert_eq!(rendered.syllable_count, 0);
}
/// `- [-]` renders a disabled "canceled" checkbox with struck-through text.
#[tokio::test]
async fn test_canceled_checkbox_dash() {
    let rendered = render_markdown("- [-] canceled task").await;
    let checkbox = r#"<input disabled type="checkbox" class="canceled-checkbox"/>"#;
    assert!(rendered.contains(checkbox));
    assert!(rendered.contains("<s>canceled task</s>"));
}
/// The canceled checkbox also works for `*` list markers.
#[tokio::test]
async fn test_canceled_checkbox_asterisk() {
    let rendered = render_markdown("* [-] another canceled item").await;
    let checkbox = r#"<input disabled type="checkbox" class="canceled-checkbox"/>"#;
    assert!(rendered.contains(checkbox));
    assert!(rendered.contains("<s>another canceled item</s>"));
}
/// A regular unchecked task item keeps the standard checkbox markup.
#[tokio::test]
async fn test_unchecked_checkbox() {
    let rendered = render_markdown("- [ ] unchecked item").await;
    let standard_checkbox = r#"<input disabled="" type="checkbox"/>"#;
    assert!(rendered.contains(standard_checkbox));
    assert!(!rendered.contains("canceled-checkbox"));
}
/// A regular checked task item keeps the standard checked checkbox markup.
#[tokio::test]
async fn test_checked_checkbox() {
    let rendered = render_markdown("- [x] checked item").await;
    let checked_checkbox = r#"<input disabled="" type="checkbox" checked=""/>"#;
    assert!(rendered.contains(checked_checkbox));
    assert!(!rendered.contains("canceled-checkbox"));
}
/// Canceled items containing HTML-special characters still render the
/// checkbox and strike-through wrapper.
#[tokio::test]
async fn test_canceled_checkbox_with_special_chars() {
    let rendered = render_markdown("- [-] text with special chars: & < > \"").await;
    for expected in ["<s>", "</s>", "canceled-checkbox"] {
        assert!(rendered.contains(expected));
    }
}
/// The text of a canceled item is wrapped in `<s>` unchanged.
#[tokio::test]
async fn test_canceled_checkbox_plain_text() {
    let rendered = render_markdown("- [-] plain canceled text").await;
    assert!(rendered.contains("<s>plain canceled text</s>"));
}
#[tokio::test]
async fn test_yaml_frontmatter() {
let md = "---\ntitle: Test Title\n---\n\n# Heading";
let mut file = NamedTempFile::new().unwrap();
file.write_all(md.as_bytes()).unwrap();
let path = file.path().to_path_buf();
let root = path.parent().unwrap().to_path_buf();
let config = LinkTransformConfig {
markdown_extensions: vec!["md".to_string()],
index_file: "index.md".to_string(),
is_index_file: false,
url_depth: None,
};
let result = render(
path,
&root,
100,
config,
false,
false,
HashSet::new(),
false,
&[],
)
.await
.unwrap();
assert_eq!(
result.frontmatter.get("title"),
Some(&serde_json::Value::String("Test Title".to_string()))
);
}
/// A leading H1 is returned as plain text.
#[test]
fn test_extract_first_h1_basic() {
    let title = extract_first_h1("# Hello World\n\nSome content");
    assert_eq!(title, Some("Hello World".to_string()));
}
/// Inline emphasis markers are dropped from the extracted H1 text.
#[test]
fn test_extract_first_h1_with_inline_formatting() {
    let title = extract_first_h1("# Hello **World**\n\nSome content");
    assert_eq!(title, Some("Hello World".to_string()));
}
/// A document whose only heading is an H2 has no H1 to extract.
#[test]
fn test_extract_first_h1_none_when_no_h1() {
    let title = extract_first_h1("## This is H2\n\nSome content");
    assert_eq!(title, None);
}
/// With two H1 headings, only the first one's text is returned.
#[test]
fn test_extract_first_h1_returns_first_only() {
    let title = extract_first_h1("# First H1\n\n# Second H1");
    let expected = Some(String::from("First H1"));
    assert_eq!(title, expected);
}
/// An empty document yields no title.
#[test]
fn test_extract_first_h1_empty_doc() {
    let title = extract_first_h1("");
    assert_eq!(title, None);
}
/// `has_h1` is set when the first heading of the rendered file is an H1.
#[tokio::test]
async fn test_has_h1_true_when_first_heading_is_h1() {
    let source = "# Main Title\n\n## Subsection";

    // `tmp` must stay bound until rendering finishes; the file is removed on drop.
    let mut tmp = NamedTempFile::new().unwrap();
    tmp.write_all(source.as_bytes()).unwrap();
    let doc_path = tmp.path().to_path_buf();
    let root_dir = doc_path.parent().unwrap().to_path_buf();

    let link_config = LinkTransformConfig {
        markdown_extensions: vec![String::from("md")],
        index_file: String::from("index.md"),
        is_index_file: false,
        url_depth: None,
    };
    let rendered = render(
        doc_path,
        &root_dir,
        100,
        link_config,
        false,
        false,
        HashSet::new(),
        false,
        &[],
    )
    .await
    .unwrap();

    assert!(rendered.has_h1);
}
/// `has_h1` stays false when the first heading is an H2, even if an H1
/// appears later in the document.
#[tokio::test]
async fn test_has_h1_false_when_first_heading_is_h2() {
    let source = "## Subsection\n\n# Late H1";

    // `tmp` must stay bound until rendering finishes; the file is removed on drop.
    let mut tmp = NamedTempFile::new().unwrap();
    tmp.write_all(source.as_bytes()).unwrap();
    let doc_path = tmp.path().to_path_buf();
    let root_dir = doc_path.parent().unwrap().to_path_buf();

    let link_config = LinkTransformConfig {
        markdown_extensions: vec![String::from("md")],
        index_file: String::from("index.md"),
        is_index_file: false,
        url_depth: None,
    };
    let rendered = render(
        doc_path,
        &root_dir,
        100,
        link_config,
        false,
        false,
        HashSet::new(),
        false,
        &[],
    )
    .await
    .unwrap();

    assert!(!rendered.has_h1);
}
/// Without frontmatter, the rendered result reports the H1 text as the
/// frontmatter `title`.
#[tokio::test]
async fn test_title_fallback_from_h1() {
    let source = "# My Document Title\n\nSome content here.";

    // `tmp` must stay bound until rendering finishes; the file is removed on drop.
    let mut tmp = NamedTempFile::new().unwrap();
    tmp.write_all(source.as_bytes()).unwrap();
    let doc_path = tmp.path().to_path_buf();
    let root_dir = doc_path.parent().unwrap().to_path_buf();

    let link_config = LinkTransformConfig {
        markdown_extensions: vec![String::from("md")],
        index_file: String::from("index.md"),
        is_index_file: false,
        url_depth: None,
    };
    let rendered = render(
        doc_path,
        &root_dir,
        100,
        link_config,
        false,
        false,
        HashSet::new(),
        false,
        &[],
    )
    .await
    .unwrap();

    assert!(rendered.has_h1);
    let expected_title = serde_json::Value::String("My Document Title".to_string());
    assert_eq!(rendered.frontmatter.get("title"), Some(&expected_title));
}
/// When both a frontmatter `title` and an H1 exist, the frontmatter value is
/// the one reported, while `has_h1` is still set.
#[tokio::test]
async fn test_frontmatter_title_takes_precedence() {
    let source = "---\ntitle: Frontmatter Title\n---\n\n# H1 Title";

    // `tmp` must stay bound until rendering finishes; the file is removed on drop.
    let mut tmp = NamedTempFile::new().unwrap();
    tmp.write_all(source.as_bytes()).unwrap();
    let doc_path = tmp.path().to_path_buf();
    let root_dir = doc_path.parent().unwrap().to_path_buf();

    let link_config = LinkTransformConfig {
        markdown_extensions: vec![String::from("md")],
        index_file: String::from("index.md"),
        is_index_file: false,
        url_depth: None,
    };
    let rendered = render(
        doc_path,
        &root_dir,
        100,
        link_config,
        false,
        false,
        HashSet::new(),
        false,
        &[],
    )
    .await
    .unwrap();

    assert!(rendered.has_h1);
    let expected_title = serde_json::Value::String("Frontmatter Title".to_string());
    assert_eq!(rendered.frontmatter.get("title"), Some(&expected_title));
}
/// With neither frontmatter nor a leading H1, no `title` is reported and
/// `has_h1` is false.
#[tokio::test]
async fn test_no_title_when_no_frontmatter_and_no_h1() {
    let source = "## Subsection\n\nSome content.";

    // `tmp` must stay bound until rendering finishes; the file is removed on drop.
    let mut tmp = NamedTempFile::new().unwrap();
    tmp.write_all(source.as_bytes()).unwrap();
    let doc_path = tmp.path().to_path_buf();
    let root_dir = doc_path.parent().unwrap().to_path_buf();

    let link_config = LinkTransformConfig {
        markdown_extensions: vec![String::from("md")],
        index_file: String::from("index.md"),
        is_index_file: false,
        url_depth: None,
    };
    let rendered = render(
        doc_path,
        &root_dir,
        100,
        link_config,
        false,
        false,
        HashSet::new(),
        false,
        &[],
    )
    .await
    .unwrap();

    assert!(!rendered.has_h1);
    let title = rendered.frontmatter.get("title");
    assert_eq!(title, None);
}
/// Image syntax pointing at an `.mp4` file renders a `<video>` element inside
/// a figure whose caption is the alt text.
#[tokio::test]
async fn test_video_embed_from_image_syntax() {
    // The input must be non-empty image markdown; an empty document cannot
    // produce any of the asserted output.
    let md = "![My Video](video.mp4)";
    let html = render_markdown(md).await;
    assert!(html.contains("<video"));
    assert!(html.contains("video.mp4"));
    assert!(html.contains("<figcaption>"));
    assert!(html.contains("My Video"));
    assert!(html.contains("</figcaption></figure>"));
}
/// Image syntax pointing at an `.mp3` file renders an `<audio>` embed inside
/// a figure whose caption is the alt text.
#[tokio::test]
async fn test_audio_embed_from_image_syntax() {
    // The input must be non-empty image markdown; an empty document cannot
    // produce any of the asserted output.
    let md = "![Episode 1](podcast.mp3)";
    let html = render_markdown(md).await;
    assert!(html.contains("<audio"));
    assert!(html.contains("audio-embed"));
    assert!(html.contains("podcast.mp3"));
    assert!(html.contains("<figcaption>"));
    assert!(html.contains("Episode 1"));
    assert!(html.contains("</figcaption></figure>"));
}
/// Image syntax pointing at a YouTube watch URL renders a `youtube-embed`
/// that targets the `youtube-nocookie.com` embed URL, captioned with the alt text.
#[tokio::test]
async fn test_youtube_embed_from_image_syntax() {
    // The input must be non-empty image markdown; an empty document cannot
    // produce any of the asserted output.
    let md = "![Watch this](https://www.youtube.com/watch?v=dQw4w9WgXcQ)";
    let html = render_markdown(md).await;
    assert!(html.contains("youtube-embed"));
    assert!(html.contains("youtube-nocookie.com/embed/dQw4w9WgXcQ"));
    assert!(html.contains("<figcaption>"));
    assert!(html.contains("Watch this"));
    assert!(html.contains("</figcaption></figure>"));
}
/// Image syntax pointing at a short `youtu.be` URL renders the same
/// `youtube-nocookie.com` embed as the long form.
#[tokio::test]
async fn test_youtube_short_url_embed() {
    // The input must be non-empty image markdown; an empty document cannot
    // produce any of the asserted output.
    let md = "![Short link](https://youtu.be/dQw4w9WgXcQ)";
    let html = render_markdown(md).await;
    assert!(html.contains("youtube-embed"));
    assert!(html.contains("youtube-nocookie.com/embed/dQw4w9WgXcQ"));
}
/// Image syntax pointing at a `.pdf` file renders a `pdf-embed` object with a
/// transformed `data` URL, a fallback marker and a caption from the alt text.
#[tokio::test]
async fn test_pdf_embed_from_image_syntax() {
    // The input must be non-empty image markdown; an empty document cannot
    // produce any of the asserted output.
    let md = "![Important Document](report.pdf)";
    let html = render_markdown(md).await;
    assert!(html.contains("pdf-embed"));
    assert!(
        html.contains(r#"data="../report.pdf""#),
        "PDF URL should be transformed. Got: {}",
        html
    );
    assert!(html.contains(r#"type="application/pdf""#));
    assert!(html.contains("data-pdf-fallback"));
    assert!(html.contains("<figcaption>"));
    assert!(html.contains("Important Document"));
    assert!(html.contains("</figcaption></figure>"));
}
/// A PDF referenced through a sub-directory path keeps that path in the
/// transformed `data` URL.
#[tokio::test]
async fn test_pdf_embed_with_path() {
    // The input must be non-empty image markdown; an empty document cannot
    // produce any of the asserted output.
    let md = "![Manual](docs/manual.pdf)";
    let html = render_markdown(md).await;
    assert!(html.contains("pdf-embed"));
    assert!(
        html.contains(r#"data="../docs/manual.pdf""#),
        "PDF URL should be transformed. Got: {}",
        html
    );
}
/// An ordinary `.jpg` image stays an `<img>` and is not turned into a video,
/// audio or PDF embed.
#[tokio::test]
async fn test_regular_image_not_converted() {
    // The input must be non-empty image markdown; an empty document cannot
    // produce the asserted `<img>` output.
    let md = "![Photo](photo.jpg)";
    let html = render_markdown(md).await;
    assert!(html.contains("<img"));
    assert!(html.contains("photo.jpg"));
    assert!(!html.contains("<video"));
    assert!(!html.contains("<audio"));
    assert!(!html.contains("pdf-embed"));
}
/// A document mixing video, audio, PDF and plain image references renders
/// each one with its own element type.
#[tokio::test]
async fn test_multiple_media_types_in_document() {
    // Four image-syntax lines, one per media kind asserted below. They must
    // start at column 0 inside the raw string so they are not parsed as an
    // indented code block.
    let md = r#"
# My Media

![Video](video.mp4)
![Audio](audio.mp3)
![PDF](doc.pdf)
![Image](image.png)
"#;
    let html = render_markdown(md).await;
    assert!(html.contains("<video"));
    assert!(html.contains("<audio"));
    assert!(html.contains("pdf-embed"));
    assert!(html.contains("<img"));
}
/// The `{{ vid(path=...) }}` shortcode renders a `<video>` element whose
/// source lives under `/videos/`.
#[tokio::test]
async fn test_vid_shortcode() {
    let shortcode = r#"{{ vid(path="test/video.mp4") }}"#;
    let rendered = render_markdown(shortcode).await;
    println!("Output HTML: {rendered}");
    assert!(rendered.contains("<video"), "Should contain video element");
    let expected_path = "/videos/test/video.mp4";
    assert!(rendered.contains(expected_path), "Should contain video path");
}
/// Spaces in a `vid` shortcode path show up percent-encoded in the output URL.
#[tokio::test]
async fn test_vid_shortcode_with_spaces() {
    let shortcode = r#"{{ vid(path="Eric Jones/Eric Jones - Metal 3.mp4")}}"#;
    let rendered = render_markdown(shortcode).await;
    println!("Output HTML: {rendered}");
    assert!(rendered.contains("<video"), "Should contain video element");
    let encoded_prefix = "/videos/Eric%20Jones";
    assert!(
        rendered.contains(encoded_prefix),
        "Should contain URL-encoded path"
    );
}
/// In a non-index document, a link to `other.md` becomes `../other/`.
#[tokio::test]
async fn test_link_transformation_regular_markdown() {
    let out = render_markdown_with_config("[Other Doc](other.md)", false, HashSet::new()).await;
    let expected_href = r#"href="../other/""#;
    assert!(
        out.contains(expected_href),
        "Regular markdown should transform other.md to ../other/. Got: {out}"
    );
}
/// In an index document, a link to `other.md` becomes `other/` with no `../`.
#[tokio::test]
async fn test_link_transformation_index_file() {
    let out = render_markdown_with_config("[Other Doc](other.md)", true, HashSet::new()).await;
    let expected_href = r#"href="other/""#;
    assert!(
        out.contains(expected_href),
        "Index file should transform other.md to other/. Got: {out}"
    );
}
/// An absolute `https://` link target is emitted unchanged.
#[tokio::test]
async fn test_link_transformation_preserves_absolute_urls() {
    let out = render_markdown("[External](https://example.com)").await;
    let untouched_href = r#"href="https://example.com""#;
    assert!(
        out.contains(untouched_href),
        "Absolute URLs should remain unchanged"
    );
}
/// A `#fragment` on a markdown link survives the URL transformation.
#[tokio::test]
async fn test_link_transformation_with_anchor() {
    let out =
        render_markdown_with_config("[Section](other.md#section)", false, HashSet::new()).await;
    let expected_href = r#"href="../other/#section""#;
    assert!(
        out.contains(expected_href),
        "Links with anchors should transform correctly. Got: {out}"
    );
}
/// In a non-index document, a relative image source gains a `../` prefix.
#[tokio::test]
async fn test_image_transformation_regular_markdown() {
    // The input must be non-empty image markdown; an empty document has no
    // `src` attribute to check.
    let md = "![Photo](images/photo.jpg)";
    let html = render_markdown_with_config(md, false, HashSet::new()).await;
    assert!(
        html.contains(r#"src="../images/photo.jpg""#),
        "Image URLs should be transformed. Got: {}",
        html
    );
}
/// In an index document, a relative image source is kept without a `../` prefix.
#[tokio::test]
async fn test_image_transformation_index_file() {
    // The input must be non-empty image markdown; an empty document has no
    // `src` attribute to check.
    let md = "![Photo](images/photo.jpg)";
    let html = render_markdown_with_config(md, true, HashSet::new()).await;
    assert!(
        html.contains(r#"src="images/photo.jpg""#),
        "Index file image URLs shouldn't get ../. Got: {}",
        html
    );
}
/// In a non-index document, a video embed URL gains a `../` prefix.
#[tokio::test]
async fn test_video_embed_url_transformation() {
    // The input must be non-empty image markdown; an empty document renders
    // no video URL at all.
    let md = "![Video](video.mp4)";
    let html = render_markdown_with_config(md, false, HashSet::new()).await;
    assert!(
        html.contains("../video.mp4"),
        "Video URLs should be transformed with ../. Got: {}",
        html
    );
}
/// In an index document, a video embed URL is present but has no `../` prefix.
#[tokio::test]
async fn test_video_embed_url_transformation_index_file() {
    // The input must be non-empty image markdown; with an empty document the
    // "URL should be present" assertion can never hold.
    let md = "![Video](video.mp4)";
    let html = render_markdown_with_config(md, true, HashSet::new()).await;
    assert!(
        !html.contains("../video.mp4"),
        "Index file video URLs shouldn't get ../. Got: {}",
        html
    );
    assert!(
        html.contains("video.mp4"),
        "Video URL should be present. Got: {}",
        html
    );
}
/// In a non-index document, an audio embed URL gains a `../` prefix.
#[tokio::test]
async fn test_audio_embed_url_transformation() {
    // The input must be non-empty image markdown; an empty document renders
    // no audio URL at all.
    let md = "![Episode](episode.mp3)";
    let html = render_markdown_with_config(md, false, HashSet::new()).await;
    assert!(
        html.contains("../episode.mp3"),
        "Audio URLs should be transformed with ../. Got: {}",
        html
    );
}
/// In a non-index document, a PDF embed URL gains a `../` prefix.
#[tokio::test]
async fn test_pdf_embed_url_transformation() {
    // The input must be non-empty image markdown; an empty document renders
    // no PDF URL at all.
    let md = "![Report](report.pdf)";
    let html = render_markdown_with_config(md, false, HashSet::new()).await;
    assert!(
        html.contains("../report.pdf"),
        "PDF URLs should be transformed with ../. Got: {}",
        html
    );
}
/// In an index document, a PDF embed URL is present but has no `../` prefix.
#[tokio::test]
async fn test_pdf_embed_url_transformation_index_file() {
    // The input must be non-empty image markdown; with an empty document the
    // "URL should be present" assertion can never hold.
    let md = "![Report](report.pdf)";
    let html = render_markdown_with_config(md, true, HashSet::new()).await;
    assert!(
        !html.contains("../report.pdf"),
        "Index file PDF URLs shouldn't get ../. Got: {}",
        html
    );
    assert!(
        html.contains("report.pdf"),
        "PDF URL should be present. Got: {}",
        html
    );
}
/// A bare sibling file name used as a video source gains a `../` prefix in a
/// non-index document.
#[tokio::test]
async fn test_media_embed_peer_file_transformation() {
    // The input must be non-empty image markdown; an empty document renders
    // no video URL at all.
    let md = "![Peer video](peer-video.mp4)";
    let html = render_markdown_with_config(md, false, HashSet::new()).await;
    assert!(
        html.contains("../peer-video.mp4"),
        "Peer file video should get ../ prefix. Got: {}",
        html
    );
}
/// An explicit `./` relative video source is rewritten to a `../` path in a
/// non-index document.
#[tokio::test]
async fn test_media_embed_explicit_relative_path() {
    // The input must be non-empty image markdown using the `./` form named in
    // the assertion message; an empty document renders no video URL at all.
    let md = "![Peer video](./peer-video.mp4)";
    let html = render_markdown_with_config(md, false, HashSet::new()).await;
    assert!(
        html.contains("../peer-video.mp4"),
        "./peer-video.mp4 should transform to ../peer-video.mp4. Got: {}",
        html
    );
}
/// `--- {#intro}` opens a section carrying `id="intro"`.
#[tokio::test]
async fn test_section_attrs_with_id() {
    let out = render_markdown("First section\n\n--- {#intro}\n\nSecond section").await;
    let expected_tag = r#"<section id="intro">"#;
    assert!(
        out.contains(expected_tag),
        "Section should have id='intro'. Got: {out}"
    );
}
/// `--- {.highlight}` opens a section carrying `class="highlight"`.
#[tokio::test]
async fn test_section_attrs_with_class() {
    let out = render_markdown("First section\n\n--- {.highlight}\n\nSecond section").await;
    let expected_tag = r#"<section class="highlight">"#;
    assert!(
        out.contains(expected_tag),
        "Section should have class='highlight'. Got: {out}"
    );
}
/// Several `.class` entries on a rule are joined into one space-separated
/// `class` attribute.
#[tokio::test]
async fn test_section_attrs_with_multiple_classes() {
    let out = render_markdown("First section\n\n--- {.slide .center}\n\nSecond section").await;
    let expected_tag = r#"<section class="slide center">"#;
    assert!(
        out.contains(expected_tag),
        "Section should have class='slide center'. Got: {out}"
    );
}
/// A `key="value"` pair on a rule is emitted as an attribute on the section.
#[tokio::test]
async fn test_section_attrs_with_data_attributes() {
    let source = "First section\n\n--- {data-transition=\"slide\"}\n\nSecond section";
    let out = render_markdown(source).await;
    let expected_attr = r#"data-transition="slide""#;
    assert!(
        out.contains(expected_attr),
        "Section should have data-transition='slide'. Got: {out}"
    );
}
/// An id, a class and a data attribute on the same rule all reach the output.
#[tokio::test]
async fn test_section_attrs_mixed() {
    let source = "First section\n\n--- {#main .highlight data-bg=\"blue\"}\n\nSecond section";
    let out = render_markdown(source).await;

    let id_attr = r#"id="main""#;
    assert!(
        out.contains(id_attr),
        "Section should have id='main'. Got: {out}"
    );

    let class_attr = r#"class="highlight""#;
    assert!(
        out.contains(class_attr),
        "Section should have class='highlight'. Got: {out}"
    );

    let data_attr = r#"data-bg="blue""#;
    assert!(
        out.contains(data_attr),
        "Section should have data-bg='blue'. Got: {out}"
    );
}
/// Each attributed rule in a document opens its own section with its own id.
#[tokio::test]
async fn test_section_attrs_multiple_rules() {
    let source = "Section 0\n\n--- {#one}\n\nSection 1\n\n--- {#two}\n\nSection 2";
    let out = render_markdown(source).await;

    let first_tag = r#"<section id="one">"#;
    assert!(
        out.contains(first_tag),
        "First rule section should have id='one'. Got: {out}"
    );

    let second_tag = r#"<section id="two">"#;
    assert!(
        out.contains(second_tag),
        "Second rule section should have id='two'. Got: {out}"
    );
}
/// A bare `---` rule still yields at least one plain `<section>` and an
/// `<hr />` divider.
#[tokio::test]
async fn test_plain_rule_still_works() {
    let out = render_markdown("First section\n\n---\n\nSecond section").await;

    let plain_sections = out.matches("<section>").count();
    assert!(
        plain_sections >= 1,
        "Plain rule should create sections. Got: {out}"
    );

    let has_divider = out.contains("<hr />");
    assert!(has_divider, "Should contain <hr /> divider. Got: {out}");
}
/// `---` followed by ordinary text (not an attribute block) shows up as an
/// em dash in the output.
#[tokio::test]
async fn test_em_dash_with_non_attrs_text() {
    let out = render_markdown("Some text\n\n--- not attrs\n\nMore text").await;
    let has_em_dash = out.contains("—");
    assert!(has_em_dash, "Em dash should be preserved. Got: {out}");
}
/// An empty `{}` attribute block behaves like a plain rule: a bare
/// `<section>` plus an `<hr />`.
#[tokio::test]
async fn test_section_empty_attrs() {
    let out = render_markdown("First section\n\n--- {}\n\nSecond section").await;

    let has_plain_section = out.contains("<section>");
    assert!(
        has_plain_section,
        "Empty attrs should create plain section. Got: {out}"
    );

    let has_divider = out.contains("<hr />");
    assert!(has_divider, "Should contain <hr /> divider. Got: {out}");
}
/// Extra spaces inside the attribute braces do not disturb id or class parsing.
#[tokio::test]
async fn test_section_attrs_with_whitespace() {
    let source = "First section\n\n--- { #intro .highlight }\n\nSecond section";
    let out = render_markdown(source).await;

    let id_attr = r#"id="intro""#;
    assert!(
        out.contains(id_attr),
        "Whitespace should not affect ID parsing. Got: {out}"
    );

    let class_attr = r#"class="highlight""#;
    assert!(
        out.contains(class_attr),
        "Whitespace should not affect class parsing. Got: {out}"
    );
}
/// Attribute values wrapped in typographic quotes (U+201C / U+201D) come out
/// with straight quotes.
#[tokio::test]
async fn test_section_attrs_curly_quotes() {
    let source = "First section\n\n--- {data-x=\u{201C}value\u{201D}}\n\nSecond section";
    let out = render_markdown(source).await;
    let expected_attr = r#"data-x="value""#;
    assert!(
        out.contains(expected_attr),
        "Curly quotes should be normalized. Got: {out}"
    );
}
/// An `&` inside a section attribute value is emitted as the `&amp;` entity.
#[tokio::test]
async fn test_section_attrs_html_escaping() {
    let md = "First section\n\n--- {data-val=\"a & b\"}\n\nSecond section";
    let html = render_markdown(md).await;
    // Look for the entity, not a bare `&`: a bare-ampersand check would also
    // accept unescaped output and so could not verify escaping.
    assert!(
        html.contains("&amp;"),
        "HTML special chars should be escaped. Got: {}",
        html
    );
    assert!(
        html.contains(r#"data-val="a &amp; b""#),
        "Value should have escaped &. Got: {}",
        html
    );
}
/// Marker words passed to the incomplete-marker tests in this module.
const DEFAULT_MARKERS: &[&str] = &["TK", "TODO", "FIXME", "XXX"];
/// The regex built from the default markers matches each marker as a leading
/// whole word and rejects longer words, other casings and unrelated text.
#[test]
fn test_build_incomplete_marker_regex_defaults() {
    let markers = default_incomplete_markers_for_test();
    let re = build_incomplete_marker_regex(&markers).expect("regex");

    for hit in ["TK", "TK rewrite this", "TODO foo", "FIXME(name)", "XXX:"] {
        assert!(re.is_match(hit));
    }
    for miss in ["TKTK", "TODOs", "Tk", "todo", "Tomato"] {
        assert!(!re.is_match(miss));
    }
}
/// No regex is built for an empty marker list or for a list holding only an
/// empty string.
#[test]
fn test_build_incomplete_marker_regex_empty() {
    let no_markers: Vec<String> = Vec::new();
    assert!(build_incomplete_marker_regex(&no_markers).is_none());

    let blank_marker = vec!["".to_string()];
    assert!(build_incomplete_marker_regex(&blank_marker).is_none());
}
/// A marker containing a regex metacharacter (`(`) does not break
/// compilation, and the other marker still matches.
#[test]
fn test_build_incomplete_marker_regex_escapes_metachars() {
    let markers: Vec<String> = vec![String::from("FOO("), String::from("BAR")];
    let compiled = build_incomplete_marker_regex(&markers).expect("regex compiles");
    assert!(compiled.is_match("BAR foo"));
    assert!(!compiled.is_match("Tomato"));
}
fn default_incomplete_markers_for_test() -> Vec<String> {
DEFAULT_MARKERS.iter().map(|s| s.to_string()).collect()
}
/// A paragraph starting with a marker gets the `mbr-incomplete` span as its
/// first child and keeps its text.
#[tokio::test]
async fn test_incomplete_paragraph() {
    let out = render_markdown_marked("TK rewrite this paragraph.", DEFAULT_MARKERS).await;
    let span_in_paragraph = r#"<p><span class="mbr-incomplete">"#;
    assert!(
        out.contains(span_in_paragraph),
        "Paragraph should have span as first child. Got: {out}"
    );
    assert!(out.contains("TK rewrite"), "TK text preserved: {out}");
}
/// A heading starting with a marker contains the `mbr-incomplete` span, and
/// the span starts after the `<h2` opening.
#[tokio::test]
async fn test_incomplete_heading() {
    let span_open = r#"<span class="mbr-incomplete">"#;
    let out = render_markdown_marked("## TODO finish this", DEFAULT_MARKERS).await;

    assert!(
        out.contains(span_open),
        "Span should be present in heading. Got: {out}"
    );
    assert!(out.contains("<h2"), "h2 element present: {out}");

    let heading_at = out.find("<h2").unwrap();
    let span_at = out.find(span_open).unwrap();
    assert!(span_at > heading_at, "span should be inside h2: {out}");
}
/// In a tight list, only the item starting with a marker gets a span, placed
/// directly inside its `<li>`.
#[tokio::test]
async fn test_incomplete_tight_list_item() {
    let out = render_markdown_marked("- TK item one\n- normal item", DEFAULT_MARKERS).await;

    let span_in_item = r#"<li><span class="mbr-incomplete">"#;
    assert!(
        out.contains(span_in_item),
        "Span should follow <li> for tight list: {out}"
    );

    let span_total = out.matches(r#"<span class="mbr-incomplete">"#).count();
    assert_eq!(span_total, 1, "Only one span expected: {out}");
}
/// In a loose list, the span sits inside the item's inner `<p>`, not directly
/// under `<li>`.
#[tokio::test]
async fn test_incomplete_loose_list_item() {
    let source = "- TK draft this\n\n- finished item\n";
    let out = render_markdown_marked(source, DEFAULT_MARKERS).await;

    let span_in_paragraph = r#"<p><span class="mbr-incomplete">"#;
    assert!(
        out.contains(span_in_paragraph),
        "Span should wrap inner <p> in loose list: {out}"
    );

    let span_in_item = r#"<li><span class="mbr-incomplete">"#;
    assert!(
        !out.contains(span_in_item),
        "Loose-list <li> should not have direct span child: {out}"
    );
}
/// A table cell starting with a marker gets the span directly inside its `<td>`.
#[tokio::test]
async fn test_incomplete_table_cell() {
    let table = "| H |\n|---|\n| TK cell |\n";
    let out = render_markdown_marked(table, DEFAULT_MARKERS).await;
    let span_in_cell = r#"<td><span class="mbr-incomplete">"#;
    assert!(
        out.contains(span_in_cell),
        "Span should follow <td> for incomplete cell: {out}"
    );
}
/// A paragraph starting with `!> ` becomes a tip alert blockquote, and the
/// marker is stripped from the output in both raw and HTML-escaped form.
#[tokio::test]
async fn test_remark_hint_tip() {
    let html = render_markdown("!> tip").await;
    assert!(
        html.contains(r#"<blockquote class="markdown-alert-tip">"#),
        "Expected tip alert blockquote: {html}"
    );
    assert!(
        html.contains("<p>tip</p>"),
        "Marker should be stripped: {html}"
    );
    // Check the entity-escaped form separately from the raw form; two
    // identical raw checks would leave the escaped leak undetected.
    assert!(!html.contains("!&gt;"), "Escaped marker leaked: {html}");
    assert!(!html.contains("!>"), "Raw marker leaked: {html}");
}
/// A paragraph starting with `?> ` becomes a warning alert blockquote with
/// the marker removed.
#[tokio::test]
async fn test_remark_hint_warning() {
    let out = render_markdown("?> warn").await;

    let alert_open = r#"<blockquote class="markdown-alert-warning">"#;
    assert!(
        out.contains(alert_open),
        "Expected warning alert blockquote: {out}"
    );

    let body_only = "<p>warn</p>";
    assert!(out.contains(body_only), "Marker should be stripped: {out}");
}
/// A paragraph starting with `x> ` becomes a caution alert blockquote with
/// the marker removed.
#[tokio::test]
async fn test_remark_hint_caution() {
    let out = render_markdown("x> caution").await;

    let alert_open = r#"<blockquote class="markdown-alert-caution">"#;
    assert!(
        out.contains(alert_open),
        "Expected caution alert blockquote: {out}"
    );

    let body_only = "<p>caution</p>";
    assert!(out.contains(body_only), "Marker should be stripped: {out}");
}
/// A `!> ` hint spanning two source lines keeps both lines inside the tip
/// alert, with the marker stripped in both raw and HTML-escaped form.
#[tokio::test]
async fn test_remark_hint_multiline() {
    let html = render_markdown("!> line one\nline two").await;
    assert!(
        html.contains(r#"<blockquote class="markdown-alert-tip">"#),
        "Expected tip alert blockquote: {html}"
    );
    assert!(html.contains("line one"), "First line retained: {html}");
    assert!(html.contains("line two"), "Second line retained: {html}");
    // Check the entity-escaped form separately from the raw form; two
    // identical raw checks would leave the escaped leak undetected.
    assert!(!html.contains("!&gt;"), "Escaped marker leaked: {html}");
    assert!(!html.contains("!>"), "Raw marker leaked: {html}");
}
/// `!>` without a following space is not turned into an alert.
#[tokio::test]
async fn test_remark_hint_requires_trailing_space() {
    let out = render_markdown("!>no-space").await;
    let converted = out.contains("markdown-alert");
    assert!(
        !converted,
        "Should not be converted without trailing space: {out}"
    );
}
/// A `!> ` sequence in the middle of a paragraph is not turned into an alert.
#[tokio::test]
async fn test_remark_hint_only_at_paragraph_start() {
    let out = render_markdown("text !> more").await;
    let converted = out.contains("markdown-alert");
    assert!(
        !converted,
        "Mid-paragraph marker should not be converted: {out}"
    );
}
/// A `!> ` line inside a fenced code block is left alone: no alert is
/// produced and the code text is still present in the output.
#[tokio::test]
async fn test_remark_hint_ignored_in_code_block() {
    let html = render_markdown("```\n!> foo\n```").await;
    assert!(
        !html.contains("markdown-alert"),
        "Code block content should not be converted: {html}"
    );
    // Accept either the entity-escaped or the raw form of `>`; the two sides
    // of the `||` must differ for the alternative to mean anything.
    assert!(
        html.contains("!&gt; foo") || html.contains("!> foo"),
        "Code content should render verbatim: {html}"
    );
}
/// The native `> [!TIP]` alert syntax still renders a tip alert blockquote
/// with its content.
#[tokio::test]
async fn test_native_github_alert_still_works() {
    let out = render_markdown("> [!TIP]\n> hello").await;
    let alert_open = r#"<blockquote class="markdown-alert-tip">"#;
    assert!(
        out.contains(alert_open),
        "Native GitHub alert should still render: {out}"
    );
    assert!(out.contains("hello"), "Alert content retained: {out}");
}
/// Inside a blockquote, the span goes on the inner paragraph and never
/// directly under `<blockquote>`.
#[tokio::test]
async fn test_incomplete_blockquote_paragraph() {
    let out = render_markdown_marked("> TK quote me", DEFAULT_MARKERS).await;

    let span_in_paragraph = r#"<p><span class="mbr-incomplete">"#;
    assert!(
        out.contains(span_in_paragraph),
        "Inner <p> should carry the span, not <blockquote>: {out}"
    );

    let span_in_quote = r#"<blockquote><span"#;
    assert!(
        !out.contains(span_in_quote),
        "Blockquote should not be span-wrapped: {out}"
    );
}
/// A marker written in bold is still detected; the span encloses the
/// `<strong>` element.
#[tokio::test]
async fn test_incomplete_with_strong_emphasis() {
    let out = render_markdown_marked("**TK** finish later", DEFAULT_MARKERS).await;
    let span_around_strong = r#"<p><span class="mbr-incomplete"><strong>TK</strong>"#;
    assert!(
        out.contains(span_around_strong),
        "Span should wrap <strong>TK</strong>: {out}"
    );
}
/// A marker used as link text is still detected; the span encloses the `<a>`
/// element.
#[tokio::test]
async fn test_incomplete_with_link() {
    let source = "[TK](https://example.com) check this";
    let out = render_markdown_marked(source, DEFAULT_MARKERS).await;
    let span_around_link = r#"<p><span class="mbr-incomplete"><a "#;
    assert!(
        out.contains(span_around_link),
        "Span should wrap the link: {out}"
    );
}
/// An ordinary word beginning with "T" does not trigger the incomplete span.
#[tokio::test]
async fn test_incomplete_negative_tomato() {
    let out = render_markdown_marked("Tomato is red.", DEFAULT_MARKERS).await;
    let flagged = out.contains("mbr-incomplete");
    assert!(!flagged, "'Tomato' should not match: {out}");
}
/// Marker matching is case-sensitive: neither `Tk` nor `todo` produces a span.
#[tokio::test]
async fn test_incomplete_negative_lowercase() {
    let mixed_case = render_markdown_marked("Tk lowercase ignored.", DEFAULT_MARKERS).await;
    assert!(
        !mixed_case.contains("mbr-incomplete"),
        "Mixed case 'Tk' should not match: {mixed_case}"
    );

    let lower_case = render_markdown_marked("todo lowercase.", DEFAULT_MARKERS).await;
    assert!(
        !lower_case.contains("mbr-incomplete"),
        "lowercase 'todo' should not match: {lower_case}"
    );
}
/// A marker that is only the prefix of a longer word (`TKTK`, `TODOs`) does
/// not produce a span.
#[tokio::test]
async fn test_incomplete_negative_word_boundary() {
    let doubled = render_markdown_marked("TKTK shouldn't match.", DEFAULT_MARKERS).await;
    assert!(
        !doubled.contains("mbr-incomplete"),
        "TKTK should not match: {doubled}"
    );

    let plural = render_markdown_marked("TODOs are plural.", DEFAULT_MARKERS).await;
    assert!(
        !plural.contains("mbr-incomplete"),
        "'TODOs' should not match: {plural}"
    );
}
/// A marker appearing in the middle of a paragraph does not produce a span.
#[tokio::test]
async fn test_incomplete_negative_mid_paragraph() {
    let source = "This paragraph mentions TK in the middle.";
    let out = render_markdown_marked(source, DEFAULT_MARKERS).await;
    let flagged = out.contains("mbr-incomplete");
    assert!(!flagged, "Mid-paragraph TK should not match: {out}");
}
/// A marker at the start of fenced code does not produce a span.
#[tokio::test]
async fn test_incomplete_negative_code_block() {
    let fenced = "```\nTK code lines\n```\n";
    let out = render_markdown_marked(fenced, DEFAULT_MARKERS).await;
    let flagged = out.contains("mbr-incomplete");
    assert!(!flagged, "TK in code block should not match: {out}");
}
/// A marker inside YAML frontmatter does not produce a span.
#[tokio::test]
async fn test_incomplete_negative_frontmatter() {
    let source = "---\ntitle: TK rename later\n---\n\nNormal paragraph.";
    let out = render_markdown_marked(source, DEFAULT_MARKERS).await;
    let flagged = out.contains("mbr-incomplete");
    assert!(!flagged, "TK in frontmatter should not match: {out}");
}
/// Rendering through `render_markdown` (no marker list supplied) never emits
/// the incomplete span, even for text starting with `TK`.
#[tokio::test]
async fn test_incomplete_disabled_no_span() {
    let out = render_markdown("TK should not be highlighted.").await;
    let flagged = out.contains("mbr-incomplete");
    assert!(!flagged, "Disabled flag suppresses span: {out}");
}
/// With a custom marker list, only the configured marker matches; a default
/// marker such as `TK` is ignored.
#[tokio::test]
async fn test_incomplete_custom_markers() {
    let with_note = render_markdown_marked("NOTE this draft.", &["NOTE"]).await;
    let span_in_paragraph = r#"<p><span class="mbr-incomplete">"#;
    assert!(
        with_note.contains(span_in_paragraph),
        "Custom marker NOTE should match: {with_note}"
    );

    let with_tk = render_markdown_marked("TK ignored under custom list.", &["NOTE"]).await;
    assert!(
        !with_tk.contains("mbr-incomplete"),
        "TK should not match when only NOTE configured: {with_tk}"
    );
}
/// An empty marker list results in no spans at all.
#[tokio::test]
async fn test_incomplete_empty_markers_no_op() {
    let out = render_markdown_marked("TK still here.", &[]).await;
    let flagged = out.contains("mbr-incomplete");
    assert!(!flagged, "Empty marker list should not inject spans: {out}");
}
/// Builds an owned `HashSet<String>` from a slice of string slices;
/// duplicates collapse into a single entry.
fn make_sources(sources: &[&str]) -> HashSet<String> {
    let mut owned = HashSet::new();
    for name in sources {
        owned.insert(String::from(*name));
    }
    owned
}
/// `[[Tags:rust]]` with `tags` configured links to `/tags/rust/` and uses the
/// tag value as link text.
#[tokio::test]
async fn test_wikilink_transformation() {
    let tag_sources = make_sources(&["tags"]);
    let out = render_markdown_with_tags("Check out [[Tags:rust]] for more info.", tag_sources).await;

    let expected_href = r#"href="/tags/rust/""#;
    assert!(
        out.contains(expected_href),
        "Wikilink should transform to tag URL. Got: {out}"
    );
    assert!(
        out.contains(">rust<"),
        "Link text should be the tag value. Got: {out}"
    );
}
/// A tag value with spaces is normalized in the URL (`joshua_jay`) while the
/// link text keeps the original spelling.
#[tokio::test]
async fn test_wikilink_with_spaces() {
    let tag_sources = make_sources(&["performers"]);
    let source = "Watch [[performers:Joshua Jay]] perform!";
    let out = render_markdown_with_tags(source, tag_sources).await;

    let expected_href = r#"href="/performers/joshua_jay/""#;
    assert!(
        out.contains(expected_href),
        "Wikilink with spaces should normalize URL. Got: {out}"
    );
    assert!(
        out.contains(">Joshua Jay<"),
        "Link text should preserve original case. Got: {out}"
    );
}
/// A wikilink whose prefix is not a configured tag source still renders as a
/// link that mentions the original wikilink content.
#[tokio::test]
async fn test_wikilink_unknown_source_becomes_native_wikilink() {
    let tag_sources = make_sources(&["tags"]);
    let out = render_markdown_with_tags("See [[category:books]] for more.", tag_sources).await;

    assert!(
        out.contains("<a"),
        "Wikilink should become a link via pulldown-cmark. Got: {out}"
    );
    assert!(
        out.contains("category:books"),
        "Link should reference the wikilink content. Got: {out}"
    );
}
/// A regular markdown link whose target is `Tags:rust` resolves to the tag
/// URL and keeps its own link text.
#[tokio::test]
async fn test_markdown_tag_link() {
    let tag_sources = make_sources(&["tags"]);
    let out = render_markdown_with_tags("[Learn Rust](Tags:rust)", tag_sources).await;

    let expected_href = r#"href="/tags/rust/""#;
    assert!(
        out.contains(expected_href),
        "Tag link should transform to tag URL. Got: {out}"
    );
    assert!(
        out.contains(">Learn Rust<"),
        "Link text should be preserved. Got: {out}"
    );
}
/// A markdown tag link with an already-normalized value resolves to the
/// matching tag URL.
#[tokio::test]
async fn test_markdown_tag_link_normalized() {
    let tag_sources = make_sources(&["performers"]);
    let source = "[Great performer](performers:joshua_jay)";
    let out = render_markdown_with_tags(source, tag_sources).await;

    let expected_href = r#"href="/performers/joshua_jay/""#;
    assert!(
        out.contains(expected_href),
        "Tag link should transform to tag URL. Got: {out}"
    );
}
/// Even when `https` is listed as a tag source, an `https://` URL is left
/// untouched.
#[tokio::test]
async fn test_url_scheme_not_treated_as_tag() {
    let tag_sources = make_sources(&["tags", "https"]);
    let source = "[Example](https://example.com)";
    let out = render_markdown_with_tags(source, tag_sources).await;

    let untouched_href = r#"href="https://example.com""#;
    assert!(
        out.contains(untouched_href),
        "URL schemes should not be treated as tag sources. Got: {out}"
    );
}
/// Two tag wikilinks in the same paragraph are both resolved.
#[tokio::test]
async fn test_multiple_wikilinks() {
    let tag_sources = make_sources(&["tags"]);
    let source = "Learn [[Tags:rust]] and [[Tags:python]] today!";
    let out = render_markdown_with_tags(source, tag_sources).await;

    let rust_href = r#"href="/tags/rust/""#;
    assert!(
        out.contains(rust_href),
        "First wikilink should work. Got: {out}"
    );

    let python_href = r#"href="/tags/python/""#;
    assert!(
        out.contains(python_href),
        "Second wikilink should work. Got: {out}"
    );
}
/// A dotted tag source name (`taxonomy.tags`) is usable as a wikilink prefix
/// and appears verbatim in the URL.
#[tokio::test]
async fn test_nested_tag_source() {
    let tag_sources = make_sources(&["taxonomy.tags"]);
    let out = render_markdown_with_tags("See [[taxonomy.tags:rust]] for more.", tag_sources).await;

    let expected_href = r#"href="/taxonomy.tags/rust/""#;
    assert!(
        out.contains(expected_href),
        "Nested tag source should work. Got: {out}"
    );
}
/// With no tag sources configured, a prefixed wikilink still renders as a
/// link that mentions the original content.
#[tokio::test]
async fn test_no_tag_sources_uses_native_wikilinks() {
    let no_sources = HashSet::new();
    let out = render_markdown_with_tags("See [[Tags:rust]] for more.", no_sources).await;

    assert!(
        out.contains("<a"),
        "Wikilink should become a link via pulldown-cmark. Got: {out}"
    );
    assert!(
        out.contains("Tags:rust"),
        "Link should reference the wikilink content. Got: {out}"
    );
}
/// An unprefixed `[[MyPage]]` wikilink renders as a link mentioning the page.
#[tokio::test]
async fn test_plain_wikilink_works() {
    let out = render_markdown("Check out [[MyPage]] for more.").await;

    assert!(
        out.contains("<a"),
        "Plain wikilink should become a link. Got: {out}"
    );
    assert!(
        out.contains("MyPage"),
        "Link should reference MyPage. Got: {out}"
    );
}
/// An unprefixed wikilink containing a space renders as a link and keeps the
/// page name.
#[tokio::test]
async fn test_plain_wikilink_with_spaces() {
    let out = render_markdown("See [[My Page]] here.").await;

    assert!(
        out.contains("<a"),
        "Wikilink with spaces should become a link. Got: {out}"
    );
    assert!(
        out.contains("My Page"),
        "Link should preserve the page name. Got: {out}"
    );
}
/// A tag wikilink and a plain wikilink in the same paragraph both render,
/// giving at least two anchors.
#[tokio::test]
async fn test_tag_and_plain_wikilinks_together() {
    let tag_sources = make_sources(&["tags"]);
    let source = "See [[Tags:rust]] and also [[MyPage]] for info.";
    let out = render_markdown_with_tags(source, tag_sources).await;

    let tag_href = r#"href="/tags/rust/""#;
    assert!(
        out.contains(tag_href),
        "Tag wikilink should transform to /tags/rust/. Got: {out}"
    );
    assert!(
        out.contains("MyPage"),
        "Plain wikilink should reference MyPage. Got: {out}"
    );

    let anchors = out.matches("<a").count();
    assert!(
        anchors >= 2,
        "Should have at least 2 links. Got {anchors} in: {out}"
    );
}
/// A fenced block with an unrecognized language tag still gets a
/// `language-…` class and keeps its content.
#[tokio::test]
async fn test_code_blocks_with_unsupported_language() {
    let out = render_markdown("```unknownlang\nsome code\n```").await;

    let code_open = "<pre><code class=\"language-unknownlang\">";
    assert!(
        out.contains(code_open),
        "Unsupported language should still get a language class. Got: {out}"
    );
    assert!(out.contains("some code"));
}
/// Three fenced blocks (two with known language tags and one with an unknown
/// tag between them) each keep their language class and their content.
#[tokio::test]
async fn test_code_blocks_mixed_supported_and_unsupported_languages() {
    let source = concat!(
        "```rust\nfn main() {}\n```\n\n",
        "```garbage_lang_404\nfoo bar\n```\n\n",
        "```python\nprint(1)\n```",
    );
    let out = render_markdown(source).await;

    assert!(
        out.contains("language-rust"),
        "Rust block missing. Got: {out}"
    );
    assert!(
        out.contains("language-garbage_lang_404"),
        "Unsupported block missing. Got: {out}"
    );
    assert!(
        out.contains("language-python"),
        "Python block missing. Got: {out}"
    );

    for snippet in ["fn main", "foo bar", "print(1)"] {
        assert!(out.contains(snippet));
    }
}
}