use pulldown_cmark::{Event, Tag, TagEnd};
use std::collections::HashMap;
use crate::content::Slug;
use crate::html::escape_html;
/// Turns heading text into an anchor string by converting it to a [`Slug`]
/// and returning that slug's string form.
fn slugify(text: &str) -> String {
    let slug = Slug::from(text);
    slug.to_string()
}
/// One table-of-contents entry produced for a heading.
#[derive(Debug, Clone)]
pub struct HeadingEntry {
    /// Heading level as emitted in the output HTML (not the source level).
    pub level: u8,
    /// Plain text collected from the heading's inline content.
    pub text: String,
    /// The value written to the heading's `id` attribute.
    pub id: Slug,
}
/// Amount added to a heading's source level to obtain the level it is
/// rendered at (a source `#` is emitted as `<h2>`). The sum is capped at 6
/// where it is applied.
const HEADING_LEVEL_OFFSET: u8 = 1;
/// Working state for a heading whose start tag has been seen but whose end
/// tag has not: its events are held back until the final id is known.
struct HeadingBuf<'a> {
// Level taken from the heading's start tag, before any offset is applied.
source_level: u8,
// Explicit id attached to the heading's start tag, if there was one.
custom_id: Option<String>,
// Plain text accumulated from the heading's inline events; becomes the TOC
// text and, when there is no custom id, the input to `slugify`.
text: String,
// Events seen between the heading's start and end tags, replayed unchanged
// between the generated opening and closing HTML.
inner: Vec<Event<'a>>,
}
/// Replaces every heading in `events` with raw HTML heading tags that carry a
/// unique `id` attribute, and collects a table-of-contents entry per heading.
///
/// For each heading:
/// - the rendered level is the source level plus [`HEADING_LEVEL_OFFSET`],
///   capped at 6;
/// - the id is the heading's explicit id if it has one, otherwise the slug of
///   its collected text; either way it is de-duplicated against ids already
///   handed out in this call;
/// - the inline events between the start and end tags are passed through
///   unchanged between the generated opening and closing tags.
///
/// Only the level and id are read from the heading's start tag; its remaining
/// fields are not emitted. Events outside headings are forwarded untouched.
///
/// Returns the rewritten event list and the TOC entries in source order.
pub fn inject_heading_ids<'a>(events: Vec<Event<'a>>) -> (Vec<Event<'a>>, Vec<HeadingEntry>) {
    let mut out = Vec::with_capacity(events.len());
    let mut toc = Vec::new();
    let mut used_ids: HashMap<String, usize> = HashMap::new();
    // `Some` only while we are between a heading's start and end tags.
    let mut heading_buf: Option<HeadingBuf<'a>> = None;

    for event in events {
        match heading_buf.take() {
            None => match event {
                Event::Start(Tag::Heading { level, id, .. }) => {
                    heading_buf = Some(HeadingBuf {
                        source_level: level as u8,
                        custom_id: id.map(|s| s.to_string()),
                        text: String::new(),
                        inner: Vec::new(),
                    });
                }
                other => out.push(other),
            },
            Some(mut state) => match event {
                Event::End(TagEnd::Heading(_)) => {
                    let rendered_level = (state.source_level + HEADING_LEVEL_OFFSET).min(6);
                    // An explicit id is used verbatim; otherwise derive one from the text.
                    let base_id = match state.custom_id {
                        Some(custom) => custom,
                        None => slugify(&state.text),
                    };
                    let id = Slug::new_raw(unique_id(&base_id, &mut used_ids));

                    out.push(Event::Html(
                        format!("<h{} id=\"{}\">", rendered_level, escape_html(id.as_ref())).into(),
                    ));
                    out.extend(state.inner);
                    out.push(Event::Html(format!("</h{}>", rendered_level).into()));

                    // Pushed last so `id` can be moved instead of cloned.
                    toc.push(HeadingEntry {
                        level: rendered_level,
                        text: state.text,
                        id,
                    });
                }
                other => {
                    match &other {
                        Event::Text(t) | Event::Code(t) => state.text.push_str(t),
                        // A line break inside a multi-line heading separates words;
                        // without this "Foo\nBar" would be collected as "FooBar".
                        Event::SoftBreak | Event::HardBreak => state.text.push(' '),
                        _ => {}
                    }
                    state.inner.push(other);
                    heading_buf = Some(state);
                }
            },
        }
    }

    (out, toc)
}
/// Returns an id derived from `base` that has not been returned before for
/// this `used` map.
///
/// The first request for a given `base` yields `base` itself. Later requests
/// yield `base-N`, starting at the occurrence number and skipping any value
/// that is already taken. Every id handed out is recorded in `used`, so a
/// generated `intro-2` can never clash with a heading whose own base id is
/// literally `intro-2`, whichever of the two appears first.
///
/// `used` maps an id to a counter: how many times it has been requested as a
/// base, or the last suffix tried for it.
fn unique_id(base: &str, used: &mut HashMap<String, usize>) -> String {
    let seen = used.entry(base.to_string()).or_insert(0);
    *seen += 1;
    if *seen == 1 {
        return base.to_string();
    }

    let mut suffix = *seen;
    loop {
        let candidate = format!("{base}-{suffix}");
        if !used.contains_key(&candidate) {
            // Reserve the generated id so a later heading with this exact base
            // gets suffixed instead of duplicating it.
            used.insert(candidate.clone(), 1);
            // Remember how far we probed so the next duplicate starts after it.
            if let Some(seen) = used.get_mut(base) {
                *seen = suffix;
            }
            return candidate;
        }
        suffix += 1;
    }
}
#[cfg(test)]
mod tests {
    use insta::assert_snapshot;
    use pulldown_cmark::{Options, Parser};

    use super::*;

    // Parses `input` with heading attributes enabled, runs it through
    // `inject_heading_ids`, and returns the rendered HTML plus the TOC.
    fn render_with_anchors(input: &str) -> (String, Vec<HeadingEntry>) {
        let mut opts = Options::empty();
        opts.insert(Options::ENABLE_HEADING_ATTRIBUTES);
        let events: Vec<_> = Parser::new_ext(input, opts).collect();
        let (events, toc) = inject_heading_ids(events);
        (crate::markdown::render_html(events), toc)
    }

    #[test]
    fn simple_heading_gets_id() {
        let (html, toc) = render_with_anchors("## Hello World\n\nSome text.");
        assert_snapshot!(html);
        assert_eq!(toc.len(), 1);
        assert_eq!(toc[0].level, 3);
        assert_eq!(toc[0].text, "Hello World");
        assert_eq!(toc[0].id, "hello-world");
    }

    #[test]
    fn duplicate_headings_get_suffixed() {
        let (html, toc) = render_with_anchors("## Intro\n\n## Intro\n");
        assert_snapshot!(html);
        assert_eq!(toc[0].id, "intro");
        assert_eq!(toc[1].id, "intro-2");
        assert_eq!(toc[0].level, 3);
        assert_eq!(toc[1].level, 3);
    }

    #[test]
    fn heading_levels_shifted_by_one() {
        let (html, toc) = render_with_anchors("# H1\n\n### H3\n");
        assert_snapshot!(html);
        assert_eq!(toc.len(), 2);
        assert_eq!(toc[0].level, 2);
        assert_eq!(toc[0].id, "h1");
        assert_eq!(toc[1].level, 4);
        assert_eq!(toc[1].id, "h3");
    }

    #[test]
    fn all_source_headings_included_in_toc() {
        let (html, toc) = render_with_anchors("# Title\n\n## Section\n\n### Sub\n");
        assert_snapshot!(html);
        assert_eq!(toc.len(), 3);
        assert_eq!(toc[0].level, 2);
        assert_eq!(toc[0].text, "Title");
        assert_eq!(toc[1].level, 3);
        assert_eq!(toc[1].text, "Section");
        assert_eq!(toc[2].level, 4);
        assert_eq!(toc[2].text, "Sub");
    }

    #[test]
    fn h5_h6_capped_at_h6() {
        let (html, toc) = render_with_anchors("##### Deep\n\n###### Deepest\n");
        assert_snapshot!(html);
        assert_eq!(toc[0].level, 6);
        assert_eq!(toc[1].level, 6);
    }

    #[test]
    fn toc_order_matches_source() {
        let (_, toc) = render_with_anchors("# A\n\n## B\n\n# C\n\n### D\n");
        let ids: Vec<&str> = toc.iter().map(|h| h.id.as_ref()).collect();
        assert_eq!(ids, vec!["a", "b", "c", "d"]);
    }

    #[test]
    fn inline_formatting_stripped_from_toc_text() {
        let (html, toc) = render_with_anchors("# _Intro_\n");
        assert_snapshot!(html);
        assert_eq!(toc[0].text, "Intro");
        assert_eq!(toc[0].level, 2);
    }

    #[test]
    fn special_chars_stripped_from_slug() {
        let (_, toc) = render_with_anchors("# Hello, World!\n");
        assert_eq!(toc[0].id, "hello-world");
    }

    #[test]
    fn no_headings_produces_empty_toc() {
        let (_, toc) = render_with_anchors("Just a paragraph.\n");
        assert!(toc.is_empty());
    }

    #[test]
    fn custom_id_overrides_slug() {
        let (_, toc) = render_with_anchors("## Hello World {#greet}\n");
        assert_eq!(toc[0].id, "greet");
        assert_eq!(toc[0].text, "Hello World");
    }

    #[test]
    fn custom_id_used_verbatim_not_slugified() {
        let (_, toc) = render_with_anchors("## Section {#API_v2.1}\n");
        assert_eq!(toc[0].id, "API_v2.1");
    }

    #[test]
    fn duplicate_custom_ids_get_suffixed() {
        let (_, toc) = render_with_anchors("## First {#same}\n\n## Second {#same}\n");
        assert_eq!(toc[0].id, "same");
        assert_eq!(toc[1].id, "same-2");
    }

    #[test]
    fn custom_id_collides_with_auto_id() {
        let (_, toc) = render_with_anchors("## intro\n\n## Other {#intro}\n");
        assert_eq!(toc[0].id, "intro");
        assert_eq!(toc[1].id, "intro-2");
    }

    #[test]
    fn custom_id_html_escaped_in_output() {
        let (html, _) = render_with_anchors("## Heading {#weird&id}\n");
        // The id is passed through `escape_html` before being written into the
        // attribute, so the ampersand must appear as an entity, not raw.
        assert!(html.contains(r#"id="weird&amp;id""#), "html was: {html}");
    }

    #[test]
    fn slugify_basic() {
        // Goes through `slugify` itself so a change to the helper is caught here.
        assert_eq!(slugify("Hello World"), "hello-world");
        assert_eq!(slugify("The `foo` Function"), "the-foo-function");
        assert_eq!(slugify("foo-bar"), "foo-bar");
        assert_eq!(slugify("Héllo"), "héllo");
    }
}