use std::{borrow::Borrow, iter::Peekable};
use log::error;
use pulldown_cmark::*;
use crate::{
highlight,
search::{TermFrequenciesBuilder, TermFrequenciesIndex},
utils,
};
/// One entry in the parsed table-of-contents tree.
#[derive(Debug, PartialEq)]
pub(crate) enum TocElement {
/// A run of content rendered to HTML that is not itself a heading.
Html(String),
/// Marks the position of a paragraph consisting solely of `[TOC]`.
TocReference,
/// A heading together with the elements nested beneath it.
Node(TocNode),
}
/// A heading extracted from the document.
#[derive(Debug, PartialEq)]
pub(crate) struct Heading {
/// The heading's level as reported by the markdown parser.
pub level: HeadingLevel,
/// The heading's inner content rendered to HTML, without the enclosing
/// heading tag itself.
pub contents: String,
/// Anchor slug: the explicit fragment attached to the heading when there is
/// one, otherwise `utils::slugify` applied to the heading's plain text.
pub slug: String,
}
/// A heading and everything nested underneath it.
#[derive(Debug, PartialEq)]
pub(crate) struct TocNode {
/// The heading that opens this section.
pub heading: Heading,
/// The elements following the heading, up to (but not including) the next
/// heading at the same or a shallower level.
pub contents: Vec<TocElement>,
}
impl TocNode {
    /// Iterates over the direct child headings of this node.
    ///
    /// Only `TocElement::Node` entries of `contents` are yielded; HTML runs
    /// and `[TOC]` markers are skipped, and nested grandchildren are not
    /// descended into.
    // The `'_` makes it visible that the returned iterator borrows from `self`.
    pub fn nodes(&self) -> Nodes<'_> {
        Nodes(&self.contents, 0)
    }
}
/// Walker over a tree of `TocElement`s.
///
/// The wrapped vector is a stack of elements still to be visited; the next
/// element to yield is its last entry.
pub(crate) struct Elements<'a>(Vec<&'a TocElement>);
impl<'a> Elements<'a> {
    /// Creates a walker that starts with the first entry of `elements`.
    pub fn new(elements: &'a [TocElement]) -> Self {
        // Stored back-to-front so that the first element sits on top of the stack.
        let stack = elements.iter().rev().collect();
        Elements(stack)
    }
}
impl<'a> Iterator for Elements<'a> {
    type Item = &'a TocElement;

    /// Yields the element on top of the stack.
    ///
    /// When that element is a node, its children are pushed (in reverse) so
    /// they are visited, in order, before any of the node's later siblings.
    fn next(&mut self) -> Option<Self::Item> {
        let current = self.0.pop()?;
        if let TocElement::Node(node) = current {
            self.0.extend(node.contents.iter().rev());
        }
        Some(current)
    }
}
/// Iterator over the `TocElement::Node` entries of a single slice.
///
/// Holds the slice being scanned and the index of the next entry to inspect.
pub(crate) struct Nodes<'a>(&'a [TocElement], usize);
impl<'a> Iterator for Nodes<'a> {
    type Item = &'a TocNode;

    /// Advances past non-node entries and returns the next node, or `None`
    /// once the slice is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // `?` ends the iteration as soon as the cursor runs off the slice.
            let element = self.0.get(self.1)?;
            self.1 += 1;
            if let TocElement::Node(node) = element {
                return Some(node);
            }
        }
    }
}
/// A parsed document: its table-of-contents tree plus the search index built
/// from its text.
#[derive(Debug)]
pub(crate) struct Toc(Vec<TocElement>, TermFrequenciesIndex);
impl Toc {
    /// Parses `markdown` into a table-of-contents tree and a search index.
    ///
    /// The markdown is parsed with every parser option enabled. Events are
    /// fed to the search index first and to the code-block highlighter
    /// second, so the index sees the raw code text rather than the
    /// highlighter's output.
    pub fn new(markdown: &str) -> Self {
        let parser = Parser::new_ext(markdown, Options::all());
        let mut index_builder = TermFrequenciesBuilder::default();
        let parser = build_search_index(&mut index_builder, parser);
        let parser = hl_codeblocks(parser);
        // Consuming the event stream here ends the mutable borrow of
        // `index_builder`, so it can be finalised below.
        let events = parse_toc_events(parser);
        Toc(events, index_builder.finalise())
    }

    /// Returns the rendered contents of the first top-level heading, if the
    /// document has one.
    pub fn primary_heading(&self) -> Option<&String> {
        self.0.iter().find_map(|element| match element {
            TocElement::Node(node) => Some(&node.heading.contents),
            _ => None,
        })
    }

    /// Iterates over the top-level heading nodes of the document.
    // The `'_` makes it visible that the returned iterator borrows from `self`.
    pub fn nodes(&self) -> Nodes<'_> {
        Nodes(&self.0, 0)
    }

    /// Walks every element of the tree, descending into heading nodes.
    // The `'_` makes it visible that the returned iterator borrows from `self`.
    pub fn walk_elements(&self) -> Elements<'_> {
        Elements::new(&self.0)
    }

    /// Unwraps the top-level elements; only compiled for tests.
    #[cfg(test)]
    fn into_inner(self) -> Vec<TocElement> {
        self.0
    }

    /// Returns the search index built while parsing the document.
    pub fn search_index(&self) -> &TermFrequenciesIndex {
        &self.1
    }
}
fn hl_codeblocks<'a, I>(parser: I) -> impl Iterator<Item = Event<'a>>
where
I: Iterator<Item = Event<'a>>,
{
let mut state: Option<String> = None;
let hl = highlight::get_hilighter();
parser.flat_map(move |event| {
if let Some(mut hl_state) = state.take() {
match event {
Event::Text(txt) => {
hl_state.push_str(txt.as_ref());
state = Some(hl_state);
vec![]
}
Event::End(Tag::CodeBlock(kind)) => {
state = None;
hl.hl_codeblock(
match &kind {
CodeBlockKind::Indented => None,
CodeBlockKind::Fenced(name) => Some(name.as_ref()),
},
&hl_state,
)
}
_ => {
error!("Unexpected item in codeblock: {:?}", event);
vec![event]
}
}
} else {
match event {
Event::Start(Tag::CodeBlock(_)) => {
state = Some(String::new());
vec![]
}
_ => vec![event],
}
}
})
}
/// Feeds the text carried by the events flowing through into `index_builder`.
///
/// The contents of `Code`, `Text` and `Html` events are added as terms; every
/// other event is ignored. Events are yielded unchanged, and terms are only
/// recorded as the returned iterator is consumed.
fn build_search_index<'a, 'p, I>(
    index_builder: &'p mut TermFrequenciesBuilder,
    parser: I,
) -> impl Iterator<Item = Event<'a>> + 'p
where
    I: Iterator<Item = Event<'a>> + 'p,
{
    parser.inspect(|event| {
        if let Event::Code(terms) | Event::Text(terms) | Event::Html(terms) = event {
            index_builder.add_terms(terms);
        }
    })
}
/// Flattens a sequence of events into plain text.
///
/// The contents of `Text`, `Code` and `Html` events are concatenated in
/// order with no separator; all other events contribute nothing.
fn events_to_plain<'a, I, E>(events: I) -> String
where
    I: Iterator<Item = E>,
    E: Borrow<Event<'a>>,
{
    events.fold(String::new(), |mut plain, item| {
        if let Event::Text(piece) | Event::Code(piece) | Event::Html(piece) = item.borrow() {
            plain.push_str(piece);
        }
        plain
    })
}
/// Renders the buffered events to HTML and empties the buffer.
///
/// Returns `None`, leaving the buffer untouched, when there is nothing to
/// render.
fn drain_events_to_html(events: &mut Vec<Event>) -> Option<String> {
    if events.is_empty() {
        return None;
    }
    let mut html = String::new();
    pulldown_cmark::html::push_html(&mut html, events.drain(..));
    Some(html)
}
/// Builds the table-of-contents tree for a whole event stream.
///
/// Parsing starts with no enclosing heading, so every heading in the stream
/// is accepted regardless of its level.
fn parse_toc_events<'a, I>(events: I) -> Vec<TocElement>
where
    I: Iterator<Item = Event<'a>>,
{
    let mut stream = events.peekable();
    parse_toc_at_level(None, &mut stream)
}
/// Parses the elements that belong underneath a heading of `level`.
///
/// Events are consumed until the stream ends or the next event opens a
/// heading that is not deeper than `level`; that event is left in the stream
/// for the caller. With `level == None` the whole stream is consumed.
///
/// Ordinary content is buffered and flushed as `TocElement::Html` whenever a
/// heading or a `[TOC]` paragraph interrupts it. Each heading becomes a
/// `TocElement::Node` whose children are parsed recursively.
fn parse_toc_at_level<'a, I>(
    level: Option<HeadingLevel>,
    events: &mut Peekable<I>,
) -> Vec<TocElement>
where
    I: Iterator<Item = Event<'a>>,
{
    let mut pending = Vec::new();
    let mut toc = Vec::new();
    while let Some(event) = events.next_if(|candidate| is_below(level, candidate)) {
        match event {
            Event::Start(Tag::Heading(..)) => {
                // Flush whatever preceded the heading; the start tag itself is
                // dropped so the buffer only collects the heading's inner content.
                toc.extend(drain_events_to_html(&mut pending).map(TocElement::Html));
            }
            Event::End(Tag::Heading(heading_level, fragment, _classes)) => {
                // The slug must be computed before the buffer is drained below.
                let slug = match fragment {
                    Some(explicit) => explicit.to_owned(),
                    None => utils::slugify(&events_to_plain(pending.iter())),
                };
                let contents = drain_events_to_html(&mut pending).unwrap_or_default();
                let children = parse_toc_at_level(Some(heading_level), events);
                toc.push(TocElement::Node(TocNode {
                    heading: Heading {
                        level: heading_level,
                        contents,
                        slug,
                    },
                    contents: children,
                }));
            }
            Event::End(Tag::Paragraph) if in_toc(&pending) => {
                // Remove the paragraph start and the three `[`, `TOC`, `]` text events.
                pending.truncate(pending.len() - 4);
                toc.extend(drain_events_to_html(&mut pending).map(TocElement::Html));
                toc.push(TocElement::TocReference);
            }
            // Includes the end of any paragraph that is not a `[TOC]` marker.
            other => pending.push(other),
        }
    }
    toc.extend(drain_events_to_html(&mut pending).map(TocElement::Html));
    toc
}
/// Reports whether the buffered events end with the opening of a `[TOC]`
/// paragraph.
///
/// That is the case when the last four events are, in order, a paragraph
/// start followed by the text events `[`, `TOC` and `]`. Buffers with fewer
/// than four events simply yield `false`.
fn in_toc(current: &[Event]) -> bool {
    // A slice pattern avoids all index arithmetic, so empty or short buffers
    // cannot underflow.
    match current {
        [.., Event::Start(Tag::Paragraph), Event::Text(open), Event::Text(label), Event::Text(close)] => {
            open.as_ref() == "[" && label.as_ref() == "TOC" && close.as_ref() == "]"
        }
        _ => false,
    }
}
/// Decides whether `event` still belongs underneath a heading of `level`.
///
/// Only the start of a heading can end a section: it belongs below `level`
/// when its own level compares greater. Every other event, and every event
/// when there is no enclosing heading, is accepted.
fn is_below(level: Option<HeadingLevel>, event: &Event) -> bool {
    match (level, event) {
        (Some(enclosing), Event::Start(Tag::Heading(next_level, ..))) => *next_level > enclosing,
        _ => true,
    }
}
// Unit tests for the table-of-contents parser.
#[cfg(test)]
mod test {
use super::*;
/// Builds the expected `Heading` for `contents`, deriving the slug from the
/// contents with `utils::slugify`.
fn h(level: HeadingLevel, contents: &str) -> Heading {
let slug = utils::slugify(&contents);
hslug(level, contents, &slug)
}
/// Builds the expected `Heading` with an explicitly chosen slug, for cases
/// where the rendered contents and the slug source differ.
fn hslug(level: HeadingLevel, contents: &str, slug: &str) -> Heading {
Heading {
level,
contents: contents.into(),
slug: slug.into(),
}
}
/// Runs the full `Toc::new` pipeline and returns the top-level elements.
fn parse_toc(s: &str) -> Vec<TocElement> {
Toc::new(s).into_inner()
}
// Feeds a plain `Parser` straight into `parse_toc_events` and checks that a
// mixed document yields two top-level elements.
#[test]
fn parse_example_doc_toc() {
let parser = Parser::new(
"
# Heading 1.1
para one
## Heading 2.1
para two
para three
### Heading 3.1
```code
block four
```
## Heading 2.2
<img src=example.com/png>
# Heading 1.2
> last bit",
);
let toc = parse_toc_events(parser);
assert_eq!(2, toc.len());
}
// A document without headings becomes a single HTML element.
#[test]
fn parse_with_no_headings() {
let doc = "hello world";
let parser = Parser::new(doc);
let toc = parse_toc_events(parser);
assert_eq!(vec![TocElement::Html("<p>hello world</p>\n".into())], toc);
}
// A lone heading becomes a single node with no children.
#[test]
fn parse_with_single_heading() {
let doc = "# I am an H1";
let toc = parse_toc(doc);
assert_eq!(
vec![TocElement::Node(TocNode {
heading: h(HeadingLevel::H1, "I am an H1"),
contents: Vec::new()
})],
toc
);
}
// Inline formatting is kept in the rendered heading contents but does not
// appear in the slug.
#[test]
fn parse_heading_with_nested_formatting() {
let doc = "# I am `an` **H1**";
let toc = parse_toc(doc);
assert_eq!(
vec![TocElement::Node(TocNode {
heading: hslug(
HeadingLevel::H1,
"I am <code>an</code> <strong>H1</strong>",
"I-am-an-H1"
),
contents: Vec::new()
})],
toc
);
}
// A paragraph that is exactly `[TOC]` is replaced by a `TocReference`.
#[test]
fn parse_with_single_toc_reference() {
let doc = "[TOC]";
let toc = parse_toc(&doc);
assert_eq!(vec![TocElement::TocReference,], toc);
}
// Deeper headings nest under the preceding shallower heading, and a heading
// at the same or a shallower level closes the current section.
#[test]
fn parse_with_nested_headings() {
let doc = r#"
# Heading 1.1
## Heading 2.1
### Heading 3.1
## Heading 2.2
# Heading 1.2
"#;
let toc = parse_toc(doc);
assert_eq!(
vec![
TocElement::Node(TocNode {
heading: h(HeadingLevel::H1, "Heading 1.1"),
contents: vec![
TocElement::Node(TocNode {
heading: h(HeadingLevel::H2, "Heading 2.1"),
contents: vec![TocElement::Node(TocNode {
heading: h(HeadingLevel::H3, "Heading 3.1"),
contents: Vec::new()
})],
}),
TocElement::Node(TocNode {
heading: h(HeadingLevel::H2, "Heading 2.2"),
contents: Vec::new()
}),
]
}),
TocElement::Node(TocNode {
heading: h(HeadingLevel::H1, "Heading 1.2"),
contents: Vec::new()
}),
],
toc
)
}
}