#![doc = concat!(include_str!("../tests/example.md"), "```")]
#![doc = concat!(include_str!("../tests/example.txt"), "```")]
use hashbrown::HashMap;
use pulldown_cmark::{CowStr, Event, HeadingLevel, LinkType, Options, Tag, TagEnd};
use std::{mem, ops::AddAssign};
/// Content type string for the text this file produces: UTF-8 `text/plain`
/// in the `format="flowed"` variant.
pub const CONTENT_TYPE: &str = r#"text/plain; charset="utf-8"; format="flowed""#;
/// Upper bound, in columns, used by the wrapping code when judging whether a
/// space past the preferred width is still an acceptable break point. It is
/// also the number of `-` characters emitted for a horizontal rule.
const MAX_LINE_WIDTH: usize = 78;
/// Column the wrapping code aims for: text that fits within it is appended
/// without a line break, and break points at or before it are preferred.
const PREFERRED_LINE_WIDTH: usize = 66;
/// Returns the set of parser extensions this renderer is written for:
/// footnotes, strikethrough, smart punctuation and wikilinks.
///
/// Events belonging to other extensions are treated as unreachable by the
/// rendering code in this file, so a parser feeding it should be configured
/// with exactly these options.
pub fn parser_options() -> Options {
    let mut options = Options::empty();
    options.insert(Options::ENABLE_FOOTNOTES);
    options.insert(Options::ENABLE_STRIKETHROUGH);
    options.insert(Options::ENABLE_SMART_PUNCTUATION);
    options.insert(Options::ENABLE_WIKILINKS);
    options
}
/// Renders the Markdown events yielded by `iter` as plain text and appends
/// the result to `s`.
///
/// A fresh rendering state is created for every call; nothing is carried
/// over between calls except the contents of `s` itself.
pub fn push_text<'a, I>(s: &mut String, iter: I)
where
    I: Iterator<Item = Event<'a>>
{
    push_text_to_state(&mut State::new(s), iter);
}
/// Mutable rendering state shared by all event handlers while Markdown
/// events are turned into plain text.
struct State<'a, 's> {
/// Output buffer that the rendered text is appended to.
txt: &'s mut String,
/// Number of line breaks emitted since text was last added. It is reset to
/// `0` whenever text is appended and incremented for every line break.
trailing_newlines: u8,
/// Stack of prefixes written at the start of every new line: `">"` for an
/// open block quote, a whitespace string for list items and footnote bodies.
indentations: Vec<&'static str>,
/// Widest line seen so far in the heading being rendered; determines the
/// length of the heading's underline.
heading_len: usize,
/// Level of the heading currently being rendered, `None` outside headings.
heading_lvl: Option<HeadingLevel>,
/// `true` while inside a code block; text is then emitted line by line
/// without wrapping.
code_block: bool,
/// Number of currently open HTML blocks. It is only incremented and
/// decremented in the visible code, never read.
html_blocks: u8,
/// Stack of open lists. `Some(n)` is an ordered list whose next item gets
/// number `n`; `None` is a bullet list.
lists: Vec<Option<u64>>,
/// Footnote texts and link/image destination URLs, in order of first use.
/// They are printed at the end of the output, numbered starting at 1.
footnotes: Vec<String>,
/// Maps a Markdown footnote label to its index in `footnotes`.
footnote_labels: HashMap<CowStr<'a>, usize>,
/// Index into `footnotes` of the footnote definition currently being
/// filled, `None` while rendering regular text.
in_footnote: Option<usize>,
/// Stack of open links/images. Each entry is the 1-based footnote number to
/// print when the link closes, or `0` for autolinks, which get no footnote.
footnote_links: Vec<usize>
}
impl<'s> State<'_, 's> {
    /// Creates the rendering state for a run that appends to `txt`.
    ///
    /// Nothing is open initially (no heading, list, quote, code block or
    /// footnote) and no footnotes have been collected. `trailing_newlines`
    /// starts at `u8::MAX`, so any `newlines(n)` request made before the
    /// first text is treated as already satisfied.
    fn new(txt: &'s mut String) -> Self {
        Self {
            txt,
            trailing_newlines: u8::MAX,
            // Block structure: nothing is open yet.
            indentations: vec![],
            lists: vec![],
            heading_lvl: None,
            heading_len: 0,
            code_block: false,
            html_blocks: 0,
            // Footnote bookkeeping: nothing collected yet.
            in_footnote: None,
            footnotes: vec![],
            footnote_links: vec![],
            footnote_labels: HashMap::new()
        }
    }
}
/// `state += text` appends `text` verbatim to the output buffer.
///
/// This is a raw append: it neither wraps the text nor touches any of the
/// bookkeeping fields such as `trailing_newlines`.
impl<T: AsRef<str>> AddAssign<T> for State<'_, '_> {
    fn add_assign(&mut self, rhs: T) {
        self.txt.push_str(rhs.as_ref());
    }
}
impl<'a> State<'a, '_> {
    /// Returns the number of characters on the current output line, not
    /// counting the line's first character.
    ///
    /// The current line is everything after the last `"\r\n"` in the buffer,
    /// or the whole buffer if there is none. An empty line yields `0`.
    fn column(&self) -> usize {
        let line_begin_idx = self.txt.rfind("\r\n").map_or(0, |idx| idx + 2);
        // NOTE(review): the skipped first character is presumably the
        // space-stuffing space written at the start of each line — confirm.
        // Saturating so that an empty line cannot underflow.
        self.txt[line_begin_idx ..].chars().count().saturating_sub(1)
    }

    /// Ends the current line and starts a new one.
    ///
    /// * If the current line (ignoring leading `>` markers) consists only of
    ///   spaces, those spaces are removed before the line break is written.
    ///   This only applies once the buffer contains at least one `"\r\n"`.
    /// * `space_stuffing`: write a single space at the start of the new line.
    ///   If the outermost indentation is a quote marker, the space goes
    ///   after the indentation instead, and only when the innermost
    ///   indentation is a quote marker as well.
    /// * `quotes_only`: re-emit only the `">"` entries of the indentation
    ///   stack instead of all of them.
    fn newline_impl(&mut self, space_stuffing: bool, quotes_only: bool) {
        if let Some(mut idx) = self.txt.rfind("\r\n").map(|idx| idx + 2) {
            while self.txt[idx ..].starts_with('>') {
                idx += 1;
            }
            if self.txt[idx ..].chars().all(|ch| ch == ' ') {
                self.txt.truncate(idx);
            }
        }
        *self.txt += "\r\n";

        let starts_with_quote = self.indentations.first().is_some_and(|indent| *indent == ">");
        if space_stuffing && !starts_with_quote {
            self.txt.push(' ');
        }

        // The counter starts at `u8::MAX` and this method is also called
        // unconditionally, so the increment must not overflow.
        self.trailing_newlines = self.trailing_newlines.saturating_add(1);

        for indent in &self.indentations {
            if !quotes_only || *indent == ">" {
                *self.txt += indent;
            }
        }
        if space_stuffing && self.indentations.last().is_some_and(|i| *i == ">") {
            self.txt.push(' ');
        }
    }

    /// Emits line breaks until at least `count` of them separate the last
    /// added text from whatever is written next.
    fn newlines(&mut self, count: u8) {
        while self.trailing_newlines < count {
            self.newline_impl(true, false);
        }
    }

    /// Appends `text` to the current line without any wrapping.
    ///
    /// Empty text is ignored. Otherwise the trailing-newline counter is reset
    /// and, inside a heading, the heading width is updated (a single trailing
    /// space is not counted towards it).
    fn add_text_unwrapped(&mut self, text: &str) {
        if text.is_empty() {
            return;
        }
        *self.txt += text;
        self.trailing_newlines = 0;
        if self.heading_lvl.is_some() {
            let mut column = self.column();
            if self.txt.ends_with(' ') {
                column = column.saturating_sub(1);
            }
            self.heading_len = self.heading_len.max(column);
        }
    }

    /// Appends `text`, breaking it at spaces so that lines stay close to
    /// `PREFERRED_LINE_WIDTH` and, where possible, below `MAX_LINE_WIDTH`.
    ///
    /// The text is split after the chosen space (the space stays at the end
    /// of the line), a line break is inserted, and the remainder is handled
    /// recursively. A word that contains no usable space is first moved to a
    /// new line if the current line ends in a space, and is otherwise
    /// written as is even if it is too long.
    fn add_text_wrapping(&mut self, text: &str) {
        if text.is_empty() {
            return;
        }
        let column = self.column();
        let optimal_length = PREFERRED_LINE_WIDTH.saturating_sub(column);
        let max_length = MAX_LINE_WIDTH.saturating_sub(column);
        // The byte length is an upper bound for the character count, so this
        // shortcut never lets an over-long line through.
        if text.len() <= optimal_length {
            self.add_text_unwrapped(text);
            return;
        }

        // Byte offsets of the last space at or before the preferred width and
        // of the first space after it. Positions are compared in characters.
        let mut space_before = None;
        let mut space_after = None;
        let mut space_after_within_max_length = false;
        for (i, (byte, ch)) in text.char_indices().enumerate() {
            if ch != ' ' {
                continue;
            }
            if i <= optimal_length {
                space_before = Some(byte);
            } else {
                space_after = Some(byte);
                space_after_within_max_length = i <= max_length;
                break;
            }
        }

        let space = match (space_before, space_after) {
            (Some(space_before), None) => space_before,
            (None, Some(space_after)) if space_after_within_max_length => space_after,
            (Some(space_before), Some(space_after)) => {
                // `space_before` is a byte offset and can exceed the character
                // budget when the text contains multi-byte characters; a plain
                // subtraction would underflow in that case.
                // NOTE(review): as written this condition holds for every
                // input, so the earlier space always wins — confirm whether
                // the distance to `space_after` was meant to be compared.
                let slack = optimal_length.saturating_sub(space_before);
                if !space_after_within_max_length || slack <= space_after {
                    space_before
                } else {
                    space_after
                }
            },
            (None, _) if self.trailing_newlines == 0 && self.txt.ends_with(' ') => {
                // No usable break inside `text`, but the current line can be
                // broken before it: retry on a fresh line.
                self.newline_impl(true, true);
                self.add_text_wrapping(text);
                return;
            },
            (None, Some(space_after)) => space_after,
            _ => {
                self.add_text_unwrapped(text);
                return;
            }
        };

        // A space is one byte long, so `space + 1` is a character boundary.
        let (before, after) = text.split_at(space + 1);
        self.add_text_unwrapped(before);
        self.newline_impl(true, true);
        self.add_text_wrapping(after);
    }

    /// Returns the index into `footnotes` that belongs to `label`, reserving
    /// a new, empty footnote slot if the label has not been seen before.
    fn get_or_create_footnote(&mut self, label: CowStr<'a>) -> usize {
        if let Some(&footnote_idx) = self.footnote_labels.get(&label) {
            return footnote_idx;
        }
        let footnote_idx = self.footnotes.len();
        self.footnotes.push(String::new());
        self.footnote_labels.insert(label, footnote_idx);
        footnote_idx
    }
}
fn push_text_to_state<'a, I>(txt: &mut State<'a, '_>, iter: I)
where
I: Iterator<Item = Event<'a>>
{
if txt.txt.is_empty() || txt.txt.ends_with('\n') {
txt.txt.push(' ');
}
for event in iter {
match event {
Event::Start(Tag::Paragraph) => {
if let Some(footnote_idx) = txt.in_footnote {
let footnote_txt = &mut txt.footnotes[footnote_idx];
if !footnote_txt.is_empty() {
*footnote_txt += "\n\n";
}
} else {
txt.newlines(2);
}
},
Event::Start(Tag::Heading { level, .. }) => {
txt.newlines(3);
txt.heading_lvl = Some(level);
txt.heading_len = 0;
},
Event::Start(Tag::BlockQuote(_)) => {
txt.newlines(1);
txt.indentations.push(">");
},
Event::Start(Tag::CodeBlock(_)) => {
txt.newlines(2);
txt.code_block = true;
},
Event::Start(Tag::HtmlBlock) => {
txt.html_blocks += 1;
},
Event::Start(Tag::List(list_idx)) => {
txt.newlines(2);
txt.lists.push(list_idx);
},
Event::Start(Tag::Item) => {
txt.newlines(2);
let list_idx = txt
.lists
.last_mut()
.expect("Markdown parser found a list item outside of a list");
if let Some(list_idx) = list_idx {
let list_idx_str = format!("{list_idx}. ");
for _ in 0 .. 4usize.saturating_sub(list_idx_str.len()) {
txt.txt.push(' ');
}
*txt.txt += &list_idx_str;
*list_idx += 1;
} else {
*txt += " • ";
}
txt.indentations.push(" ");
},
Event::Start(Tag::FootnoteDefinition(label)) => {
let footnote_idx = txt.get_or_create_footnote(label);
txt.in_footnote = Some(footnote_idx);
},
Event::Start(Tag::DefinitionList)
| Event::Start(Tag::DefinitionListTitle)
| Event::Start(Tag::DefinitionListDefinition) => {
unreachable!("Definition lists are not enabled in the parser options")
},
Event::Start(Tag::Table(_))
| Event::Start(Tag::TableHead)
| Event::Start(Tag::TableRow)
| Event::Start(Tag::TableCell) => {
unreachable!("Tables are not enabled in the parser options")
},
Event::Start(Tag::Emphasis)
| Event::Start(Tag::Strong)
| Event::Start(Tag::Strikethrough) => {
},
Event::Start(Tag::Superscript) | Event::Start(Tag::Subscript) => {
unreachable!("Super/Subscript are not enabled in the parser options")
},
Event::Start(Tag::Link {
link_type: LinkType::Autolink,
..
}) => {
txt.footnote_links.push(0);
},
Event::Start(Tag::Link { dest_url, .. })
| Event::Start(Tag::Image { dest_url, .. }) => {
txt.footnotes.push(dest_url.into_string());
txt.footnote_links.push(txt.footnotes.len());
},
Event::Start(Tag::MetadataBlock(_)) => {
unreachable!("Metadata blacks are not enabled in the parser options")
},
Event::End(TagEnd::Paragraph) => {
},
Event::End(TagEnd::Heading(level)) => {
txt.newlines(1);
let ch = match level {
HeadingLevel::H1 => '=',
_ => '-'
};
for _ in 0 .. txt.heading_len {
txt.txt.push(ch);
}
txt.trailing_newlines = 0;
txt.newlines(2);
txt.heading_lvl = None;
},
Event::End(TagEnd::BlockQuote(_)) => {
let indent = txt.indentations.pop();
debug_assert_eq!(indent, Some(">"));
},
Event::End(TagEnd::CodeBlock) => {
debug_assert!(txt.code_block);
txt.code_block = false;
},
Event::End(TagEnd::HtmlBlock) => {
txt.html_blocks -= 1;
},
Event::End(TagEnd::List(_)) => {
let list_idx = txt.lists.pop();
debug_assert!(list_idx.is_some());
},
Event::End(TagEnd::Item) => {
let indent = txt.indentations.pop();
debug_assert_eq!(indent, Some(" "));
},
Event::End(TagEnd::FootnoteDefinition) => {
txt.in_footnote = None;
},
Event::End(TagEnd::DefinitionList)
| Event::End(TagEnd::DefinitionListTitle)
| Event::End(TagEnd::DefinitionListDefinition) => {
unreachable!("Definition lists are not enabled in the parser options")
},
Event::End(TagEnd::Table)
| Event::End(TagEnd::TableHead)
| Event::End(TagEnd::TableRow)
| Event::End(TagEnd::TableCell) => {
unreachable!("Tables are not enabled in the parser options")
},
Event::End(TagEnd::Emphasis)
| Event::End(TagEnd::Strong)
| Event::End(TagEnd::Strikethrough) => {
},
Event::End(TagEnd::Superscript) | Event::End(TagEnd::Subscript) => {
unreachable!("Super/Subscript are not enabled in the parser options")
},
Event::End(TagEnd::Link) | Event::End(TagEnd::Image) => {
let footnote_idx = txt
.footnote_links
.pop()
.expect("Markdown parser found a closing link/image that isn't open");
if footnote_idx != 0 {
txt.add_text_wrapping(&format!(" [{footnote_idx}]"));
}
},
Event::End(TagEnd::MetadataBlock(_)) => {
unreachable!("Metadata blocks are not enabled in the parser options")
},
Event::Text(text) | Event::Code(text) => {
if let Some(footnote_idx) = txt.in_footnote {
txt.footnotes[footnote_idx] += &text;
}
else if txt.code_block {
for line in text.lines() {
*txt += line.trim_end_matches(' ');
txt.trailing_newlines = 0;
txt.newlines(1);
}
} else {
txt.add_text_wrapping(&text);
}
},
Event::InlineMath(_) | Event::DisplayMath(_) => {
unreachable!("Math is not enabled in the parser options")
},
Event::Html(_) | Event::InlineHtml(_) => {
},
Event::FootnoteReference(label) => {
let footnote_idx = txt.get_or_create_footnote(label);
txt.add_text_wrapping(&format!("[{footnote_idx}]"));
},
Event::SoftBreak => {
if let Some(footnote_idx) = txt.in_footnote {
txt.footnotes[footnote_idx].push(' ');
} else {
txt.add_text_wrapping(" ");
}
},
Event::HardBreak => {
if let Some(footnote_idx) = txt.in_footnote {
txt.footnotes[footnote_idx] += "\n";
} else {
while txt.txt.ends_with(' ') {
txt.txt.pop();
}
txt.trailing_newlines = 0;
txt.newlines(1);
}
},
Event::Rule => {
txt.newlines(1);
for _ in 0 .. MAX_LINE_WIDTH {
txt.add_text_unwrapped("-");
}
txt.newlines(1);
},
Event::TaskListMarker(_) => {
unreachable!("Task lists are not enabled in the parser options")
}
}
}
if !txt.footnotes.is_empty() {
txt.newlines(1);
txt.newline_impl(false, true);
debug_assert!(txt.txt.ends_with("\r\n"));
txt.add_text_unwrapped("-- ");
for (i, f) in mem::take(&mut txt.footnotes).into_iter().enumerate() {
let multiline = f.contains('\n');
txt.newlines(1);
if multiline {
txt.newlines(2);
}
let f_label = format!("[{}]: ", i + 1);
for _ in 0 .. 6usize.saturating_sub(f_label.len()) {
txt.txt.push(' ');
}
*txt += &f_label;
txt.indentations.push(" ");
for line in f.lines() {
txt.newlines(1);
txt.add_text_wrapping(line);
txt.trailing_newlines = 0;
}
txt.indentations.pop();
if multiline {
txt.newlines(2);
}
}
}
txt.newline_impl(false, true);
}