use crate::book::Book;
use crate::error::{Error, Result, Source};
use crate::token::Token;
use std::convert::AsRef;
use std::fs::File;
use std::io::Read;
use std::mem;
use std::ops::BitOr;
use std::path::Path;
use comrak::nodes::{AstNode, ListType, NodeValue};
use comrak::{parse_document, Arena};
use rust_i18n::t;
/// Set of flags, one per tracked Markdown construct.
///
/// Every flag starts out `false` (see [`Features::new`]). Two sets can be
/// merged with the `|` operator, which yields their flag-wise union.
#[derive(Debug, Copy, Clone, Default, PartialEq)]
pub struct Features {
    /// Images.
    pub image: bool,
    /// Footnotes.
    pub footnote: bool,
    /// Block quotes.
    pub blockquote: bool,
    /// Code blocks.
    pub codeblock: bool,
    /// Ordered (numbered) lists.
    pub ordered_list: bool,
    /// Tables.
    pub table: bool,
    /// Links.
    pub url: bool,
    /// Subscript text.
    pub subscript: bool,
    /// Superscript text.
    pub superscript: bool,
    /// Struck-through text.
    pub strikethrough: bool,
    /// Task list items.
    pub taskitem: bool,
}

impl Features {
    /// Creates a set in which every flag is cleared.
    pub fn new() -> Features {
        // The derived `Default` initialises every `bool` to `false`.
        Self::default()
    }
}

impl BitOr for Features {
    type Output = Self;

    /// Flag-wise union: a flag is set in the result when it is set in
    /// either operand.
    fn bitor(self, rhs: Self) -> Self {
        Self {
            image: self.image || rhs.image,
            footnote: self.footnote || rhs.footnote,
            blockquote: self.blockquote || rhs.blockquote,
            codeblock: self.codeblock || rhs.codeblock,
            ordered_list: self.ordered_list || rhs.ordered_list,
            table: self.table || rhs.table,
            url: self.url || rhs.url,
            subscript: self.subscript || rhs.subscript,
            superscript: self.superscript || rhs.superscript,
            strikethrough: self.strikethrough || rhs.strikethrough,
            taskitem: self.taskitem || rhs.taskitem,
        }
    }
}
/// Markdown parser: turns Markdown text into a vector of `Token`s using comrak.
pub struct Parser {
/// Source description handed to the error constructors when a failure is reported.
source: Source,
/// Flags recording the constructs met by the parsing done so far.
features: Features,
/// While `true`, paragraph nodes are not wrapped in `Token::Paragraph`;
/// switched on while the children of a description term are converted.
ignore_paragraphs: bool,
/// When `true`, raw HTML (block or inline) is kept as a text token;
/// otherwise it is dropped and only logged at debug level.
html_as_text: bool,
/// Enables comrak's superscript extension, and its subscript extension as well.
superscript: bool,
/// When `true`, `---` is set as comrak's front matter delimiter.
parse_frontmatter: bool,
}
impl Parser {
pub fn new() -> Parser {
Parser {
source: Source::empty(),
features: Features::new(),
ignore_paragraphs: false,
html_as_text: true,
superscript: false,
parse_frontmatter: false,
}
}
/// Creates a parser configured from the options of `book`.
///
/// Reads the `crowbook.html_as_text`, `input.yaml_blocks` and
/// `crowbook.markdown.superscript` boolean options.
///
/// # Panics
///
/// Panics if one of these options cannot be read as a boolean.
pub fn from(book: &Book) -> Parser {
    let mut parser = Parser::new();
    // All three settings are looked up the same way.
    let flag = |key: &str| book.options.get_bool(key).unwrap();
    parser.html_as_text = flag("crowbook.html_as_text");
    parser.parse_frontmatter = flag("input.yaml_blocks");
    parser.superscript = flag("crowbook.markdown.superscript");
    parser
}
/// Sets whether raw HTML found in the Markdown is kept as text (`true`)
/// or dropped from the output (`false`).
pub fn html_as_text(&mut self, b: bool) {
self.html_as_text = b;
}
/// Replaces the source attached to the errors reported by this parser
/// with one built from `s` (presumably a file name -- confirm against `Source::new`).
pub fn set_source_file(&mut self, s: &str) {
self.source = Source::new(s);
}
pub fn parse_file<P: AsRef<Path>>(&mut self, filename: P, yaml_block: Option<&mut String>) -> Result<Vec<Token>> {
let path: &Path = filename.as_ref();
let mut f = File::open(path).map_err(|_| {
Error::file_not_found(
&self.source,
t!("format.markdown"),
format!("{}", path.display()),
)
})?;
let mut s = String::new();
f.read_to_string(&mut s).map_err(|_| {
Error::parser(
&self.source,
t!("error.utf8",
file = path.display()
),
)
})?;
self.parse(&s, yaml_block)
}
/// Parses the Markdown string `s` into a vector of tokens.
///
/// When front matter parsing is enabled and `yaml` is `Some`, the content
/// of the front matter block is appended to that string.
///
/// After the conversion, adjacent text tokens are merged and top-level
/// paragraphs made of a single image are turned into standalone images.
///
/// # Errors
///
/// Returns any error raised while converting the comrak tree.
pub fn parse(&mut self, s: &str, mut yaml: Option<&mut String>) -> Result<Vec<Token>> {
    let mut options = comrak::options::Options::default();
    options.render.hardbreaks = false;
    options.parse.smart = false;
    {
        let extension = &mut options.extension;
        extension.strikethrough = true;
        extension.table = true;
        extension.autolink = true;
        extension.tasklist = true;
        // A single setting drives both the superscript and the subscript extension.
        extension.superscript = self.superscript;
        extension.subscript = self.superscript;
        extension.footnotes = true;
        extension.description_lists = true;
        if self.parse_frontmatter {
            extension.front_matter_delimiter = Some("---".to_owned());
        }
    }

    let arena = Arena::new();
    let document = parse_document(&arena, s, &options);

    let mut tokens = self.parse_node(document, &mut yaml)?;
    collapse(&mut tokens);
    find_standalone(&mut tokens);
    Ok(tokens)
}
/// Parses `s` like [`Parser::parse`] (without front matter output), but
/// when the result is exactly one paragraph, returns the content of that
/// paragraph instead of the paragraph itself.
///
/// Any other result is returned unchanged.
///
/// # Errors
///
/// Returns any error raised by [`Parser::parse`].
pub fn parse_inline(&mut self, s: &str) -> Result<Vec<Token>> {
    let mut tokens = self.parse(s, None)?;
    // A lone paragraph is unwrapped: only what it contains is returned.
    if let [Token::Paragraph(content)] = tokens.as_mut_slice() {
        return Ok(std::mem::take(content));
    }
    Ok(tokens)
}
/// Returns a copy of the feature flags recorded so far.
///
/// The flags are never reset by this parser, so they accumulate over
/// every parse performed with it.
pub fn features(&self) -> Features {
self.features
}
/// Recursively converts a comrak node, and its whole subtree, into tokens.
///
/// The children are converted first and concatenated into `inner`. The
/// node itself then either wraps `inner` in the matching `Token`, builds a
/// leaf token from its own data, or contributes nothing at all.
///
/// Side effects:
/// * the matching flag of `self.features` is set whenever a tracked
///   construct (image, footnote, block quote, code block, ordered list,
///   table, link, subscript, superscript, strikethrough, task item) is met;
/// * the content of a front matter node is appended to `yaml_block` when a
///   string was supplied.
///
/// # Errors
///
/// Propagates any error returned while converting a child node.
///
/// # Panics
///
/// Panics (through `todo!`) on node kinds that have no `Token` equivalent.
/// None of the comrak extensions switched on in `parse` appears to produce
/// them, but verify against the comrak documentation before relying on it.
fn parse_node<'a>(&mut self, node: &'a AstNode<'a>, yaml_block: &mut Option<&mut String>) -> Result<Vec<Token>> {
    // Paragraphs found below a description term are flattened: the term
    // directly holds their inline content.
    let is_description_term = matches!(node.data.borrow().value, NodeValue::DescriptionTerm);
    if is_description_term {
        self.ignore_paragraphs = true;
    }
    let mut inner = vec![];
    for child in node.children() {
        inner.append(&mut self.parse_node(child, yaml_block)?);
    }
    if is_description_term {
        self.ignore_paragraphs = false;
    }

    let tokens = match node.data.borrow().value {
        NodeValue::Document => inner,
        NodeValue::BlockQuote | NodeValue::MultilineBlockQuote(_) => {
            self.features.blockquote = true;
            vec![Token::BlockQuote(inner)]
        }
        NodeValue::FrontMatter(ref v) => {
            if let Some(yaml) = yaml_block {
                yaml.push_str(v);
            }
            vec![]
        }
        NodeValue::List(ref list) => match list.list_type {
            ListType::Bullet => vec![Token::List(inner)],
            ListType::Ordered => {
                self.features.ordered_list = true;
                vec![Token::OrderedList(list.start, inner)]
            }
        },
        NodeValue::Item(_) => vec![Token::Item(inner)],
        NodeValue::DescriptionList => vec![Token::DescriptionList(inner)],
        NodeValue::DescriptionItem(_) => vec![Token::DescriptionItem(inner)],
        NodeValue::DescriptionTerm => vec![Token::DescriptionTerm(inner)],
        NodeValue::DescriptionDetails => vec![Token::DescriptionDetails(inner)],
        NodeValue::CodeBlock(ref block) => {
            self.features.codeblock = true;
            vec![Token::CodeBlock(block.info.clone(), block.literal.clone())]
        }
        NodeValue::HtmlBlock(ref block) => self.html_to_tokens(block.literal.clone()),
        NodeValue::HtmlInline(ref html) => self.html_to_tokens(html.clone()),
        NodeValue::Paragraph => {
            if self.ignore_paragraphs {
                inner
            } else {
                vec![Token::Paragraph(inner)]
            }
        }
        NodeValue::Heading(ref heading) => vec![Token::Header(heading.level as i32, inner)],
        NodeValue::ThematicBreak => vec![Token::Rule],
        NodeValue::FootnoteDefinition(ref def) => {
            self.features.footnote = true;
            vec![Token::FootnoteDefinition(def.name.clone(), inner)]
        }
        NodeValue::Text(ref text) => vec![Token::Str(text.to_string())],
        NodeValue::Code(ref code) => vec![Token::Code(code.literal.clone())],
        NodeValue::SoftBreak => vec![Token::SoftBreak],
        NodeValue::LineBreak => vec![Token::HardBreak],
        NodeValue::Emph => vec![Token::Emphasis(inner)],
        NodeValue::TaskItem(c) => {
            self.features.taskitem = true;
            // The item counts as checked as soon as it carries a symbol.
            vec![Token::TaskItem(c.symbol.is_some(), inner)]
        }
        NodeValue::Strong => vec![Token::Strong(inner)],
        NodeValue::Strikethrough => {
            self.features.strikethrough = true;
            vec![Token::Strikethrough(inner)]
        }
        NodeValue::Superscript => {
            self.features.superscript = true;
            vec![Token::Superscript(inner)]
        }
        NodeValue::Subscript => {
            self.features.subscript = true;
            vec![Token::Subscript(inner)]
        }
        NodeValue::Link(ref link) => {
            self.features.url = true;
            vec![Token::Link(link.url.clone(), link.title.clone(), inner)]
        }
        NodeValue::Image(ref link) => {
            self.features.image = true;
            vec![Token::Image(link.url.clone(), link.title.clone(), inner)]
        }
        NodeValue::FootnoteReference(ref fn_ref) => {
            self.features.footnote = true;
            vec![Token::FootnoteReference(fn_ref.name.clone())]
        }
        NodeValue::TableCell => vec![Token::TableCell(inner)],
        NodeValue::TableRow(header) => {
            if header {
                vec![Token::TableHead(inner)]
            } else {
                vec![Token::TableRow(inner)]
            }
        }
        NodeValue::Table(ref aligns) => {
            self.features.table = true;
            vec![Token::Table(aligns.alignments.len() as i32, inner)]
        }
        // Listed one by one (no catch-all) so that a node kind added by a
        // future comrak release is reported at compile time.
        NodeValue::HeexBlock(_)
        | NodeValue::HeexInline(_)
        | NodeValue::Highlight
        | NodeValue::ShortCode(_)
        | NodeValue::Subtext
        | NodeValue::Escaped
        | NodeValue::WikiLink(_)
        | NodeValue::Math(_)
        | NodeValue::Underline
        | NodeValue::SpoileredText
        | NodeValue::EscapedTag(_)
        | NodeValue::Alert(_)
        | NodeValue::Raw(_) => {
            todo!("Unsupported markdown feature")
        }
    };
    Ok(tokens)
}

/// Converts raw HTML (block or inline) into tokens: a single text token
/// when HTML is kept as text, nothing (plus a debug log) otherwise.
fn html_to_tokens(&self, text: String) -> Vec<Token> {
    if self.html_as_text {
        vec![Token::Str(text)]
    } else {
        debug!("{}", t!("parser.ignore_html", block = text));
        vec![]
    }
}
}
impl Default for Parser {
/// Same as [`Parser::new`].
fn default() -> Self {
Self::new()
}
}
/// Merges runs of text in `ast`, recursively.
///
/// A `Token::Str` absorbs every `Token::Str` that directly follows it, and
/// every `Token::SoftBreak` that directly follows it is replaced by a single
/// space appended to the text. Soft breaks that do not follow text are left
/// untouched. The same treatment is then applied to the children of every
/// remaining token.
fn collapse(ast: &mut Vec<Token>) {
    // Single linear pass: `dedup_by` hands over each element (`next`)
    // together with the last element kept so far (`prev`) and removes
    // `next` when the closure returns `true`. This avoids shifting the
    // tail of the vector on every merge, which repeated `Vec::remove`
    // calls would do.
    ast.dedup_by(|next, prev| match (prev, &*next) {
        (Token::Str(dest), Token::Str(src)) => {
            dest.push_str(src);
            true
        }
        (Token::Str(dest), Token::SoftBreak) => {
            dest.push(' ');
            true
        }
        _ => false,
    });

    for token in ast.iter_mut() {
        if let Some(inner) = token.inner_mut() {
            collapse(inner);
        }
    }
}
/// Replaces the top-level paragraphs of `ast` that hold nothing but an image.
///
/// * A paragraph whose only child is an image becomes a
///   `Token::StandaloneImage`.
/// * A paragraph whose only child is a link that itself only contains an
///   image becomes that link, wrapping a `Token::StandaloneImage`.
///
/// Every other token is left untouched; nested tokens are not visited.
fn find_standalone(ast: &mut Vec<Token>) {
    for token in ast.iter_mut() {
        let Token::Paragraph(children) = token else {
            continue;
        };
        if children.len() != 1 {
            continue;
        }
        let only = &mut children[0];

        let replacement = if only.is_image() {
            // `Token::Rule` is only a placeholder: the paragraph that owns
            // it is overwritten just below.
            match mem::replace(only, Token::Rule) {
                Token::Image(source, title, inner) => {
                    Token::StandaloneImage(source, title, inner)
                }
                _ => unreachable!(),
            }
        } else if let Token::Link(ref url, ref alt, ref mut link_inner) = *only {
            if link_inner.len() != 1 || !link_inner[0].is_image() {
                continue;
            }
            match mem::replace(&mut link_inner[0], Token::Rule) {
                Token::Image(source, title, inner) => Token::Link(
                    url.clone(),
                    alt.clone(),
                    vec![Token::StandaloneImage(source, title, inner)],
                ),
                _ => unreachable!(),
            }
        } else {
            continue;
        };

        *token = replacement;
    }
}