#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate log;
use regex::Regex;
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::io::Write;
use std::path::Path;
/// Line-oriented parser that builds a [`Tree`] from one or more input files.
///
/// Partially built nodes live on `stack`; the `reduce*` methods fold finished
/// nodes into their parents, using `antistack` as scratch space.
pub struct Parser {
/// For every `:tangle` target named by a source block, the newest
/// modification time among the parsed files that mention it.
pub tangle_sources: HashMap<String, std::time::SystemTime>,
/// Modification time of every file read so far, keyed by its path.
pub sources: HashMap<String, std::time::SystemTime>,
/// Nodes that are still open; the bottom element is the level-0 root.
stack: Vec<Tree>,
/// Temporary holding area for nodes popped off `stack` while reducing.
antistack: Vec<Tree>,
/// Indentation recorded for the most recent list item (lowered again when a
/// less-indented, non-empty line is read).
indent: usize,
/// Counter used to hand out numeric ids to sections.
id: usize,
}
/// Document-level metadata attached to a [`Tree::Root`].
///
/// The parser fills these from `#+KEYWORD: value` lines; `CustomId` is declared
/// here but is not constructed anywhere in this file.
#[derive(Debug, Clone)]
pub enum Property {
/// A custom anchor id.
CustomId { id: String },
/// Value of a `#+TITLE:` line.
Title { title: String },
/// Value of a `#+AUTHOR:` line.
Author { author: String },
/// Value of a `#+HTML_HEAD:` line, emitted verbatim inside `<head>`.
HtmlHead { html_head: String },
/// Value of a `#+HTML_FOOT:` line, emitted verbatim after `</body>`.
HtmlFoot { html_foot: String },
}
/// Anchor identifier of a section, used for `<a name=...>` and TOC links.
#[derive(Debug, Clone)]
pub enum Id {
/// Explicit id taken from a `CUSTOM_ID` property.
String(String),
/// Automatically assigned sequential id.
Int(usize),
}
/// A node of the parsed document.
#[derive(Debug, Clone)]
pub enum Tree {
/// A section (or the whole document, at level 0) with its nested content.
Root {
/// Nested sections, paragraphs, lists and source blocks, in document order.
children: Vec<Tree>,
/// Heading text (empty for the document root).
title: String,
/// Number of leading `*` of the heading; 0 for the document root.
level: usize,
/// Metadata collected from `#+KEYWORD:` lines while this node was on top
/// of the parser stack.
properties: Vec<Property>,
/// True while the parser is inside this section's `:PROPERTIES:` drawer.
properties_open: bool,
/// Not populated by the code in this file.
toc: Vec<usize>,
/// Anchor id of the section.
id: Id,
},
/// A run of plain text lines (a paragraph or the text of a list entry).
Block {
/// Raw text of the block.
contents: String,
},
/// A list; each element of `children` is the content of one list entry.
Item {
children: Vec<Vec<Tree>>,
/// Indentation of the list, as computed from its marker line.
indent: usize,
},
/// A `#+BEGIN_SRC` ... `#+END_SRC` block.
Src {
/// Length of the whitespace preceding `#+BEGIN_SRC`.
indent: usize,
/// Body of the block, one `\n`-terminated line per source line.
contents: String,
/// First word after `#+BEGIN_SRC` (the language), possibly empty.
style: String,
/// `:key value` header arguments, in order of appearance.
options: Vec<(String, String)>,
/// Base64-encoded BLAKE3 hash of `contents`; empty for an empty block.
id: String,
},
}
impl Parser {
pub fn new() -> Self {
Parser {
tangle_sources: HashMap::new(),
sources: HashMap::new(),
stack: vec![Tree::Root {
children: Vec::new(),
level: 0,
title: String::new(),
properties: Vec::new(),
properties_open: false,
toc: Vec::new(),
id: Id::Int(0),
}],
antistack: Vec::new(),
indent: 0,
id: 0,
}
}
/// Folds the top of the stack until a section of level `to_level` or lower
/// is back on top.
///
/// Every popped node that is not a section is parked on `antistack`; each
/// section encountered adopts everything parked so far as its children (in
/// document order). Sections deeper than `to_level` are themselves parked so
/// that the next section down adopts them.
///
/// Returns `true` if more than one node remains on the stack.
///
/// Panics if the stack runs out before a suitable section is found.
fn reduce_to(&mut self, to_level: usize) -> bool {
    loop {
        let mut node = self.stack.pop().unwrap();
        // `Some(level)` for a section, after it has adopted the parked nodes.
        let adopted_at = if let Tree::Root { children, level, .. } = &mut node {
            // Draining in reverse yields the same order as popping one by one.
            children.extend(self.antistack.drain(..).rev());
            Some(*level)
        } else {
            None
        };
        match adopted_at {
            Some(level) if level <= to_level => {
                self.stack.push(node);
                break;
            }
            _ => self.antistack.push(node),
        }
    }
    self.stack.len() > 1
}
/// Folds everything above the nearest section into that section.
///
/// Nodes are popped onto `antistack` until a section is found; the section
/// then takes all parked nodes as children (in document order) and is pushed
/// back on the stack.
///
/// Returns `true` if more than one node remains on the stack.
///
/// Panics if the stack contains no section.
fn reduce(&mut self) -> bool {
    loop {
        let mut node = self.stack.pop().unwrap();
        let is_root = match node {
            Tree::Root {
                ref mut children, ..
            } => {
                // Draining in reverse yields the same order as popping one by one.
                children.extend(self.antistack.drain(..).rev());
                true
            }
            _ => false,
        };
        if is_root {
            self.stack.push(node);
            break;
        }
        self.antistack.push(node);
    }
    self.stack.len() > 1
}
fn reduce_item(&mut self, level: usize) -> bool {
while let Some(pop) = self.stack.pop() {
if let Tree::Item {
mut children,
indent,
} = pop
{
if indent >= level {
let mut child = Vec::new();
while let Some(anti) = self.antistack.pop() {
child.push(anti)
}
if !child.is_empty() {
children.push(child);
}
if indent == level {
self.stack.push(Tree::Item { children, indent });
return true;
} else {
self.antistack.push(Tree::Item { children, indent });
}
} else {
self.antistack.push(Tree::Item { children, indent })
}
} else {
self.antistack.push(pop)
}
}
while let Some(elt) = self.antistack.pop() {
self.stack.push(elt)
}
false
}
/// Closes every open list and section and returns the finished document.
///
/// The returned node is whatever remains on top of the stack after reducing
/// to level 0. Panics if the stack is empty (e.g. when called twice).
pub fn finish(&mut self) -> Tree {
    let mut lists_open = self.reduce_item(0);
    while lists_open {
        lists_open = self.reduce_item(0);
    }
    let mut sections_open = self.reduce_to(0);
    while sections_open {
        sections_open = self.reduce_to(0);
    }
    self.stack.pop().unwrap()
}
/// Parses `file` into this parser's tree.
///
/// `include_callback` is invoked with the path of every `#+INCLUDE:`d file
/// before that file is parsed in turn.
///
/// # Errors
///
/// Returns any error produced while reading or decoding `file` or one of its
/// includes.
pub fn parse<P: AsRef<Path>, F: FnMut(&str)>(
    &mut self,
    file: P,
    mut include_callback: F,
) -> Result<(), failure::Error> {
    let on_include = &mut include_callback;
    self.parse_(file, on_include)
}
/// Handles line `l` while a source block is on top of the stack.
///
/// Returns `false` (and does nothing) when the top of the stack is not a
/// source block. Otherwise the line is consumed and `true` is returned:
/// a `#+END_SRC` line closes the block via [`Parser::reduce`]; any other line
/// is appended to the block's contents and the block id is recomputed as the
/// base64-encoded BLAKE3 hash of the contents.
///
/// Lines that start with at least `indent + 2` whitespace characters have
/// exactly that many leading characters removed before being stored.
fn close_src(&mut self, l: &str) -> bool {
    lazy_static! {
        static ref END_SRC: Regex = Regex::new(r#"^\s*#\+END_SRC"#).unwrap();
    }
    if let Some(Tree::Src {
        ref mut contents,
        ref mut id,
        indent,
        ..
    }) = self.stack.last_mut()
    {
        if !END_SRC.is_match(l) {
            let strip = *indent + 2;
            let leading_ws = l.chars().take_while(|c| c.is_whitespace()).count();
            let body = if leading_ws >= strip {
                // Convert the character count into a byte offset: slicing at
                // `strip` bytes would panic (or strip too little) when the
                // leading whitespace contains multi-byte characters.
                let cut = l.char_indices().nth(strip).map_or(l.len(), |(i, _)| i);
                &l[cut..]
            } else {
                l
            };
            contents.push_str(body);
            contents.push('\n');
            *id = base64::encode(blake3::hash(contents.as_bytes()).as_bytes());
            return true;
        }
    } else {
        return false;
    }
    self.reduce();
    true
}
/// Opens a source block if `l` is a `#+BEGIN_SRC` line.
///
/// The first word after the keyword becomes the block's `style`; subsequent
/// `:key value` pairs become its `options`. For every `:tangle` target the
/// newest `time` seen so far is recorded in `tangle_sources`.
///
/// Returns `true` if the line was recognised and a new [`Tree::Src`] pushed.
fn begin_src(&mut self, l: &str, time: std::time::SystemTime) -> bool {
    lazy_static! {
        static ref BEGIN_SRC: Regex = Regex::new(r#"^(\s*)#\+BEGIN_SRC(\s+.*)?"#).unwrap();
    }
    let caps = match BEGIN_SRC.captures(l) {
        Some(caps) => caps,
        None => return false,
    };
    let indent = caps.get(1).unwrap().as_str().len();
    // The argument group is optional: a bare `#+BEGIN_SRC` has no language
    // and no header arguments, and must not panic.
    let arguments = caps.get(2).map_or("", |m| m.as_str());
    let mut words = arguments.split_whitespace();
    let style = words.next().unwrap_or("").to_string();
    let mut options = Vec::new();
    // Not a `for` loop: each `:key` consumes the following word as its value.
    while let Some(param) = words.next() {
        if !param.starts_with(':') {
            continue;
        }
        if let Some(value) = words.next() {
            if param == ":tangle" {
                let newest = self.tangle_sources.entry(value.to_string()).or_insert(time);
                if *newest < time {
                    *newest = time
                }
            }
            options.push((param.to_string(), value.to_string()));
        }
    }
    self.stack.push(Tree::Src {
        contents: String::new(),
        style,
        options,
        indent,
        id: String::new(),
    });
    true
}
/// Handles property-drawer lines for the section on top of the stack.
///
/// * `:PROPERTIES:` opens the drawer, `:END:` (or the legacy spelling `:END`)
///   closes it; both are always consumed.
/// * `:KEY: value` lines are consumed only while the drawer is open; a
///   `CUSTOM_ID` key replaces the section's id with the trimmed value.
///
/// Returns `true` if the line was consumed.
fn properties(&mut self, l: &str) -> bool {
    lazy_static! {
        static ref PROP: Regex = Regex::new(r#"^:([^:]+):(.*)"#).unwrap();
    }
    match l {
        // Without `:END:` here the standard drawer terminator would match the
        // generic `:KEY:` pattern below and the drawer would never be closed.
        ":PROPERTIES:" | ":END:" | ":END" => {
            if let Some(Tree::Root {
                ref mut properties_open,
                ..
            }) = self.stack.last_mut()
            {
                *properties_open = l == ":PROPERTIES:";
            }
            true
        }
        _ => {
            let cap = match PROP.captures(l) {
                Some(cap) => cap,
                None => return false,
            };
            match self.stack.last_mut() {
                Some(Tree::Root {
                    ref properties_open,
                    ref mut id,
                    ..
                }) => {
                    if *properties_open && &cap[1] == "CUSTOM_ID" {
                        *id = Id::String(cap[2].trim().to_string());
                    }
                    *properties_open
                }
                _ => false,
            }
        }
    }
}
/// Opens a new section if `l` is a heading (`*`, `**`, ... followed by text).
///
/// Open lists are closed, every section at the same or a deeper level is
/// folded into its parent, and a fresh [`Tree::Root`] with the next numeric
/// id is pushed. Returns `true` if the line was a heading.
fn section(&mut self, l: &str) -> bool {
    lazy_static! {
        static ref SECTION: Regex = Regex::new(r#"^(\*+)\s+(.*)"#).unwrap();
    }
    let caps = match SECTION.captures(l) {
        Some(caps) => caps,
        None => return false,
    };
    // Both groups always participate in a successful match.
    let level = caps[1].len();
    let title = caps[2].to_string();
    self.reduce_item(0);
    self.reduce_to(level - 1);
    self.id += 1;
    let heading = Tree::Root {
        id: Id::Int(self.id),
        level,
        title,
        children: Vec::new(),
        properties: Vec::new(),
        properties_open: false,
        toc: Vec::new(),
    };
    self.stack.push(heading);
    true
}
/// Starts a new list entry if `l` begins with optional whitespace and `-`.
///
/// The entry's indentation is the length of the leading whitespace, plus the
/// dash, plus the whitespace after it. An existing list with that indentation
/// is reused; otherwise a new [`Tree::Item`] is pushed. The rest of the line
/// becomes the first block of the entry. Returns `true` if the line matched.
fn item(&mut self, l: &str) -> bool {
    lazy_static! {
        static ref ITEM: Regex = Regex::new(r#"^(\s*)-(\s*)(.*)"#).unwrap();
    }
    let caps = match ITEM.captures(l) {
        Some(caps) => caps,
        None => return false,
    };
    // All three groups always participate in a successful match.
    let indent = caps[1].len() + 1 + caps[2].len();
    let contents = caps[3].to_string();
    let list_exists = self.reduce_item(indent);
    if !list_exists {
        self.stack.push(Tree::Item {
            indent,
            children: Vec::new(),
        });
    }
    self.indent = indent;
    self.stack.push(Tree::Block { contents });
    true
}
/// Reads `file` and feeds it line by line into the parser state.
///
/// The file's modification time is recorded in `sources`. Each line is then
/// offered, in order, to: the open source block, the property drawer, a
/// `#+BEGIN_SRC` opener, `#+KEYWORD:` handling (`INCLUDE`, `HTML_HEAD`,
/// `HTML_FOOT`, `TITLE`, `AUTHOR`; other keywords are ignored), comment
/// skipping (`#`), section headings and list items. Anything left over is
/// plain text that starts or extends a [`Tree::Block`].
///
/// `#+INCLUDE:` targets are reported to `include_callback` and parsed
/// recursively; the path is used as written (surrounding `"` removed).
///
/// # Errors
///
/// Fails if the file (or an included file) cannot be read, its metadata is
/// unavailable, or its contents are not valid UTF-8.
fn parse_<P: AsRef<Path>, F: FnMut(&str)>(
    &mut self,
    file: P,
    include_callback: &mut F,
) -> Result<(), failure::Error> {
    lazy_static! {
        static ref PROP: Regex = Regex::new(r#"^\s*#\+([^\s:]*)\s*:\s*(.*)"#).unwrap();
    }
    let path = file.as_ref();
    let bytes = std::fs::read(path)?;
    let time = std::fs::metadata(path)?.modified()?;
    // Lossy conversion instead of `to_str().unwrap()`: a non-UTF-8 path must
    // not panic; valid UTF-8 paths produce exactly the same key as before.
    self.sources
        .insert(path.to_string_lossy().into_owned(), time);
    let text = String::from_utf8(bytes)?;
    for l in text.lines() {
        // A less-indented, non-empty line ends the list entries it dedents
        // past.
        let indent = l
            .chars()
            .take_while(|c| c.is_whitespace() || *c == '-')
            .count();
        if indent < self.indent && !l.is_empty() {
            self.reduce_item(indent);
            self.reduce();
            self.indent = indent;
        }
        if self.close_src(l) {
            continue;
        }
        if self.properties(l) {
            continue;
        }
        if self.begin_src(l, time) {
            continue;
        }
        if let Some(caps) = PROP.captures(l) {
            let value = caps.get(2).unwrap().as_str();
            let property = match caps.get(1).unwrap().as_str() {
                "INCLUDE" => {
                    let value = value.trim_matches('"');
                    include_callback(value);
                    self.parse_(value, include_callback)?;
                    None
                }
                "HTML_HEAD" => Some(Property::HtmlHead {
                    html_head: value.to_string(),
                }),
                "HTML_FOOT" => Some(Property::HtmlFoot {
                    html_foot: value.to_string(),
                }),
                "TITLE" => Some(Property::Title {
                    title: value.to_string(),
                }),
                "AUTHOR" => Some(Property::Author {
                    author: value.to_string(),
                }),
                // Unknown keywords are consumed and ignored.
                _ => None,
            };
            if let Some(property) = property {
                // Keywords only attach to a section that is on top of the
                // stack; elsewhere they are dropped.
                if let Some(Tree::Root {
                    ref mut properties, ..
                }) = self.stack.last_mut()
                {
                    properties.push(property);
                }
            }
            continue;
        }
        if l.starts_with('#') {
            continue;
        }
        if self.section(l) {
            continue;
        }
        if self.item(l) {
            continue;
        }
        match self.stack.last_mut().unwrap() {
            // Drop the `CLOSED:` timestamp line under a DONE heading.
            Tree::Root { ref title, .. }
                if title.starts_with("DONE") && l.trim_start().starts_with("CLOSED:") => {}
            Tree::Root { .. } | Tree::Item { .. } if !l.is_empty() => {
                let mut contents = String::new();
                contents.push_str(l);
                contents.push('\n');
                self.stack.push(Tree::Block { contents })
            }
            Tree::Block { ref mut contents } => {
                contents.push_str(l);
                contents.push('\n');
            }
            // `close_src` consumes every line while a source block is open.
            Tree::Src { .. } => unreachable!(),
            _ => {}
        }
    }
    Ok(())
}
}
/// Writes every source block that has a `:tangle FILE` option to `FILE`.
///
/// `files` tracks the targets already written during this run: the first
/// block for a target truncates the file, later blocks append. Before the
/// first write, a target whose on-disk modification time is newer than the
/// time recorded for it in `tangle_files` is skipped (and forgotten again, so
/// later blocks for it are checked and skipped the same way).
///
/// Each block is preceded by a `// org id <id>` marker line, and every line
/// of the block is written with surrounding whitespace trimmed.
///
/// # Errors
///
/// Returns an error on any I/O failure, or when a tangle target has no entry
/// in `tangle_files`.
pub fn tangle(
    tree: &Tree,
    files: &mut HashSet<String>,
    tangle_files: &HashMap<String, std::time::SystemTime>,
) -> Result<(), failure::Error> {
    match tree {
        Tree::Root { ref children, .. } => {
            for child in children.iter() {
                tangle(child, files, tangle_files)?
            }
            Ok(())
        }
        Tree::Item { ref children, .. } => {
            for child in children.iter().flatten() {
                tangle(child, files, tangle_files)?
            }
            Ok(())
        }
        Tree::Src {
            ref contents,
            ref options,
            ref id,
            ..
        } => {
            let file = match options.iter().find(|(k, _)| k == ":tangle") {
                Some((_, file)) => file,
                None => return Ok(()),
            };
            let path: &Path = file.as_ref();
            let new_file = files.insert(file.to_string());
            if new_file {
                if let Ok(meta) = std::fs::metadata(path) {
                    // Report a missing timestamp instead of panicking.
                    let source_time = tangle_files.get(file).ok_or_else(|| {
                        failure::err_msg(format!(
                            "no source timestamp recorded for tangle target {:?}",
                            file
                        ))
                    })?;
                    if meta.modified()? > *source_time {
                        // The file on disk is newer than the document: keep it.
                        files.remove(file);
                        return Ok(());
                    }
                }
            }
            // `parent()` is `None` for a root or empty path; let `open` below
            // report the problem instead of panicking here.
            if let Some(parent) = path.parent() {
                std::fs::create_dir_all(parent)?;
            }
            let mut file_ = std::fs::OpenOptions::new()
                .write(true)
                .create(true)
                .append(!new_file)
                .truncate(new_file)
                .open(path)?;
            debug!("id = {:?}", id);
            writeln!(file_, "// org id {}", id)?;
            for line in contents.lines() {
                file_.write_all(line.trim().as_bytes())?;
                file_.write_all(b"\n")?;
            }
            Ok(())
        }
        Tree::Block { .. } => Ok(()),
    }
}
pub fn html_output<W: Write>(
tree: &Tree,
use_syntect: bool,
w: &mut W,
) -> Result<(), failure::Error> {
writeln!(w, "<html><head><meta charset=\"utf-8\"/> ")?;
if let Tree::Root { ref properties, .. } = tree {
for head in properties {
if let Property::HtmlHead { ref html_head } = head {
writeln!(w, "{}", html_head)?
}
}
}
writeln!(w, "</head><body>")?;
if let Tree::Root { ref properties, .. } = tree {
let mut author_ = String::new();
let mut title_ = String::new();
for head in properties {
match head {
Property::Title { ref title } => {
title_.clear();
title_.push_str(title);
}
Property::Author { ref author } => {
author_.clear();
author_.push_str(author)
}
_ => {}
}
}
if !title_.is_empty() {
writeln!(w, "<h1 class=\"title\">{}</h1>", title_)?;
}
if !author_.is_empty() {
writeln!(w, "<h1 class=\"subtitle\">{}</h1>", author_)?;
}
}
table_of_contents(tree, w)?;
std::process::Command::new("cargo")
.args(&["fmt"])
.output()
.expect("failed to execute process");
output_tree(
tree,
w,
&mut vec![],
&mut Output {
use_syntect,
syntax_set: syntect::parsing::SyntaxSet::load_defaults_newlines(),
theme: syntect::highlighting::ThemeSet::load_defaults()
.themes
.remove("InspiredGitHub")
.unwrap(),
fmt: HashMap::new(),
},
)?;
writeln!(w, "</body>")?;
if let Tree::Root { ref properties, .. } = tree {
for head in properties {
if let Property::HtmlFoot { ref html_foot } = head {
writeln!(w, "{}", html_foot)?
}
}
}
writeln!(w, "</html>")?;
Ok(())
}
fn output_string<W: Write>(s: &str, is_title: bool, w: &mut W) -> Result<(), failure::Error> {
lazy_static! {
static ref EMPH: Regex = Regex::new(r#"/(?P<emph>[^/]*)/"#).unwrap();
static ref BOLD: Regex = Regex::new(r#"\*(?P<bold>[^\*]*)\*"#).unwrap();
static ref VERB: Regex = Regex::new(r#"~(?P<verb>[^~]*)~"#).unwrap();
static ref LINK: Regex = Regex::new(r#"\[\[(?P<link>[^\]]*)\]\]"#).unwrap();
static ref LINK2: Regex =
Regex::new(r#"\[\[(?P<link>[^\]]*)\]\[(?P<target>[^\]]*)\]\]"#).unwrap();
static ref ANCHOR: Regex = Regex::new(r#"(<<(?P<anchor>[^>\n]*)>>)|(<)|(>)"#).unwrap();
static ref AMP: Regex = Regex::new(r#"&"#).unwrap();
static ref LT: Regex = Regex::new(r#"<"#).unwrap();
static ref GT: Regex = Regex::new(r#">"#).unwrap();
static ref PARBREAK: Regex = Regex::new("\n\n").unwrap();
static ref LATEX: Regex = Regex::new(r#"\$([^$]*)\$"#).unwrap();
}
let s = AMP.replace_all(s, "&");
let s = ANCHOR.replace_all(&s, |cap: ®ex::Captures| {
if &cap[0] == "<" {
Cow::Borrowed("<")
} else if &cap[0] == ">" {
Cow::Borrowed(">")
} else {
Cow::Owned(format!("<a name=\"{}\"></a>", &cap[2]))
}
});
let s = EMPH.replace_all(&s, "<em>$emph</em>");
let s = BOLD.replace_all(&s, "<strong>$bold</strong>");
let s = VERB.replace_all(&s, "<code>$verb</code>");
let s = LINK.replace_all(&s, "<a href=\"$link\">$link</a>");
let s = LINK2.replace_all(&s, "<a href=\"$link\">$target</a>");
let s = LINK2.replace_all(&s, "<a href=\"$link\">$target</a>");
let s = LATEX.replace_all(&s, |cap: ®ex::Captures| {
if let Some(mml) = itex2mml::MML::parse(&cap[0]) {
mml.as_cstr().to_str().unwrap().to_string()
} else {
s.to_string()
}
});
if is_title && s.starts_with("TODO ") {
let (_, b) = s.split_at(5);
w.write_all(b"<span class=\"todo TODO\">TODO</span> ")?;
w.write_all(b.as_bytes())?;
} else if is_title && s.starts_with("DONE ") {
let (_, b) = s.split_at(5);
w.write_all(b"<span class=\"done DONE\">DONE</span> ")?;
w.write_all(b.as_bytes())?;
} else if is_title {
w.write_all(s.as_bytes())?;
} else {
let s = PARBREAK.replace_all(&s, "</p><p>");
w.write_all(s.as_bytes())?;
}
Ok(())
}
/// Rendering state shared by all `output_tree` calls of one HTML export.
struct Output {
/// Whether `rust` source blocks are highlighted with syntect.
use_syntect: bool,
/// Syntax definitions used to look up the Rust grammar.
syntax_set: syntect::parsing::SyntaxSet,
/// Colour theme used for highlighting.
theme: syntect::highlighting::Theme,
/// Text read back from tangled files, keyed by the block id found in their
/// `// org id <id>` marker lines.
fmt: HashMap<String, String>,
}
/// Recursively renders `tree` as HTML into `w`.
///
/// * Sections emit a named anchor, a numbered `<hN>` heading (level 0 has no
///   heading) and then their children. `num` holds the current section
///   number path and is advanced after each section.
/// * Lists become `<ul>`/`<li>`; an entry consisting of a single text block
///   is written inline without `<p>`.
/// * `rust` source blocks are highlighted when `out.use_syntect` is set. If
///   the block is tangled, the text shown is read back from the tangled file
///   (split on `// org id <id>` markers and cached in `out.fmt`), falling back
///   to the block's own contents when the id is not found there.
/// * Other source blocks are HTML-escaped into a `<pre class="src-STYLE">`.
/// * Text blocks become `<p>` paragraphs.
///
/// # Errors
///
/// Returns any error raised while writing to `w` or reading a tangled file.
fn output_tree<W: Write>(
    tree: &Tree,
    w: &mut W,
    num: &mut Vec<usize>,
    out: &mut Output,
) -> Result<(), failure::Error> {
    match tree {
        Tree::Root {
            ref children,
            ref title,
            ref level,
            ref id,
            ..
        } => {
            match id {
                Id::String(ref s) => writeln!(w, "<a name=\"{}\"></a>", s)?,
                Id::Int(s) => writeln!(w, "<a name=\"{}\"></a>", s)?,
            }
            if *level > 0 {
                writeln!(w, "<h{}>", level)?;
                for i in num.iter() {
                    write!(w, "{}.", i)?;
                }
                output_string(title, true, w)?;
                writeln!(w, "</h{}>", level)?;
            }
            num.push(1);
            for chi in children {
                output_tree(chi, w, num, out)?;
            }
            num.pop();
            if let Some(num) = num.last_mut() {
                *num += 1
            }
        }
        Tree::Item { ref children, .. } => {
            writeln!(w, "<ul>")?;
            for entry in children {
                writeln!(w, "<li>")?;
                match entry.as_slice() {
                    // A lone text block is written inline, without `<p>`.
                    [Tree::Block { contents }] => output_string(contents, false, w)?,
                    parts => {
                        for part in parts {
                            output_tree(part, w, num, out)?;
                        }
                    }
                }
                writeln!(w, "</li>")?;
            }
            writeln!(w, "</ul>")?;
        }
        Tree::Src {
            ref contents,
            ref style,
            ref options,
            ref id,
            ..
        } if out.use_syntect && style == "rust" => {
            writeln!(w)?;
            let syntax = out.syntax_set.find_syntax_by_name("Rust").unwrap();
            let mut highlighter = syntect::easy::HighlightLines::new(&syntax, &out.theme);
            let tangle_target = options
                .iter()
                .find(|(k, _)| k == ":tangle")
                .map(|(_, file)| file);
            let shown = match tangle_target {
                Some(file) => {
                    debug!("file = {:?}", file);
                    if let Some(cached) = out.fmt.get(id) {
                        cached
                    } else {
                        // Index the tangled file by its `// org id` markers.
                        let s = std::fs::read_to_string(file)?;
                        let mut current_line = String::new();
                        let mut current_id = None;
                        lazy_static! {
                            static ref ID: Regex = Regex::new(r#"^\s*// org id (.*)"#).unwrap();
                        }
                        for l in s.lines() {
                            if let Some(caps) = ID.captures(l) {
                                let n = caps.get(1).unwrap().as_str().to_string();
                                if let Some(m) = current_id.take() {
                                    out.fmt
                                        .insert(m, std::mem::replace(&mut current_line, String::new()));
                                }
                                current_id = Some(n);
                            } else {
                                current_line.push_str(l);
                                current_line.push('\n');
                            }
                        }
                        if let Some(m) = current_id.take() {
                            out.fmt
                                .insert(m, std::mem::replace(&mut current_line, String::new()));
                        }
                        out.fmt.get(id).unwrap_or(contents)
                    }
                }
                None => contents,
            };
            let mut output = String::new();
            for line in syntect::util::LinesWithEndings::from(shown) {
                let regions = highlighter.highlight(line, &out.syntax_set);
                syntect::html::append_highlighted_html_for_styled_line(
                    &regions[..],
                    syntect::html::IncludeBackground::No,
                    &mut output,
                );
            }
            if let Some(file) = tangle_target {
                writeln!(w, "<div class=\"org-src-container\"><div class=\"org-before-pre\">{}</div><pre>{}</pre></div>", file, output)?;
            } else {
                writeln!(w, "<div class=\"org-src-container\"><div class=\"org-before-pre\">Rust</div><pre>{}</pre></div>", output)?;
            }
        }
        Tree::Src {
            ref contents,
            ref style,
            ..
        } => {
            // `&` first, so the entities added afterwards stay intact.
            let escaped = contents
                .replace('&', "&amp;")
                .replace('<', "&lt;")
                .replace('>', "&gt;");
            writeln!(
                w,
                "<div class=\"org-src-container\"><pre class=\"src-{}\">{}</pre></div>",
                style, escaped
            )?;
        }
        Tree::Block { ref contents, .. } => {
            writeln!(w, "<p>")?;
            output_string(contents, false, w)?;
            writeln!(w, "</p>")?;
        }
    }
    Ok(())
}
/// Writes the "Table of contents" heading followed by a nested list of the
/// sections directly and indirectly contained in `tree`.
///
/// Only the heading is written when `tree` is not a section.
fn table_of_contents<W: Write>(tree: &Tree, w: &mut W) -> Result<(), failure::Error> {
    writeln!(w, "<h1>Table of contents</h1>")?;
    match tree {
        // Numbering of the top-level sections starts at 1.
        Tree::Root { children, .. } => table_of_contents_(children, w, &mut vec![1]),
        _ => Ok(()),
    }
}
/// Writes one `<ul>` level of the table of contents.
///
/// Every section among `children` becomes an `<li>` linking to the section's
/// anchor, labelled with its dotted number (taken from `num`) and title, and
/// followed by a nested list for its own subsections. Non-section nodes are
/// skipped. The last element of `num` is incremented after each section.
///
/// Panics if `num` is empty.
fn table_of_contents_<W: Write>(
    children: &[Tree],
    w: &mut W,
    num: &mut Vec<usize>,
) -> Result<(), failure::Error> {
    writeln!(w, "<ul>")?;
    for node in children {
        let (subsections, title, id) = match node {
            Tree::Root {
                children,
                title,
                id,
                ..
            } => (children, title, id),
            _ => continue,
        };
        match id {
            Id::String(name) => write!(w, "<li><a href=\"#{}\">", name)?,
            Id::Int(number) => write!(w, "<li><a href=\"#{}\">", number)?,
        }
        num.iter().try_for_each(|i| write!(w, "{}.", i))?;
        output_string(title, true, w)?;
        writeln!(w, "</a>")?;
        // Descend one numbering level for the subsections, then come back up.
        num.push(1);
        table_of_contents_(subsections, w, num)?;
        num.pop();
        let counter = num.last_mut().unwrap();
        *counter += 1;
        writeln!(w, "</li>")?;
    }
    writeln!(w, "</ul>")?;
    Ok(())
}